From b68f15b7a57f7df52d955f7f75f9df3b78041764 Mon Sep 17 00:00:00 2001 Message-Id: From: "Jan Alexander Steffens (heftig)" Date: Tue, 19 Mar 2019 20:45:22 +0100 Subject: [PATCH] bz 1468911 https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 --- .cargo/config.in | 5 + Cargo.lock | 53 +- Cargo.toml | 1 + third_party/rust/cfg-if/.cargo-checksum.json | 2 +- third_party/rust/cfg-if/Cargo.toml | 28 +- third_party/rust/cfg-if/README.md | 18 +- third_party/rust/cfg-if/src/lib.rs | 53 +- .../rust/encoding_rs/.cargo-checksum.json | 2 +- third_party/rust/encoding_rs/Cargo.toml | 12 +- third_party/rust/encoding_rs/README.md | 52 +- third_party/rust/encoding_rs/build.rs | 8 + third_party/rust/encoding_rs/src/handles.rs | 2 +- third_party/rust/encoding_rs/src/lib.rs | 7 +- third_party/rust/encoding_rs/src/mem.rs | 24 +- .../rust/encoding_rs/src/simd_funcs.rs | 93 +- .../rust/encoding_rs/src/x_user_defined.rs | 7 +- third_party/rust/packed_simd/.appveyor.yml | 59 + .../rust/packed_simd/.cargo-checksum.json | 1 + third_party/rust/packed_simd/.travis.yml | 308 ++++ third_party/rust/packed_simd/Cargo.toml | 42 + .../rust/{simd => packed_simd}/LICENSE-APACHE | 0 .../rust/{simd => packed_simd}/LICENSE-MIT | 4 +- third_party/rust/packed_simd/bors.toml | 3 + third_party/rust/packed_simd/build.rs | 8 + third_party/rust/packed_simd/ci/all.sh | 71 + .../packed_simd/ci/android-install-ndk.sh | 37 + .../packed_simd/ci/android-install-sdk.sh | 60 + .../rust/packed_simd/ci/android-sysimage.sh | 56 + third_party/rust/packed_simd/ci/benchmark.sh | 32 + .../ci/deploy_and_run_on_ios_simulator.rs | 176 +++ .../docker/aarch64-linux-android/Dockerfile | 47 + .../aarch64-unknown-linux-gnu/Dockerfile | 14 + .../docker/arm-linux-androideabi/Dockerfile | 47 + .../arm-unknown-linux-gnueabi/Dockerfile | 15 + .../arm-unknown-linux-gnueabihf/Dockerfile | 13 + .../armv7-unknown-linux-gnueabihf/Dockerfile | 13 + .../docker/i586-unknown-linux-gnu/Dockerfile | 7 + .../docker/i686-unknown-linux-gnu/Dockerfile | 
7 + .../docker/mips-unknown-linux-gnu/Dockerfile | 13 + .../mips64-unknown-linux-gnuabi64/Dockerfile | 10 + .../Dockerfile | 10 + .../mipsel-unknown-linux-musl/Dockerfile | 25 + .../powerpc-unknown-linux-gnu/Dockerfile | 12 + .../powerpc64-unknown-linux-gnu/Dockerfile | 17 + .../powerpc64le-unknown-linux-gnu/Dockerfile | 11 + .../docker/s390x-unknown-linux-gnu/Dockerfile | 20 + .../sparc64-unknown-linux-gnu/Dockerfile | 18 + .../thumbv7neon-linux-androideabi/Dockerfile | 47 + .../Dockerfile | 13 + .../docker/wasm32-unknown-unknown/Dockerfile | 37 + .../ci/docker/x86_64-linux-android/Dockerfile | 29 + .../Dockerfile | 16 + .../x86_64-unknown-linux-gnu/Dockerfile | 10 + third_party/rust/packed_simd/ci/dox.sh | 24 + .../rust/packed_simd/ci/linux-s390x.sh | 18 + .../rust/packed_simd/ci/linux-sparc64.sh | 17 + third_party/rust/packed_simd/ci/lld-shim.rs | 11 + .../rust/packed_simd/ci/max_line_width.sh | 17 + third_party/rust/packed_simd/ci/run-docker.sh | 38 + third_party/rust/packed_simd/ci/run.sh | 96 ++ .../rust/packed_simd/ci/run_examples.sh | 51 + .../rust/packed_simd/ci/runtest-android.rs | 45 + .../rust/packed_simd/ci/setup_benchmarks.sh | 10 + .../rust/packed_simd/ci/test-runner-linux | 24 + third_party/rust/packed_simd/contributing.md | 67 + .../rust/packed_simd/perf-guide/.gitignore | 1 + .../rust/packed_simd/perf-guide/book.toml | 12 + .../packed_simd/perf-guide/src/SUMMARY.md | 21 + .../rust/packed_simd/perf-guide/src/ascii.css | 4 + .../perf-guide/src/bound_checks.md | 22 + .../perf-guide/src/float-math/approx.md | 8 + .../perf-guide/src/float-math/fma.md | 6 + .../perf-guide/src/float-math/fp.md | 3 + .../perf-guide/src/float-math/svml.md | 7 + .../perf-guide/src/introduction.md | 26 + .../packed_simd/perf-guide/src/prof/linux.md | 107 ++ .../packed_simd/perf-guide/src/prof/mca.md | 100 ++ .../perf-guide/src/prof/profiling.md | 14 + .../src/target-feature/attribute.md | 5 + .../perf-guide/src/target-feature/features.md | 13 + 
.../perf-guide/src/target-feature/inlining.md | 5 + .../perf-guide/src/target-feature/practice.md | 31 + .../perf-guide/src/target-feature/runtime.md | 5 + .../src/target-feature/rustflags.md | 77 + .../perf-guide/src/vert-hor-ops.md | 76 + third_party/rust/packed_simd/readme.md | 182 +++ third_party/rust/packed_simd/rustfmt.toml | 7 + third_party/rust/packed_simd/src/api.rs | 301 ++++ .../rust/packed_simd/src/api/bit_manip.rs | 128 ++ third_party/rust/packed_simd/src/api/cast.rs | 108 ++ .../rust/packed_simd/src/api/cast/macros.rs | 82 + .../rust/packed_simd/src/api/cast/v128.rs | 79 + .../rust/packed_simd/src/api/cast/v16.rs | 17 + .../rust/packed_simd/src/api/cast/v256.rs | 81 + .../rust/packed_simd/src/api/cast/v32.rs | 30 + .../rust/packed_simd/src/api/cast/v512.rs | 68 + .../rust/packed_simd/src/api/cast/v64.rs | 47 + third_party/rust/packed_simd/src/api/cmp.rs | 16 + .../rust/packed_simd/src/api/cmp/eq.rs | 27 + .../rust/packed_simd/src/api/cmp/ord.rs | 43 + .../packed_simd/src/api/cmp/partial_eq.rs | 67 + .../packed_simd/src/api/cmp/partial_ord.rs | 234 +++ .../rust/packed_simd/src/api/cmp/vertical.rs | 114 ++ .../rust/packed_simd/src/api/default.rs | 28 + third_party/rust/packed_simd/src/api/fmt.rs | 12 + .../rust/packed_simd/src/api/fmt/binary.rs | 56 + .../rust/packed_simd/src/api/fmt/debug.rs | 62 + .../rust/packed_simd/src/api/fmt/lower_hex.rs | 56 + .../rust/packed_simd/src/api/fmt/octal.rs | 56 + .../rust/packed_simd/src/api/fmt/upper_hex.rs | 56 + third_party/rust/packed_simd/src/api/from.rs | 7 + .../packed_simd/src/api/from/from_array.rs | 121 ++ .../packed_simd/src/api/from/from_vector.rs | 67 + third_party/rust/packed_simd/src/api/hash.rs | 47 + .../rust/packed_simd/src/api/into_bits.rs | 59 + .../src/api/into_bits/arch_specific.rs | 190 +++ .../packed_simd/src/api/into_bits/macros.rs | 74 + .../packed_simd/src/api/into_bits/v128.rs | 28 + .../rust/packed_simd/src/api/into_bits/v16.rs | 9 + .../packed_simd/src/api/into_bits/v256.rs | 27 + 
.../rust/packed_simd/src/api/into_bits/v32.rs | 13 + .../packed_simd/src/api/into_bits/v512.rs | 27 + .../rust/packed_simd/src/api/into_bits/v64.rs | 18 + third_party/rust/packed_simd/src/api/math.rs | 4 + .../rust/packed_simd/src/api/math/float.rs | 61 + .../packed_simd/src/api/math/float/abs.rs | 31 + .../packed_simd/src/api/math/float/consts.rs | 86 + .../packed_simd/src/api/math/float/cos.rs | 44 + .../packed_simd/src/api/math/float/exp.rs | 33 + .../rust/packed_simd/src/api/math/float/ln.rs | 33 + .../packed_simd/src/api/math/float/mul_add.rs | 44 + .../src/api/math/float/mul_adde.rs | 48 + .../packed_simd/src/api/math/float/powf.rs | 36 + .../packed_simd/src/api/math/float/recpre.rs | 36 + .../packed_simd/src/api/math/float/rsqrte.rs | 40 + .../packed_simd/src/api/math/float/sin.rs | 50 + .../packed_simd/src/api/math/float/sqrt.rs | 35 + .../packed_simd/src/api/math/float/sqrte.rs | 44 + .../rust/packed_simd/src/api/minimal.rs | 6 + .../rust/packed_simd/src/api/minimal/iuf.rs | 167 ++ .../rust/packed_simd/src/api/minimal/mask.rs | 174 +++ .../rust/packed_simd/src/api/minimal/ptr.rs | 1385 +++++++++++++++++ third_party/rust/packed_simd/src/api/ops.rs | 32 + .../src/api/ops/scalar_arithmetic.rs | 203 +++ .../packed_simd/src/api/ops/scalar_bitwise.rs | 162 ++ .../src/api/ops/scalar_mask_bitwise.rs | 140 ++ .../packed_simd/src/api/ops/scalar_shifts.rs | 107 ++ .../src/api/ops/vector_arithmetic.rs | 148 ++ .../packed_simd/src/api/ops/vector_bitwise.rs | 129 ++ .../src/api/ops/vector_float_min_max.rs | 69 + .../src/api/ops/vector_int_min_max.rs | 57 + .../src/api/ops/vector_mask_bitwise.rs | 116 ++ .../packed_simd/src/api/ops/vector_neg.rs | 43 + .../packed_simd/src/api/ops/vector_rotates.rs | 90 ++ .../packed_simd/src/api/ops/vector_shifts.rs | 107 ++ third_party/rust/packed_simd/src/api/ptr.rs | 4 + .../packed_simd/src/api/ptr/gather_scatter.rs | 241 +++ .../rust/packed_simd/src/api/reductions.rs | 12 + .../packed_simd/src/api/reductions/bitwise.rs | 151 ++ 
.../src/api/reductions/float_arithmetic.rs | 312 ++++ .../src/api/reductions/integer_arithmetic.rs | 197 +++ .../packed_simd/src/api/reductions/mask.rs | 89 ++ .../packed_simd/src/api/reductions/min_max.rs | 377 +++++ .../rust/packed_simd/src/api/select.rs | 75 + .../rust/packed_simd/src/api/shuffle.rs | 190 +++ .../rust/packed_simd/src/api/shuffle1_dyn.rs | 159 ++ third_party/rust/packed_simd/src/api/slice.rs | 7 + .../packed_simd/src/api/slice/from_slice.rs | 216 +++ .../src/api/slice/write_to_slice.rs | 211 +++ .../rust/packed_simd/src/api/swap_bytes.rs | 192 +++ third_party/rust/packed_simd/src/codegen.rs | 59 + .../rust/packed_simd/src/codegen/bit_manip.rs | 354 +++++ .../rust/packed_simd/src/codegen/llvm.rs | 99 ++ .../rust/packed_simd/src/codegen/math.rs | 3 + .../packed_simd/src/codegen/math/float.rs | 18 + .../packed_simd/src/codegen/math/float/abs.rs | 103 ++ .../packed_simd/src/codegen/math/float/cos.rs | 103 ++ .../src/codegen/math/float/cos_pi.rs | 87 ++ .../packed_simd/src/codegen/math/float/exp.rs | 112 ++ .../packed_simd/src/codegen/math/float/ln.rs | 112 ++ .../src/codegen/math/float/macros.rs | 559 +++++++ .../src/codegen/math/float/mul_add.rs | 109 ++ .../src/codegen/math/float/mul_adde.rs | 66 + .../src/codegen/math/float/powf.rs | 112 ++ .../packed_simd/src/codegen/math/float/sin.rs | 103 ++ .../src/codegen/math/float/sin_cos_pi.rs | 195 +++ .../src/codegen/math/float/sin_pi.rs | 87 ++ .../src/codegen/math/float/sqrt.rs | 103 ++ .../src/codegen/math/float/sqrte.rs | 67 + .../src/codegen/pointer_sized_int.rs | 28 + .../packed_simd/src/codegen/reductions.rs | 1 + .../src/codegen/reductions/mask.rs | 69 + .../src/codegen/reductions/mask/aarch64.rs | 71 + .../src/codegen/reductions/mask/arm.rs | 54 + .../src/codegen/reductions/mask/fallback.rs | 6 + .../codegen/reductions/mask/fallback_impl.rs | 237 +++ .../src/codegen/reductions/mask/x86.rs | 194 +++ .../src/codegen/reductions/mask/x86/avx.rs | 101 ++ .../src/codegen/reductions/mask/x86/avx2.rs | 
35 + .../src/codegen/reductions/mask/x86/sse.rs | 68 + .../src/codegen/reductions/mask/x86/sse2.rs | 70 + .../rust/packed_simd/src/codegen/shuffle.rs | 302 ++++ .../packed_simd/src/codegen/shuffle1_dyn.rs | 432 +++++ .../packed_simd/src/codegen/swap_bytes.rs | 189 +++ .../rust/packed_simd/src/codegen/v128.rs | 46 + .../rust/packed_simd/src/codegen/v16.rs | 7 + .../rust/packed_simd/src/codegen/v256.rs | 78 + .../rust/packed_simd/src/codegen/v32.rs | 11 + .../rust/packed_simd/src/codegen/v512.rs | 145 ++ .../rust/packed_simd/src/codegen/v64.rs | 21 + .../rust/packed_simd/src/codegen/vPtr.rs | 33 + .../rust/packed_simd/src/codegen/vSize.rs | 43 + third_party/rust/packed_simd/src/lib.rs | 327 ++++ third_party/rust/packed_simd/src/masks.rs | 128 ++ third_party/rust/packed_simd/src/sealed.rs | 41 + third_party/rust/packed_simd/src/testing.rs | 8 + .../rust/packed_simd/src/testing/macros.rs | 44 + .../rust/packed_simd/src/testing/utils.rs | 135 ++ third_party/rust/packed_simd/src/v128.rs | 80 + third_party/rust/packed_simd/src/v16.rs | 16 + third_party/rust/packed_simd/src/v256.rs | 86 + third_party/rust/packed_simd/src/v32.rs | 29 + third_party/rust/packed_simd/src/v512.rs | 99 ++ third_party/rust/packed_simd/src/v64.rs | 66 + third_party/rust/packed_simd/src/vPtr.rs | 34 + third_party/rust/packed_simd/src/vSize.rs | 53 + .../rust/packed_simd/tests/endianness.rs | 262 ++++ third_party/rust/simd/.cargo-checksum.json | 1 - third_party/rust/simd/Cargo.toml | 37 - third_party/rust/simd/README.md | 11 - third_party/rust/simd/benches/mandelbrot.rs | 117 -- third_party/rust/simd/benches/matrix.rs | 485 ------ third_party/rust/simd/build.rs | 3 - third_party/rust/simd/examples/axpy.rs | 65 - third_party/rust/simd/examples/convert.rs | 38 - third_party/rust/simd/examples/dot-product.rs | 60 - .../simd/examples/fannkuch-redux-nosimd.rs | 156 -- .../rust/simd/examples/fannkuch-redux.rs | 233 --- third_party/rust/simd/examples/mandelbrot.rs | 125 -- 
.../rust/simd/examples/matrix-inverse.rs | 281 ---- .../rust/simd/examples/nbody-nosimd.rs | 156 -- third_party/rust/simd/examples/nbody.rs | 170 -- third_party/rust/simd/examples/ops.rs | 10 - .../simd/examples/spectral-norm-nosimd.rs | 106 -- .../rust/simd/examples/spectral-norm.rs | 74 - third_party/rust/simd/src/aarch64/mod.rs | 3 - third_party/rust/simd/src/aarch64/neon.rs | 681 -------- third_party/rust/simd/src/arm/mod.rs | 4 - third_party/rust/simd/src/arm/neon.rs | 622 -------- third_party/rust/simd/src/common.rs | 520 ------- third_party/rust/simd/src/lib.rs | 804 ---------- third_party/rust/simd/src/sixty_four.rs | 228 --- third_party/rust/simd/src/v256.rs | 436 ------ third_party/rust/simd/src/x86/avx.rs | 290 ---- third_party/rust/simd/src/x86/avx2.rs | 65 - third_party/rust/simd/src/x86/mod.rs | 16 - third_party/rust/simd/src/x86/sse2.rs | 359 ----- third_party/rust/simd/src/x86/sse3.rs | 57 - third_party/rust/simd/src/x86/sse4_1.rs | 155 -- third_party/rust/simd/src/x86/sse4_2.rs | 19 - third_party/rust/simd/src/x86/ssse3.rs | 172 -- toolkit/moz.configure | 11 +- 262 files changed, 17410 insertions(+), 6733 deletions(-) create mode 100644 third_party/rust/packed_simd/.appveyor.yml create mode 100644 third_party/rust/packed_simd/.cargo-checksum.json create mode 100644 third_party/rust/packed_simd/.travis.yml create mode 100644 third_party/rust/packed_simd/Cargo.toml rename third_party/rust/{simd => packed_simd}/LICENSE-APACHE (100%) rename third_party/rust/{simd => packed_simd}/LICENSE-MIT (93%) create mode 100644 third_party/rust/packed_simd/bors.toml create mode 100644 third_party/rust/packed_simd/build.rs create mode 100644 third_party/rust/packed_simd/ci/all.sh create mode 100644 third_party/rust/packed_simd/ci/android-install-ndk.sh create mode 100644 third_party/rust/packed_simd/ci/android-install-sdk.sh create mode 100644 third_party/rust/packed_simd/ci/android-sysimage.sh create mode 100644 third_party/rust/packed_simd/ci/benchmark.sh create 
mode 100644 third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile create 
mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile create mode 100644 third_party/rust/packed_simd/ci/dox.sh create mode 100644 third_party/rust/packed_simd/ci/linux-s390x.sh create mode 100644 third_party/rust/packed_simd/ci/linux-sparc64.sh create mode 100644 third_party/rust/packed_simd/ci/lld-shim.rs create mode 100644 third_party/rust/packed_simd/ci/max_line_width.sh create mode 100644 third_party/rust/packed_simd/ci/run-docker.sh create mode 100644 third_party/rust/packed_simd/ci/run.sh create mode 100644 third_party/rust/packed_simd/ci/run_examples.sh create mode 100644 third_party/rust/packed_simd/ci/runtest-android.rs create mode 100644 third_party/rust/packed_simd/ci/setup_benchmarks.sh create mode 100644 third_party/rust/packed_simd/ci/test-runner-linux create mode 100644 third_party/rust/packed_simd/contributing.md create mode 100644 third_party/rust/packed_simd/perf-guide/.gitignore create mode 100644 third_party/rust/packed_simd/perf-guide/book.toml create mode 100644 third_party/rust/packed_simd/perf-guide/src/SUMMARY.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/ascii.css create mode 100644 third_party/rust/packed_simd/perf-guide/src/bound_checks.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/approx.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fma.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fp.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/svml.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/introduction.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/linux.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/mca.md create 
mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/profiling.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/features.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md create mode 100644 third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md create mode 100644 third_party/rust/packed_simd/readme.md create mode 100644 third_party/rust/packed_simd/rustfmt.toml create mode 100644 third_party/rust/packed_simd/src/api.rs create mode 100644 third_party/rust/packed_simd/src/api/bit_manip.rs create mode 100644 third_party/rust/packed_simd/src/api/cast.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/macros.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v128.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v16.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v256.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v32.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v512.rs create mode 100644 third_party/rust/packed_simd/src/api/cast/v64.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp/eq.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp/ord.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_eq.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_ord.rs create mode 100644 third_party/rust/packed_simd/src/api/cmp/vertical.rs create mode 100644 third_party/rust/packed_simd/src/api/default.rs create mode 100644 
third_party/rust/packed_simd/src/api/fmt.rs create mode 100644 third_party/rust/packed_simd/src/api/fmt/binary.rs create mode 100644 third_party/rust/packed_simd/src/api/fmt/debug.rs create mode 100644 third_party/rust/packed_simd/src/api/fmt/lower_hex.rs create mode 100644 third_party/rust/packed_simd/src/api/fmt/octal.rs create mode 100644 third_party/rust/packed_simd/src/api/fmt/upper_hex.rs create mode 100644 third_party/rust/packed_simd/src/api/from.rs create mode 100644 third_party/rust/packed_simd/src/api/from/from_array.rs create mode 100644 third_party/rust/packed_simd/src/api/from/from_vector.rs create mode 100644 third_party/rust/packed_simd/src/api/hash.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/macros.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v128.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v16.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v256.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v32.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v512.rs create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v64.rs create mode 100644 third_party/rust/packed_simd/src/api/math.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/abs.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/consts.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/cos.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/exp.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/ln.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_add.rs create mode 100644 
third_party/rust/packed_simd/src/api/math/float/mul_adde.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/powf.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/recpre.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/rsqrte.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/sin.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrt.rs create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrte.rs create mode 100644 third_party/rust/packed_simd/src/api/minimal.rs create mode 100644 third_party/rust/packed_simd/src/api/minimal/iuf.rs create mode 100644 third_party/rust/packed_simd/src/api/minimal/mask.rs create mode 100644 third_party/rust/packed_simd/src/api/minimal/ptr.rs create mode 100644 third_party/rust/packed_simd/src/api/ops.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_neg.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_rotates.rs create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_shifts.rs create mode 100644 third_party/rust/packed_simd/src/api/ptr.rs create mode 100644 third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs create mode 100644 
third_party/rust/packed_simd/src/api/reductions.rs create mode 100644 third_party/rust/packed_simd/src/api/reductions/bitwise.rs create mode 100644 third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs create mode 100644 third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs create mode 100644 third_party/rust/packed_simd/src/api/reductions/mask.rs create mode 100644 third_party/rust/packed_simd/src/api/reductions/min_max.rs create mode 100644 third_party/rust/packed_simd/src/api/select.rs create mode 100644 third_party/rust/packed_simd/src/api/shuffle.rs create mode 100644 third_party/rust/packed_simd/src/api/shuffle1_dyn.rs create mode 100644 third_party/rust/packed_simd/src/api/slice.rs create mode 100644 third_party/rust/packed_simd/src/api/slice/from_slice.rs create mode 100644 third_party/rust/packed_simd/src/api/slice/write_to_slice.rs create mode 100644 third_party/rust/packed_simd/src/api/swap_bytes.rs create mode 100644 third_party/rust/packed_simd/src/codegen.rs create mode 100644 third_party/rust/packed_simd/src/codegen/bit_manip.rs create mode 100644 third_party/rust/packed_simd/src/codegen/llvm.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/abs.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/exp.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/ln.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/macros.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs create mode 100644 
third_party/rust/packed_simd/src/codegen/math/float/powf.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs create mode 100644 third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle.rs create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs create mode 100644 third_party/rust/packed_simd/src/codegen/swap_bytes.rs create mode 100644 third_party/rust/packed_simd/src/codegen/v128.rs create mode 100644 third_party/rust/packed_simd/src/codegen/v16.rs create mode 100644 third_party/rust/packed_simd/src/codegen/v256.rs create mode 100644 third_party/rust/packed_simd/src/codegen/v32.rs create mode 100644 
third_party/rust/packed_simd/src/codegen/v512.rs create mode 100644 third_party/rust/packed_simd/src/codegen/v64.rs create mode 100644 third_party/rust/packed_simd/src/codegen/vPtr.rs create mode 100644 third_party/rust/packed_simd/src/codegen/vSize.rs create mode 100644 third_party/rust/packed_simd/src/lib.rs create mode 100644 third_party/rust/packed_simd/src/masks.rs create mode 100644 third_party/rust/packed_simd/src/sealed.rs create mode 100644 third_party/rust/packed_simd/src/testing.rs create mode 100644 third_party/rust/packed_simd/src/testing/macros.rs create mode 100644 third_party/rust/packed_simd/src/testing/utils.rs create mode 100644 third_party/rust/packed_simd/src/v128.rs create mode 100644 third_party/rust/packed_simd/src/v16.rs create mode 100644 third_party/rust/packed_simd/src/v256.rs create mode 100644 third_party/rust/packed_simd/src/v32.rs create mode 100644 third_party/rust/packed_simd/src/v512.rs create mode 100644 third_party/rust/packed_simd/src/v64.rs create mode 100644 third_party/rust/packed_simd/src/vPtr.rs create mode 100644 third_party/rust/packed_simd/src/vSize.rs create mode 100644 third_party/rust/packed_simd/tests/endianness.rs delete mode 100644 third_party/rust/simd/.cargo-checksum.json delete mode 100644 third_party/rust/simd/Cargo.toml delete mode 100644 third_party/rust/simd/README.md delete mode 100755 third_party/rust/simd/benches/mandelbrot.rs delete mode 100755 third_party/rust/simd/benches/matrix.rs delete mode 100644 third_party/rust/simd/build.rs delete mode 100755 third_party/rust/simd/examples/axpy.rs delete mode 100644 third_party/rust/simd/examples/convert.rs delete mode 100755 third_party/rust/simd/examples/dot-product.rs delete mode 100644 third_party/rust/simd/examples/fannkuch-redux-nosimd.rs delete mode 100755 third_party/rust/simd/examples/fannkuch-redux.rs delete mode 100755 third_party/rust/simd/examples/mandelbrot.rs delete mode 100644 third_party/rust/simd/examples/matrix-inverse.rs delete mode 100644 
third_party/rust/simd/examples/nbody-nosimd.rs delete mode 100755 third_party/rust/simd/examples/nbody.rs delete mode 100644 third_party/rust/simd/examples/ops.rs delete mode 100644 third_party/rust/simd/examples/spectral-norm-nosimd.rs delete mode 100755 third_party/rust/simd/examples/spectral-norm.rs delete mode 100644 third_party/rust/simd/src/aarch64/mod.rs delete mode 100644 third_party/rust/simd/src/aarch64/neon.rs delete mode 100644 third_party/rust/simd/src/arm/mod.rs delete mode 100644 third_party/rust/simd/src/arm/neon.rs delete mode 100644 third_party/rust/simd/src/common.rs delete mode 100644 third_party/rust/simd/src/lib.rs delete mode 100644 third_party/rust/simd/src/sixty_four.rs delete mode 100644 third_party/rust/simd/src/v256.rs delete mode 100644 third_party/rust/simd/src/x86/avx.rs delete mode 100644 third_party/rust/simd/src/x86/avx2.rs delete mode 100644 third_party/rust/simd/src/x86/mod.rs delete mode 100644 third_party/rust/simd/src/x86/sse2.rs delete mode 100644 third_party/rust/simd/src/x86/sse3.rs delete mode 100644 third_party/rust/simd/src/x86/sse4_1.rs delete mode 100644 third_party/rust/simd/src/x86/sse4_2.rs delete mode 100644 third_party/rust/simd/src/x86/ssse3.rs diff --git a/.cargo/config.in b/.cargo/config.in index 94f5732891fb..57ae36311e52 100644 --- a/.cargo/config.in +++ b/.cargo/config.in @@ -17,6 +17,11 @@ git = "https://github.com/froydnj/winapi-rs" branch = "aarch64" replace-with = "vendored-sources" +[source."https://github.com/rust-lang-nursery/packed_simd"] +git = "https://github.com/hsivonen/packed_simd" +branch = "rust_1_32" +replace-with = "vendored-sources" + [source.vendored-sources] directory = '@top_srcdir@/third_party/rust' diff --git a/Cargo.lock b/Cargo.lock index 8896cfeddb28..a048a5522ab0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,7 +141,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "backtrace-sys 0.1.24 
(registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", @@ -218,7 +218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -372,7 +372,7 @@ dependencies = [ [[package]] name = "cfg-if" -version = "0.1.2" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -575,41 +575,41 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "crossbeam-epoch" 
version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "crossbeam-utils" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "crossbeam-utils" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -848,25 +848,25 @@ name = "encoding_c" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "encoding_glue" version = "0.1.0" dependencies = [ - "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", "nserror 0.1.0", "nsstring 0.1.0", ] [[package]] name = "encoding_rs" -version = "0.8.14" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 
(registry+https://github.com/rust-lang/crates.io-index)", - "simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)", ] [[package]] @@ -1477,7 +1477,7 @@ name = "log" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1719,7 +1719,7 @@ name = "net2" version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", ] @@ -1773,7 +1773,7 @@ name = "nsstring" version = "0.1.0" dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1859,6 +1859,14 @@ dependencies = [ "stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "packed_simd" +version = "0.3.3" +source = "git+https://github.com/hsivonen/packed_simd?branch=rust_1_32#3541e3818fdc7c2a24f87e3459151a4ce955a67a" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "parking_lot" version = "0.6.3" @@ -2354,11 +2362,6 @@ dependencies = [ "opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "simd" -version = "0.2.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "siphasher" version = "0.2.1" @@ -2936,7 +2939,7 @@ name = "uuid" version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -3017,7 +3020,7 @@ dependencies = [ "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "core-foundation 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "core-graphics 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", "core-text 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3253,7 +3256,7 @@ dependencies = [ "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" "checksum cc 1.0.23 (registry+https://github.com/rust-lang/crates.io-index)" = "c37f0efaa4b9b001fa6f02d4b644dee4af97d3414df07c51e3e4f015f3a3e131" "checksum cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fc0086be9ca82f7fc89fc873435531cb898b86e850005850de1f820e2db6e9b" -"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" +"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" "checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" "checksum clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)" = "481e42017c1416b1c0856ece45658ecbb7c93d8a93455f7e5fa77f3b35455557" "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536" @@ -3303,7 +3306,7 @@ dependencies = [ "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" "checksum ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621" "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee" -"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" +"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73" "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad" "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" "checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea" @@ -3388,6 +3391,7 @@ dependencies = [ "checksum ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2f0015e9e8e28ee20c581cfbfe47c650cedeb9ed0721090e0b7ebb10b9cdbcc2" "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" "checksum owning_ref 0.4.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" +"checksum packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)" = "" "checksum parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69376b761943787ebd5cc85a5bc95958651a22609c5c1c2b65de21786baec72b" "checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" "checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" @@ -3441,7 +3445,6 @@ dependencies = [ "checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae" "checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0" "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" -"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84" "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84" "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" "checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d" diff --git a/Cargo.toml b/Cargo.toml index d64cbc77b53d..25859a20ecc3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,3 +59,4 @@ codegen-units = 1 libudev-sys = { path = "dom/webauthn/libudev-sys" } serde_derive = { git 
= "https://github.com/servo/serde", branch = "deserialize_from_enums9" } winapi = { git = "https://github.com/froydnj/winapi-rs", branch = "aarch64" } +packed_simd = { git = "https://github.com/hsivonen/packed_simd", branch = "rust_1_32" } diff --git a/third_party/rust/cfg-if/.cargo-checksum.json b/third_party/rust/cfg-if/.cargo-checksum.json index 89b14a227216..b744a21d9fd1 100644 --- a/third_party/rust/cfg-if/.cargo-checksum.json +++ b/third_party/rust/cfg-if/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"764b9ce160653e841430da3919ff968b957ff811f7da42c8483c8bfc2f06be25","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"3fa9368c60bc701dea294fbacae0469188c4be1de79f82e972bb9b321776cd52","src/lib.rs":"6915169e3ca05f28e1cb0e052379d74f2496400de1240b74c56e55c2674a6560","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"} \ No newline at end of file +{"files":{"Cargo.toml":"090d983ec20ad09e59f6b7679b48b9b54e9c0841cf2922b81cba485edcd40876","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"1cd0ebc3b30a9c9eddb0fda5515b5a52ec2b85a087328f0ee9f4d68cbb28afc2","src/lib.rs":"f02d6e295109365cf54884e5282a3e7d1e1f62857c700f23cd013e94a56bd803","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"} \ No newline at end of file diff --git a/third_party/rust/cfg-if/Cargo.toml b/third_party/rust/cfg-if/Cargo.toml index 7afa063d1ef5..84c4fc7835ab 100644 --- a/third_party/rust/cfg-if/Cargo.toml +++ b/third_party/rust/cfg-if/Cargo.toml @@ -1,14 +1,24 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When 
uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g. crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + [package] name = "cfg-if" -version = "0.1.2" +version = "0.1.6" authors = ["Alex Crichton "] -license = "MIT/Apache-2.0" +description = "A macro to ergonomically define an item depending on a large number of #[cfg]\nparameters. Structured like an if-else chain, the first matching branch is the\nitem that gets emitted.\n" +homepage = "https://github.com/alexcrichton/cfg-if" +documentation = "https://docs.rs/cfg-if" readme = "README.md" +license = "MIT/Apache-2.0" repository = "https://github.com/alexcrichton/cfg-if" -homepage = "https://github.com/alexcrichton/cfg-if" -documentation = "http://alexcrichton.com/cfg-if" -description = """ -A macro to ergonomically define an item depending on a large number of #[cfg] -parameters. Structured like an if-else chain, the first matching branch is the -item that gets emitted. -""" +[badges.travis-ci] +repository = "alexcrichton/cfg-if" diff --git a/third_party/rust/cfg-if/README.md b/third_party/rust/cfg-if/README.md index e9859dadb609..344a946c0487 100644 --- a/third_party/rust/cfg-if/README.md +++ b/third_party/rust/cfg-if/README.md @@ -2,7 +2,7 @@ [![Build Status](https://travis-ci.org/alexcrichton/cfg-if.svg?branch=master)](https://travis-ci.org/alexcrichton/cfg-if) -[Documentation](http://alexcrichton.com/cfg-if) +[Documentation](https://docs.rs/cfg-if) A macro to ergonomically define an item depending on a large number of #[cfg] parameters. 
Structured like an if-else chain, the first matching branch is the @@ -36,9 +36,17 @@ fn main() { # License -`cfg-if` is primarily distributed under the terms of both the MIT license and -the Apache License (Version 2.0), with portions covered by various BSD-like -licenses. +This project is licensed under either of -See LICENSE-APACHE, and LICENSE-MIT for details. + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. diff --git a/third_party/rust/cfg-if/src/lib.rs b/third_party/rust/cfg-if/src/lib.rs index 563cda81f42d..ff144f69f862 100644 --- a/third_party/rust/cfg-if/src/lib.rs +++ b/third_party/rust/cfg-if/src/lib.rs @@ -1,81 +1,90 @@ -//! A macro for defining #[cfg] if-else statements. +//! A macro for defining `#[cfg]` if-else statements. //! //! The macro provided by this crate, `cfg_if`, is similar to the `if/elif` C //! preprocessor macro by allowing definition of a cascade of `#[cfg]` cases, //! emitting the implementation which matches first. //! -//! This allows you to conveniently provide a long list #[cfg]'d blocks of code +//! This allows you to conveniently provide a long list `#[cfg]`'d blocks of code //! without having to rewrite each clause multiple times. //! //! # Example //! //! ``` //! #[macro_use] //! extern crate cfg_if; //! //! cfg_if! { //! if #[cfg(unix)] { //! fn foo() { /* unix specific functionality */ } //! } else if #[cfg(target_pointer_width = "32")] { //! fn foo() { /* non-unix, 32-bit functionality */ } //! } else { //! fn foo() { /* fallback implementation */ } //! } //! } //! //! # fn main() {} //! 
``` #![no_std] -#![doc(html_root_url = "http://alexcrichton.com/cfg-if")] +#![doc(html_root_url = "https://docs.rs/cfg-if")] #![deny(missing_docs)] #![cfg_attr(test, deny(warnings))] -#[macro_export] +#[macro_export(local_inner_macros)] macro_rules! cfg_if { + // match if/else chains with a final `else` ($( if #[cfg($($meta:meta),*)] { $($it:item)* } ) else * else { $($it2:item)* }) => { - __cfg_if_items! { + cfg_if! { + @__items () ; $( ( ($($meta),*) ($($it)*) ), )* ( () ($($it2)*) ), } }; + + // match if/else chains lacking a final `else` ( if #[cfg($($i_met:meta),*)] { $($i_it:item)* } $( else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } )* ) => { - __cfg_if_items! { + cfg_if! { + @__items () ; ( ($($i_met),*) ($($i_it)*) ), $( ( ($($e_met),*) ($($e_it)*) ), )* ( () () ), } - } -} + }; -#[macro_export] -#[doc(hidden)] -macro_rules! __cfg_if_items { - (($($not:meta,)*) ; ) => {}; - (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { - __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* } - __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* } - } -} + // Internal and recursive macro to emit all the items + // + // Collects all the negated cfgs in a list at the beginning and after the + // semicolon is all the remaining items + (@__items ($($not:meta,)*) ; ) => {}; + (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { + // Emit all items within one block, applying an approprate #[cfg]. The + // #[cfg] will require all `$m` matchers specified and must also negate + // all previous matchers. + cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } -#[macro_export] -#[doc(hidden)] -macro_rules! __cfg_if_apply { - ($m:meta, $($it:item)*) => { + // Recurse to emit all other items in `$rest`, and when we do so add all + // our `$m` matchers to the list of `$not` matchers as future emissions + // will have to negate everything we just matched as well. + cfg_if! 
{ @__items ($($not,)* $($m,)*) ; $($rest)* } + }; + + // Internal macro to Apply a cfg attribute to a list of items + (@__apply $m:meta, $($it:item)*) => { $(#[$m] $it)* - } + }; } #[cfg(test)] diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json index c063d4c27534..7c1901dce515 100644 --- a/third_party/rust/encoding_rs/.cargo-checksum.json +++ b/third_party/rust/encoding_rs/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"f4c9b33981fe222ef322d640f5ef680828d75dcd534b8aa2bfdd576598deea64","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"ad140c9178067c8bdba8ae43ddffd0506d70d49474731247a050ff99a3ff7832","build.rs":"f5defca2c68b73e8723f489a9279af4fbe9724abc6e9abf58d32542e8a459e26","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-885
9-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b
47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"71aa7de1c5236a34ea0a8bb85332987751d2466b756fca6b3f6ac0da765cf91e","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e786de9e92e5652bc200266cf318753eea869e8971857cc0caa65a3cfe687545","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"f412f60f2d4afb7e32ffba94dc5f93716e6ae9f065799ca17bb1f1b2145f6ee4","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"565ceeffe81173b85700c55c396ab72068751ef809bea8e1cb1e6c7919f5a905","src/single_byte
.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis020
8_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"ab26ea900c8f7b7a4d1172872b7ca4bc573bc60b7b1979c93aafdfb86b2c2235"},"package":"a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"} \ No newline at end of file 
+{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd7
7776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.
txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_d
ata/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab0
0ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"} \ No newline at end of file diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml index 65fc8e8dffcd..e29f19fb9afe 100644 --- a/third_party/rust/encoding_rs/Cargo.toml +++ b/third_party/rust/encoding_rs/Cargo.toml @@ -12,47 +12,47 @@ [package] name = "encoding_rs" -version = "0.8.14" +version = "0.8.16" authors = ["Henri Sivonen "] description = "A Gecko-oriented implementation of the Encoding Standard" homepage = "https://docs.rs/encoding_rs/" documentation = "https://docs.rs/encoding_rs/" readme = "README.md" keywords = ["encoding", "web", "unicode", "charset"] categories = ["text-processing", "encoding", "web-programming", "internationalization"] license = "MIT/Apache-2.0" repository = "https://github.com/hsivonen/encoding_rs" [profile.release] lto = true [dependencies.cfg-if] version = "0.1.0" +[dependencies.packed_simd] +version = "0.3.3" +optional = true + [dependencies.serde] version = "1.0" optional = true - -[dependencies.simd] -version = "0.2.3" -optional = true [dev-dependencies.bincode] version = "0.8" [dev-dependencies.serde_derive] version = "1.0" [dev-dependencies.serde_json] version = "1.0" [features] 
fast-big5-hanzi-encode = [] fast-gb-hanzi-encode = [] fast-hangul-encode = [] fast-hanja-encode = [] fast-kanji-encode = [] fast-legacy-encode = ["fast-hangul-encode", "fast-hanja-encode", "fast-kanji-encode", "fast-gb-hanzi-encode", "fast-big5-hanzi-encode"] less-slow-big5-hanzi-encode = [] less-slow-gb-hanzi-encode = [] less-slow-kanji-encode = [] -simd-accel = ["simd"] +simd-accel = ["packed_simd", "packed_simd/into_bits"] [badges.travis-ci] repository = "hsivonen/encoding_rs" diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md index 3446efd0bb43..8a72b515450e 100644 --- a/third_party/rust/encoding_rs/README.md +++ b/third_party/rust/encoding_rs/README.md @@ -126,17 +126,39 @@ There are currently these optional cargo features: ### `simd-accel` -Enables SSE2 acceleration on x86 and x86_64 and NEON acceleration on Aarch64 -and ARMv7. _Enabling this cargo feature is recommended when building for x86, -x86_64, ARMv7 or Aarch64._ The intention is for the functionality enabled by -this feature to become the normal on-by-default behavior once -[portable SIMD](https://github.com/rust-lang/rfcs/pull/2366) becames part of -stable Rust. - -Enabling this feature breaks the build unless the target is x86 with SSE2 -(Rust's default 32-bit x86 target, `i686`, has SSE2, but Linux distros may -use an x86 target without SSE2, i.e. `i586` in `rustup` terms), ARMv7 or -thumbv7 with NEON (`-C target_feature=+neon`), x86_64 or Aarch64. +Enables SIMD acceleration using the nightly-dependent `packed_simd` crate. + +This is an opt-in feature, because enabling this feature _opts out_ of Rust's +guarantees of future compilers compiling old code (aka. "stability story"). 
+ +Currently, this has not been tested to be an improvement except for these +targets: + +* x86_64 +* i686 +* aarch64 +* thumbv7neon + +If you use nightly Rust, you use targets whose first component is one of the +above, and you are prepared _to have to revise your configuration when updating +Rust_, you should enable this feature. Otherwise, please _do not_ enable this +feature. + +_Note!_ If you are compiling for a target that does not have 128-bit SIMD +enabled as part of the target definition and you are enabling 128-bit SIMD +using `-C target_feature`, you need to enable the `core_arch` Cargo feature +for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of +using the standard-library copy of `core::arch`, because the `core::arch` +module of the pre-compiled standard library has been compiled with the +assumption that the CPU doesn't have 128-bit SIMD. At present this applies +mainly to 32-bit ARM targets whose first component does not include the +substring `neon`. + +The encoding_rs side of things has not been properly set up for POWER, +PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow +the advice from the previous paragraph, you probably shouldn't use +the `simd-accel` option on the less mainstream architectures at this +time. Used by Firefox. @@ -382,6 +404,14 @@ To regenerate the generated code: ## Release Notes +### 0.8.16 + +* Switch from the `simd` crate to `packed_simd`. + +### 0.8.15 + +* Adjust documentation for `simd-accel` (README-only release). + ### 0.8.14 * Made UTF-16 to UTF-8 encode conversion fill the output buffer as diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs index 1b7adf780010..e687878081f7 100644 --- a/third_party/rust/encoding_rs/build.rs +++ b/third_party/rust/encoding_rs/build.rs @@ -1,4 +1,12 @@ fn main() { + // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`. 
+ // You still need to knowingly have a setup that makes + // `packed_simd` compile. Therefore, having this file on + // crates.io is harmless in terms of users of `encoding_rs` + // accidentally depending on nightly features. Having this + // here means that if you knowingly want this, you only + // need to maintain a fork of `packed_simd` without _also_ + // having to maintain a fork of `encoding_rs`. #[cfg(feature = "simd-accel")] println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); } diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs index d75b65d75ce3..08da62d20051 100644 --- a/third_party/rust/encoding_rs/src/handles.rs +++ b/third_party/rust/encoding_rs/src/handles.rs @@ -34,7 +34,7 @@ use simd_funcs::*; all(target_endian = "little", target_feature = "neon") ) ))] -use simd::u16x8; +use packed_simd::u16x8; use super::DecoderResult; use super::EncoderResult; diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs index 912c349a0e25..23069375d6f8 100644 --- a/third_party/rust/encoding_rs/src/lib.rs +++ b/third_party/rust/encoding_rs/src/lib.rs @@ -11,7 +11,7 @@ feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self) )] -#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.14")] +#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")] //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust. @@ -665,20 +665,21 @@ //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes) //! for discussion about the UTF-16 family. 
-#![cfg_attr(feature = "simd-accel", feature(platform_intrinsics, core_intrinsics))] +#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))] #[macro_use] extern crate cfg_if; #[cfg(all( feature = "simd-accel", any( target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon") ) ))] -extern crate simd; +#[macro_use(shuffle)] +extern crate packed_simd; #[cfg(feature = "serde")] extern crate serde; diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs index 6cd1a4448056..c5ee605c1b13 100644 --- a/third_party/rust/encoding_rs/src/mem.rs +++ b/third_party/rust/encoding_rs/src/mem.rs @@ -228,8 +228,8 @@ macro_rules! by_unit_check_simd { cfg_if! { if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] { use simd_funcs::*; - use simd::u8x16; - use simd::u16x8; + use packed_simd::u8x16; + use packed_simd::u16x8; const SIMD_ALIGNMENT: usize = 16; @@ -631,47 +631,42 @@ cfg_if! { /// /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function /// is not guaranteed to fail fast.) -#[inline] pub fn is_ascii(buffer: &[u8]) -> bool { is_ascii_impl(buffer) } /// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing /// only ASCII characters). /// /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function /// is not guaranteed to fail fast.) -#[inline] pub fn is_basic_latin(buffer: &[u16]) -> bool { is_basic_latin_impl(buffer) } /// Checks whether the buffer is valid UTF-8 representing only code points /// less than or equal to U+00FF. /// /// Fails fast. (I.e. returns before having read the whole buffer if UTF-8 /// invalidity or code points above U+00FF are discovered. 
-#[inline] pub fn is_utf8_latin1(buffer: &[u8]) -> bool { is_utf8_latin1_impl(buffer).is_none() } /// Checks whether the buffer represents only code point less than or equal /// to U+00FF. /// /// Fails fast. (I.e. returns before having read the whole buffer if code /// points above U+00FF are discovered. -#[inline] pub fn is_str_latin1(buffer: &str) -> bool { is_str_latin1_impl(buffer).is_none() } /// Checks whether the buffer represents only code point less than or equal /// to U+00FF. /// /// May read the entire buffer even if it isn't all-Latin1. (I.e. the function /// is not guaranteed to fail fast.) -#[inline] pub fn is_utf16_latin1(buffer: &[u16]) -> bool { is_utf16_latin1_impl(buffer) } @@ -1283,7 +1278,6 @@ pub fn is_str_bidi(buffer: &str) -> bool { /// high surrogate that could be the high half of an RTL character. /// Returns `false` if the input contains neither RTL characters nor /// unpaired high surrogates that could be higher halves of RTL characters. -#[inline] pub fn is_utf16_bidi(buffer: &[u16]) -> bool { is_utf16_bidi_impl(buffer) } @@ -1416,67 +1410,63 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool { /// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`. /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. -#[inline] pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi { if let Some(offset) = is_utf8_latin1_impl(buffer) { if is_utf8_bidi(&buffer[offset..]) { Latin1Bidi::Bidi } else { Latin1Bidi::LeftToRight } } else { Latin1Bidi::Latin1 } } /// Checks whether a valid UTF-8 buffer contains code points /// that trigger right-to-left processing or is all-Latin1. /// /// Possibly more efficient than performing the checks separately. /// /// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`. /// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return /// `true`. 
Otherwise, returns `Latin1Bidi::LeftToRight`. -#[inline] pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi { // The transition from the latin1 check to the bidi check isn't // optimal but not tweaking it to perfection today. if let Some(offset) = is_str_latin1_impl(buffer) { if is_str_bidi(&buffer[offset..]) { Latin1Bidi::Bidi } else { Latin1Bidi::LeftToRight } } else { Latin1Bidi::Latin1 } } /// Checks whether a potentially invalid UTF-16 buffer contains code points /// that trigger right-to-left processing or is all-Latin1. /// /// Possibly more efficient than performing the checks separately. /// /// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`. /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. -#[inline] pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi { check_utf16_for_latin1_and_bidi_impl(buffer) } /// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced /// with the REPLACEMENT CHARACTER. /// /// The length of the destination buffer must be at least the length of the /// source buffer _plus one_. /// /// Returns the number of `u16`s written. /// /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { // TODO: Can the requirement for dst to be at least one unit longer // be eliminated? @@ -1516,7 +1506,6 @@ pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize { assert!( dst.len() >= src.len(), @@ -1683,7 +1672,6 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize { /// not allocating memory for the worst case up front. 
Specifically, /// if the input starts with or ends with an unpaired surrogate, those are /// replaced with the REPLACEMENT CHARACTER. -#[inline] pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) { let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() }; let (read, written) = convert_utf16_to_utf8_partial(src, bytes); @@ -1727,7 +1715,6 @@ pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize { /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { assert!( dst.len() >= src.len(), @@ -1755,7 +1742,6 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { /// indicated by the return value, so using a `&mut str` interpreted as /// `&mut [u8]` as the destination is not safe. If you want to convert into /// a `&mut str`, use `convert_utf16_to_str()` instead of this function. -#[inline] pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) { let src_len = src.len(); let src_ptr = src.as_ptr(); @@ -1894,7 +1880,6 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize { /// /// If debug assertions are enabled (and not fuzzing) and the input is /// not in the range U+0000 to U+00FF, inclusive. -#[inline] pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { assert!( dst.len() >= src.len(), @@ -1957,7 +1942,6 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { /// /// (Probably in future versions if debug assertions are enabled (and not /// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.) 
-#[inline] pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) { assert!( dst.len() >= src.len(), @@ -2030,7 +2014,6 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> { /// Returns the index of the first unpaired surrogate or, if the input is /// valid UTF-16 in its entirety, the length of the input. -#[inline] pub fn utf16_valid_up_to(buffer: &[u16]) -> usize { utf16_valid_up_to_impl(buffer) } @@ -2060,61 +2043,58 @@ pub fn ensure_utf16_validity(buffer: &mut [u16]) { /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize { assert!( dst.len() >= src.len(), "Destination must not be shorter than the source." ); if let Some((_, consumed)) = unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) } { consumed } else { src.len() } } /// Copies ASCII from source to destination zero-extending it to UTF-16 up to /// the first non-ASCII byte (or the end of the input if it is ASCII in its /// entirety). /// /// The length of the destination buffer must be at least the length of the /// source buffer. /// /// Returns the number of `u16`s written. /// /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize { assert!( dst.len() >= src.len(), "Destination must not be shorter than the source." ); if let Some((_, consumed)) = unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) } { consumed } else { src.len() } } /// Copies Basic Latin from source to destination narrowing it to ASCII up to /// the first non-Basic Latin code unit (or the end of the input if it is /// Basic Latin in its entirety). /// /// The length of the destination buffer must be at least the length of the /// source buffer. /// /// Returns the number of bytes written. 
/// /// # Panics /// /// Panics if the destination buffer is shorter than stated above. -#[inline] pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize { assert!( dst.len() >= src.len(), diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs index 0cc05baf784d..4e19b0e8a07e 100644 --- a/third_party/rust/encoding_rs/src/simd_funcs.rs +++ b/third_party/rust/encoding_rs/src/simd_funcs.rs @@ -7,9 +7,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use simd::u16x8; -use simd::u8x16; -use simd::Simd; +use packed_simd::u16x8; +use packed_simd::u8x16; +use packed_simd::FromBits; // TODO: Migrate unaligned access to stdlib code if/when the RFC // https://github.com/rust-lang/rfcs/pull/1725 is implemented. @@ -62,81 +62,79 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { *(ptr as *mut u16x8) = s; } -extern "platform-intrinsic" { - fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; +cfg_if! 
{ + if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { + use std::arch::x86_64::__m128i; + use std::arch::x86_64::_mm_movemask_epi8; + use std::arch::x86_64::_mm_packus_epi16; + } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { + use std::arch::x86::__m128i; + use std::arch::x86::_mm_movemask_epi8; + use std::arch::x86::_mm_packus_epi16; + } else if #[cfg(target_arch = "aarch64")]{ + use std::arch::aarch64::uint8x16_t; + use std::arch::aarch64::uint16x8_t; + use std::arch::aarch64::vmaxvq_u8; + use std::arch::aarch64::vmaxvq_u16; + } else { + + } } // #[inline(always)] // fn simd_byte_swap_u8(s: u8x16) -> u8x16 { // unsafe { -// simd_shuffle16(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) +// shuffle!(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) // } // } // #[inline(always)] // pub fn simd_byte_swap(s: u16x8) -> u16x8 { // to_u16_lanes(simd_byte_swap_u8(to_u8_lanes(s))) // } #[inline(always)] pub fn simd_byte_swap(s: u16x8) -> u16x8 { let left = s << 8; let right = s >> 8; left | right } #[inline(always)] pub fn to_u16_lanes(s: u8x16) -> u16x8 { - unsafe { ::std::mem::transmute(s) } + u16x8::from_bits(s) } -// #[inline(always)] -// pub fn to_u8_lanes(s: u16x8) -> u8x16 { -// unsafe { ::std::mem::transmute(s) } -// } - cfg_if! { if #[cfg(target_feature = "sse2")] { - use simd::i16x8; - use simd::i8x16; - extern "platform-intrinsic" { - fn x86_mm_movemask_epi8(x: i8x16) -> i32; - } - // Expose low-level mask instead of higher-level conclusion, // because the non-ASCII case would perform less well otherwise. #[inline(always)] pub fn mask_ascii(s: u8x16) -> i32 { unsafe { - let signed: i8x16 = ::std::mem::transmute_copy(&s); - x86_mm_movemask_epi8(signed) + _mm_movemask_epi8(__m128i::from_bits(s)) } } } else { } } cfg_if! 
{ if #[cfg(target_feature = "sse2")] { #[inline(always)] pub fn simd_is_ascii(s: u8x16) -> bool { unsafe { - let signed: i8x16 = ::std::mem::transmute_copy(&s); - x86_mm_movemask_epi8(signed) == 0 + _mm_movemask_epi8(__m128i::from_bits(s)) == 0 } } } else if #[cfg(target_arch = "aarch64")]{ - extern "platform-intrinsic" { - fn aarch64_vmaxvq_u8(x: u8x16) -> u8; - } - #[inline(always)] pub fn simd_is_ascii(s: u8x16) -> bool { unsafe { - aarch64_vmaxvq_u8(s) < 0x80 + vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80 } } } else { @@ -164,35 +162,31 @@ cfg_if! { #[inline(always)] pub fn simd_is_str_latin1(s: u8x16) -> bool { unsafe { - aarch64_vmaxvq_u8(s) < 0xC4 + vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4 } } } else { #[inline(always)] pub fn simd_is_str_latin1(s: u8x16) -> bool { let above_str_latin1 = u8x16::splat(0xC4); s.lt(above_str_latin1).all() } } } cfg_if! { if #[cfg(target_arch = "aarch64")]{ - extern "platform-intrinsic" { - fn aarch64_vmaxvq_u16(x: u16x8) -> u16; - } - #[inline(always)] pub fn simd_is_basic_latin(s: u16x8) -> bool { unsafe { - aarch64_vmaxvq_u16(s) < 0x80 + vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80 } } #[inline(always)] pub fn simd_is_latin1(s: u16x8) -> bool { unsafe { - aarch64_vmaxvq_u16(s) < 0x100 + vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100 } } } else { @@ -225,7 +219,7 @@ cfg_if! { macro_rules! 
aarch64_return_false_if_below_hebrew { ($s:ident) => ({ unsafe { - if aarch64_vmaxvq_u16($s) < 0x0590 { + if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 { return false; } } @@ -292,47 +286,38 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { #[inline(always)] pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) { unsafe { - let first: u8x16 = simd_shuffle16( + let first: u8x16 = shuffle!( s, u8x16::splat(0), - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] ); - let second: u8x16 = simd_shuffle16( + let second: u8x16 = shuffle!( s, u8x16::splat(0), - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ); - ( - ::std::mem::transmute_copy(&first), - ::std::mem::transmute_copy(&second), - ) + (u16x8::from_bits(first), u16x8::from_bits(second)) } } cfg_if! { if #[cfg(target_feature = "sse2")] { - extern "platform-intrinsic" { - fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; - } - #[inline(always)] pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { unsafe { - let first: i16x8 = ::std::mem::transmute_copy(&a); - let second: i16x8 = ::std::mem::transmute_copy(&b); - x86_mm_packus_epi16(first, second) + u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b))) } } } else { #[inline(always)] pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { unsafe { - let first: u8x16 = ::std::mem::transmute_copy(&a); - let second: u8x16 = ::std::mem::transmute_copy(&b); - simd_shuffle16( + let first = u8x16::from_bits(a); + let second = u8x16::from_bits(b); + shuffle!( first, second, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30], + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] ) } } diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs index 6d0d613fa093..2d2076987984 100644 --- a/third_party/rust/encoding_rs/src/x_user_defined.rs +++ 
b/third_party/rust/encoding_rs/src/x_user_defined.rs @@ -14,15 +14,12 @@ use variant::*; cfg_if! { if #[cfg(feature = "simd-accel")] { use simd_funcs::*; - use simd::u16x8; + use packed_simd::u16x8; #[inline(always)] fn shift_upper(unpacked: u16x8) -> u16x8 { let highest_ascii = u16x8::splat(0x7F); - let offset = u16x8::splat(0xF700); - let mask = unpacked.gt(highest_ascii).to_repr().to_u16(); - unpacked + (offset & mask) - } + unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } } else { } } diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml new file mode 100644 index 000000000000..0388cee0a07b --- /dev/null +++ b/third_party/rust/packed_simd/.appveyor.yml @@ -0,0 +1,59 @@ +matrix: + allow_failures: + # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72 + - TARGET: i686-pc-windows-msvc + - TARGET: i686-pc-windows-gnu + - TARGET: x86_64-pc-windows-gnu + fast_finish: true + +environment: + matrix: + - TARGET: x86_64-pc-windows-msvc + MSYSTEM: MINGW64 + NOVERIFY: "1" + - TARGET: x86_64-pc-windows-msvc + MSYSTEM: MINGW64 + RUSTFLAGS: "-C target-feature=+sse4.2" + NOVERIFY: "1" + - TARGET: x86_64-pc-windows-msvc + MSYSTEM: MINGW64 + RUSTFLAGS: "-C target-feature=+avx" + NOVERIFY: "1" + - TARGET: x86_64-pc-windows-msvc + MSYSTEM: MINGW64 + RUSTFLAGS: "-C target-feature=+avx2" + NOVERIFY: "1" + + - TARGET: i686-pc-windows-msvc + MSYSTEM: MINGW32 + NOVERIFY: "1" + - TARGET: i686-pc-windows-msvc + MSYSTEM: MINGW32 + RUSTFLAGS: "-C target-feature=+sse4.2" + NOVERIFY: "1" + - TARGET: i686-pc-windows-msvc + MSYSTEM: MINGW32 + RUSTFLAGS: "-C target-feature=+avx" + NOVERIFY: "1" + - TARGET: i686-pc-windows-msvc + MSYSTEM: MINGW32 + RUSTFLAGS: "-C target-feature=+avx2" + NOVERIFY: "1" + + - TARGET: x86_64-pc-windows-gnu + MSYSTEM: MINGW64 + + - TARGET: i686-pc-windows-gnu + MSYSTEM: MINGW32 + - TARGET: x86_64-pc-windows-gnu + MSYSTEM: MINGW64 +install: + - ps: if (ls -r . 
-fi "*.rs" | sls "`t") { throw "Found tab character" } + - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe" + - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null + - ps: $env:PATH="$env:PATH;C:\rust\bin" + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - rustc -vV + - cargo -vV +build: false +test_script: bash -c "ci/run.sh" diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json new file mode 100644 index 000000000000..01afcc1efdac --- /dev/null +++ b/third_party/rust/packed_simd/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".travis.yml":"e9258d9a54fdaf4cbc12405fe5993ac4497eb2b29021691dbc91b19cb9b52227","Cargo.toml":"089941ba3c89ea111cbea3cc3abdcdcf2b9d0ae0db268d7269ee38226db950e5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"f3baefc5e5bb9b250e762a1466371b922fd7ee4243c217b2d014307603c2f57a","ci/all.sh":"a23d14e10cb26a0eb719e389c30eb955fa53cddcd436890646df09af640bd2eb","ci/android-install-ndk.sh":"0f1746108cc30bf9b9ba45bcde7b19fc1a8bdf5b0258035b4eb8dc69b75efac4","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"1ecdac757101d951794fb
2ab0deaa278199cf25f2e08a15c7d40ff31a8556184","ci/docker/arm-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"e25d88f6c0c94aada3d2e3f08243f755feb7e869dc5dc505b3799719cb1af591","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"f126f4c7bae8c11ab8b16df06ad997863f0838825a9c08c9899a3eedb6d570bd","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"b647545c158ee480a4c581dbdc1f57833aef056c8d498acc04b573e842bf803c","ci/docker/i586-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/i686-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/mips-unknown-linux-gnu/Dockerfile":"323776469bb7b160385f3621d66e3ee14c75242f8180f916e65af048a29d4ea0","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"c647f6948a9a43b0be695cbed4eac752120d0faf28e5e69c718cb10406921dab","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"77bfd00cc8639509be381b394f077e39b45a00158ad61b4e1656714c714665d1","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"ec5bea6c98a3b626731fdb95f9ff2d1182639c76e8fb16d3271d0fc884901524","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"4f2b662de66e83d1354f650b7077692309637f786c2ea5516c31b5c2ee10af2d","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"a9595402b772bc365982e22a0096a8988825d90b09b5faa97ab192e76072f71d","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"df3c381c157439695ae8cd10ab71664702c061e3b4ab22906a5ad6c2680acfed","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"93fb44df3d7fd31ead158570667c97b5076a05c3d968af4a84bc13819a8f2db8","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"da1c39a3ff1fe22e41395fa7c8934e90b4c1788e551b9aec6e38bfd94effc437","ci/docker/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"75c0c56161c7382b439de74c00de
1c0e3dc9d59560cd6720976a751034b78714","ci/docker/wasm32-unknown-unknown/Dockerfile":"3e5f294bc1e004aa599086c2af49d6f3e7459fa250f5fbdd60cf67d53db78758","ci/docker/x86_64-linux-android/Dockerfile":"685040273cf350d5509e580ac451555efa19790c8723ca2af066adadc6880ad2","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"d253c86803b22da428fa9cc671a05f18d3318eca7733b8dccb4f7be1ddf524c5","ci/dox.sh":"5b61711be47a4e3dde0ddd15ba73d256ea95fd75af3897732c24db1dc7e66366","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"63259e22a96ba539f53c06b1b39f53e3a78a71171652e7afc170836110ccd913","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"73fb981a8fdb1dcd54409d3c0fbbfb8f77a3ceabf8626a6b9bf9d21d6bc8ce72","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2cc8c9c560ae17867e69b06d09b758dbf7bc39eb774ada50a743724b10acc0a2","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb09278
08ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d65971eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"ab49712e9293a65d74d540ba4784fcb57ff1119ec05a575d895c071f1a620f64","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","readme.md":"585a8f0e16877fb9abb00cd17a175fcb9d7857840c6c61209f1827ffab095070","rustfmt.toml":"de6101d0670bad65fb3b337d56957d2a024e017e5ab146ec784d77312daaf8ff","src/api.rs":"331a3a4abb19cee2df5f2df4ad7c3e88b45e62cf23fdacfc9bbaa633dc5cf788","src/api/bit_manip.rs":"e68290ee679cc5abc9c73afbe635c1035f8cbfe849e5c751a1680e459244c39e","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747"
,"src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"63e28c6a3edf1a7a635f51b8d3c6adbb1d46f884d92a196b3d4a6e743d809416","src/api/cast/v16.rs":"2a584eeb57fd47baad6f3533764301b04aaaac23702b7a8db12598ac02899262","src/api/cast/v256.rs":"b91c15ed8d1536ecd97b4eb79ff9d5aba0552cd9b6f0ea6435b05f2273e23b3a","src/api/cast/v32.rs":"62ec89fcce7fa7f28497ee5770adc8f81d2d3a6b2925b02f7dc06504c40e8f38","src/api/cast/v512.rs":"d855cb943ae7106e9599ef38e30a3afb1c6bd5433178baca54cb128fd9a7d143","src/api/cast/v64.rs":"fe0f7dfaf4fc0c0c1a78c96fcfcdfdc2a1e2845843b11aa797a0c6fb52a8f774","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d513e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"3ed23d2a930b0f9750c3a5309da766b03dc4f9c4d375b42ad3c50fe732693d15","src/api/cmp/partial_ord.rs":"e16b11805c94048acd058c93994b5bc74bb187f8d7e3b86a87df60e1601467f9","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"b61f92fc0e33a2633b3375eb405beba480da071cde03df4d437d8a6058afcd97","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"35cb5c266197d6224d598fb3d286e5fe48ef0c01ed356c2ff6fe9ba946f96a92","src/api/fmt/debug.rs":"aa18eea443bf353fea3db8b1a025132bbcaf91e747ecfa43b8d9fce9af395a0c","src/api/fmt/lower_hex.rs":"69d5be366631af309f214e8031c8c20267fcc27a695eac6f45c6bc1df72a67e6","src/api/fmt/octal.rs":"9eb11ba3d990213f3c7f1ec25edba7ce997cb1320e16d308c83498ba6b9bfbd9","src/api/fmt/upper_hex.rs":"a4637d085b7bb20e759ce58e08435b510a563ba3dd468af2b03560fdc5511562","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"4151593c7bba7455821fffa5b59867005a77c95d32f1f0cc3fd87294000157d9","src/api/from/f
rom_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"562cfa3f1d8eb9a733c035a3665a599c2f1e341ee820d8fbdd102a4398a441bc","src/api/into_bits.rs":"82297f0697d67b5a015e904e7e6e7b2a7066ba825bc54b94b4ff3e22d7a1eefb","src/api/into_bits/arch_specific.rs":"1f925390b0ce7132587d95f2419c6e2ad3e1a9d17eb1d9c120a1c1c4bdf4277e","src/api/into_bits/macros.rs":"d762406de25aedff88d460dec7a80dc8e825a2a419d53218ce007efa6a1d3e04","src/api/into_bits/v128.rs":"ecdc5893664c71d7ab1ff3697c3fbe490d20d8748b9b76881d05e7625e40d74c","src/api/into_bits/v16.rs":"5459ec7dad1ad7bd30dc7e48374580b993abf23701d9c3cb22203fa0a9aabb6d","src/api/into_bits/v256.rs":"90ea351da0380ead1bf0f63b620afd40d01d638d09f7e7be31840bd2c1d9c663","src/api/into_bits/v32.rs":"ee1dc5a430050e16f51154b5fe85b1536f5feddf2ea23dd1d3859b67c4afc6fc","src/api/into_bits/v512.rs":"f72098ed1c9a23944f3d01abaf5e0f2d0e81d35a06fdadd2183e896d41b59867","src/api/into_bits/v64.rs":"6394462facdfe7827349c742b7801f1291e75a720dfb8c0b52100df46f371c98","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"78acba000d3fa527111300b6327c1932de9c4c1e02d4174e1a5615c01463d38c","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.
rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"c501a6696950cf5e521765f178de548af64fdfb6e10d026616d09fab93ca2d17","src/api/minimal/mask.rs":"42e415f536c5193d0218f5a754b34b87fd7c971bff068009f958712166ff056d","src/api/minimal/ptr.rs":"a9ee482d1dd1c956fb8f3f179e6e620b1de4e9d713961461d4c6923a4ef2e67c","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"987f8fdebeedc16e3d77c1b732e7826ef70633c541d16dfa290845d5c6289150","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"f5155dce75219f4ba11275b1f295d2fdcddd49d174a6f1fb2ace7ea42813ce41","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"03cbe8a400fd7c688e4ee771a990a6754f2031b1a59b19ae81
158b21471167e5","src/api/ops/vector_shifts.rs":"9bf69d0087268f61009e39aea52e03a90f378910206b6a28e8393178b6a5d0e0","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/api/ptr/gather_scatter.rs":"9ddd960365e050674b25b2fd3116e24d94669b4375d74e71c03e3f1469576066","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"e58c8c87806a95df2b2b5b48ac5991036df024096d9d7c171a480fe9282896a4","src/api/reductions/integer_arithmetic.rs":"47471da1c5f859489680bb5d34ced3d3aa20081c16053a3af121a4496fcb57bf","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"f27be3aa28e1c1f46de7890198db6e12f00c207085e89ef2de7e57ee443cdb98","src/api/select.rs":"a98e2ccf9fc6bdeed32d337c8675bc96c2fbe2cc34fbf149ad6047fb8e749774","src/api/shuffle.rs":"da58200790868c09659819322a489929a5b6e56c596ed07e6a44293ea02e7d09","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"4d4fe8a329c885fcb4fbcbedf99efb15a95296fe6b3f595056cc37037450d5ac","src/api/slice/write_to_slice.rs":"f5b23b2c4b91cfb26b713a9013a6c0da7f45eaefb79ba06dcbc27f3f23bda679","src/api/swap_bytes.rs":"4a6792a2e49a77475e1b237592b4b2804dbddb79c474331acd0dd71b36934259","src/codegen.rs":"c6eebc3d3665420aa6a2f317977e3c41a4f43e0550ac630cdbe8e4bbed5e2031","src/codegen/bit_manip.rs":"5559e095105a80003e0de35af1d19b0c65c9ab04eb743c7e01c5442d882eb34e","src/codegen/llvm.rs":"d1299c189abb17a6133f047574cffc7a6db4c1be37cb7d4785491cb5e8f8cf54","src/codegen/math.rs":"35f96e37a78fcf0cdb02146b7f27a45108fe06a37fc2a54d8851ce131a326178","src/codegen/math/float.rs":"dd86c0449e576c83b719700962ac017c332987fac08d91f2b7a2b1b883598170","src/c
odegen/math/float/abs.rs":"f56e2b4b8055ea861c1f5cbc6b6e1d8e7e5af163b62c13574ddee4e09513bfbc","src/codegen/math/float/cos.rs":"ef3b511a24d23045b310315e80348a9b7fedb576fc2de52d74290616a0abeb2a","src/codegen/math/float/cos_pi.rs":"4e7631a5d73dac21531e09ef1802d1180f8997509c2c8fa9f67f322194263a97","src/codegen/math/float/exp.rs":"61b691598c41b5622f24e4320c1bdd08701e612a516438bdddcc728fc3405c8c","src/codegen/math/float/ln.rs":"46b718b1ba8c9d99e1ad40f53d20dfde08a3063ca7bd2a9fdd6698e060da687e","src/codegen/math/float/macros.rs":"dd42135fff13f9aca4fd3a1a4e14c7e6c31aadc6d817d63b0d2fb9e62e062744","src/codegen/math/float/mul_add.rs":"a37bf764345d4b1714f97e83897b7cf0855fc2811704bcbc0012db91825339e1","src/codegen/math/float/mul_adde.rs":"c75702bfcb361de45964a93caf959a695ef2376bd069227600b8c6872665c755","src/codegen/math/float/powf.rs":"642346e982bc4c39203de0864d2149c4179cd7b21cf67a2951687932b4675872","src/codegen/math/float/sin.rs":"9d68164c90cdca6a85155040cdac42e27342ebe0b925273ef1593df721af4258","src/codegen/math/float/sin_cos_pi.rs":"9be02ad48585a1e8d99129382fbffbaed47852f15459256a708850b6b7a75405","src/codegen/math/float/sin_pi.rs":"9890347905b4d4a3c7341c3eb06406e46e60582bcf6960688bd727e5dadc6c57","src/codegen/math/float/sqrt.rs":"e3c60dcfb0c6d2fc62adabcc931b2d4040b83cab294dea36443fb4b89eb79e34","src/codegen/math/float/sqrte.rs":"f0f4ef9eb475ae41bcc7ec6a95ad744ba6b36925faa8b2c2814004396d196b63","src/codegen/pointer_sized_int.rs":"a70697169c28218b56fd2e8d5353f2e00671d1150d0c8cef77d613bdfacd84cb","src/codegen/reductions.rs":"645e2514746d01387ddd07f0aa4ffd8430cc9ab428d4fb13773ea319fa25dd95","src/codegen/reductions/mask.rs":"8f1afe6aabf096a3278e1fc3a30f736e04aa8b9ce96373cee22162d18cfe2702","src/codegen/reductions/mask/aarch64.rs":"cba6e17603d39795dcfe8339b6b7d8714c3e162a1f0a635979f037aa24fe4206","src/codegen/reductions/mask/arm.rs":"9447904818aa2c7c25d0963eead452a639a11ca7dbd6d21eedbfcaade07a0f33","src/codegen/reductions/mask/fallback.rs":"7a0ef9f7fd03ae318b495b95e121350cd61caffc
5cc6ee17fabf130d5d933453","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"14bd2c482071f2355beebcf7b7ecf950ff2dfcdb08c3ca50993092434a9de717","src/codegen/reductions/mask/x86/avx.rs":"b4913d87844c522903641cbbf10db4551addb1ce5e9e78278e21612fa65c733b","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"226610b4ff88c676d5187114dd57b4a8800de6ce40884675e9198445b1ed0306","src/codegen/reductions/mask/x86/sse2.rs":"bc38e6c31cb4b3d62147eba6cac264e519e2a48e0f7ce9010cfa9ef0cf0ec9fd","src/codegen/shuffle.rs":"0abca97e92cdce49a58a39cc447eb09dc7d7715ef256c8dbd2181a186e61bb64","src/codegen/shuffle1_dyn.rs":"04523e9338133bdedb012dd076c2c564b79ce5593b0fc56d0fb6910e04190a81","src/codegen/swap_bytes.rs":"1d6cdc716eadddc92b4fd506b2445a821caa8dc00860447de09d7ebd69c2087f","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"96d609a9eece4dcbbcc01ba0b8744d7f5958be12774176a2945bc676f4e6b5cb","src/codegen/vSize.rs":"eeee9858749aa82142b27bc120d1989bb74a6b82e1e4efbbeaccc9634dc9acfc","src/lib.rs":"1b5d419ff05ee0370d671810423ccc254708cc8d415c1dbac2a7a36be4bf63a8","src/masks.rs":"870f429967b2d7d5133f4d28d6c753fc5cef0570b27b29d4e966a066d22d2d0e","src/sealed.rs":"ff7f0324276408ae8249941cfa32c90b8835a54d750896b683efea857af19db2","src/testing.rs":"1d3a7862ef625e235a5734ad7204e68d350f902c0695182b1f08a0552432416e"
,"src/testing/macros.rs":"6378856d7a40ba5ec5c7c0dad6327d79f0c77266921c24296d10aed6c68e9b98","src/testing/utils.rs":"d6fd5a5017f1f85d9d99585754f8f6ad06fc3d683b34083543e67a7cc6c1772c","src/v128.rs":"18fe263c4aa28cd06461c7070b0269f69f4a2e75749b8f142a83dfdfe4d22bf5","src/v16.rs":"e5c663c9fb3547eaeac78a5f7db9969f4d8b5ec96112bf2954602fff11f0aebd","src/v256.rs":"68732cd688ad12a56d8b4f8ddf279f77bdfe1be2943c7dc0c1b4f1a76798aa0f","src/v32.rs":"785b22a1ccb4a41bb53dfeb0670f624c0ce42e6cdf62d1747e3283777a1c70bd","src/v512.rs":"d1337bfe07f06a8f37f8e8fa7d4315b9307476ee435ad80dd5269eaed564fbfa","src/v64.rs":"3077468d65125b8f085e9454c8b2463a4d5225697464ba6a1300f8799528fd4b","src/vPtr.rs":"c9a53f41f466e17b6648a4ce390fd8f4d3a848d440eb8a9a803a11608d76eb05","src/vSize.rs":"5c46d3e8c3ee5863d9b6e37e681f871386e0efc254d6d84ba711edb529ce7b3c","tests/endianness.rs":"541a144be017e3dd7da7c8ea49d907dc02538245e8c5f3deb5bd43da92c929e1"},"package":null} \ No newline at end of file diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml new file mode 100644 index 000000000000..8d8ed54ab737 --- /dev/null +++ b/third_party/rust/packed_simd/.travis.yml @@ -0,0 +1,308 @@ +language: rust +sudo: false +rust: nightly + +stages: + - tools + - linux-tier1 + - osx-tier1 + - osx-tier2 + - linux-tier2 + - android + +matrix: + fast_finish: true + include: + # Android: + - env: TARGET=x86_64-linux-android NOVERIFY=1 + name: "x86_64-unknown-linux-android + SSE2" + stage: android + - env: TARGET=arm-linux-androideabi + name: "arm-linux-androideabi" + stage: android + - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" + name: "arm-linux-androideabi + NEON" + stage: android + - env: TARGET=aarch64-linux-android + name: "aarch64-unknown-linux-android" + stage: android + - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" + name: "aarch64-unknown-linux-android + NEON" + stage: android + - env: TARGET="thumbv7neon-linux-androideabi" + 
name: "thumbv7neon-linux-androideabi" + stage: android + # Linux: + - env: TARGET=i586-unknown-linux-gnu + name: "i586-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse" + name: "i586-unknown-linux-gnu + SSE" + stage: linux-tier2 + - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2" + name: "i586-unknown-linux-gnu + SSE2" + stage: linux-tier2 + - env: TARGET=i686-unknown-linux-gnu + name: "i686-unknown-linux-gnu + SSE2" + stage: linux-tier1 + - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" + name: "i686-unknown-linux-gnu + SSE4.2" + stage: linux-tier1 + - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" + name: "i686-unknown-linux-gnu + AVX2" + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu + name: "x86_64-unknown-linux-gnu + SSE2" + install: rustup component add rustfmt-preview + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" + name: "x86_64-unknown-linux-gnu + SSE4.2" + install: rustup component add rustfmt-preview + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx" + name: "x86_64-unknown-linux-gnu + AVX" + install: rustup component add rustfmt-preview + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" + name: "x86_64-unknown-linux-gnu + AVX2" + install: rustup component add rustfmt-preview + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated + name: "Intel SDE + SSE2" + install: true + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+sse4.2" + name: "Intel SDE + SSE4.2" + install: true + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx" + name: "Intel SDE + AVX" + install: true + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated 
RUSTFLAGS="-C target-feature=+avx2" + name: "Intel SDE + AVX2" + install: true + stage: linux-tier1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f" + name: "Intel SDE + AVX-512" + install: true + stage: linux-tier1 + - env: TARGET=arm-unknown-linux-gnueabi + name: "arm-unknown-linux-gnueabi" + stage: linux-tier2 + - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" + name: "arm-unknown-linux-gnueabi + NEON" + stage: linux-tier2 + - env: TARGET=arm-unknown-linux-gnueabihf + name: "arm-unknown-linux-gnueabihf" + stage: linux-tier2 + - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" + name: "arm-unknown-linux-gnueabihf + NEON" + stage: linux-tier2 + - env: TARGET=armv7-unknown-linux-gnueabihf + name: "armv7-unknown-linux-gnueabihf" + stage: linux-tier2 + - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" + name: "armv7-unknown-linux-gnueabihf + NEON" + stage: linux-tier2 + - env: TARGET="thumbv7neon-unknown-linux-gnueabihf" + name: "thumbv7neon-unknown-linux-gnueabihf" + stage: linux-tier2 + - env: TARGET=aarch64-unknown-linux-gnu + name: "aarch64-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon" + name: "aarch64-unknown-linux-gnu + NEON" + stage: linux-tier2 + - env: TARGET=mips-unknown-linux-gnu + name: "mips-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=mipsel-unknown-linux-musl + name: "mipsel-unknown-linux-musl" + stage: linux-tier2 + - env: TARGET=mips64-unknown-linux-gnuabi64 + name: "mips64-unknown-linux-gnuabi64" + stage: linux-tier2 + - env: TARGET=mips64el-unknown-linux-gnuabi64 + name: "mips64el-unknown-linux-gnuabi64" + stage: linux-tier2 + # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18 + # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6" + - env: 
TARGET=powerpc-unknown-linux-gnu + name: "powerpc-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=powerpc64-unknown-linux-gnu + name: "powerpc64-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=powerpc64le-unknown-linux-gnu + name: "powerpc64le-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" + name: "powerpc64le-unknown-linux-gnu + ALTIVEC" + stage: linux-tier2 + - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" + name: "powerpc64le-unknown-linux-gnu + VSX" + stage: linux-tier2 + - env: TARGET=s390x-unknown-linux-gnu + name: "s390x-unknown-linux-gnu" + stage: linux-tier2 + - env: TARGET=sparc64-unknown-linux-gnu + name: "sparc64-unknown-linux-gnu" + stage: linux-tier2 + # WebAssembly: + - env: TARGET=wasm32-unknown-unknown + name: "wasm32-unknown-unknown" + stage: osx-tier1 # For now + # MacOSX: + - os: osx + env: TARGET=i686-apple-darwin + name: "i686-apple-darwin + SSE2" + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + - os: osx + env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" + name: "i686-apple-darwin + SSE4.2" + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + # Travis-CI OSX build bots do not support AVX2: + - os: osx + env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx" + name: "i686-apple-darwin + AVX" + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + - os: osx + env: TARGET=x86_64-apple-darwin + name: "x86_64-apple-darwin + SSE2" + install: true + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + - os: osx + env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" + name: "x86_64-apple-darwin + SSE4.2" + install: true + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + # Travis-CI OSX build bots do not support AVX2: + - os: osx + env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx" + name: "x86_64-apple-darwin + AVX" + 
install: true + script: ci/run.sh + osx_image: xcode10 + stage: osx-tier1 + # *BSDs: + #- env: TARGET=i686-unknown-freebsd NORUN=1 + # script: ci/run.sh + #- env: TARGET=x86_64-unknown-freebsd NORUN=1 + # script: ci/run.sh + #- env: TARGET=x86_64-unknown-netbsd NORUN=1 + # script: ci/run.sh + # Solaris: + #- env: TARGET=x86_64-sun-solaris NORUN=1 + # script: ci/run.sh + # iOS: + - os: osx + env: TARGET=i386-apple-ios + name: "i386-apple-ios" + script: ci/run.sh + osx_image: xcode9.4 + stage: osx-tier2 + - os: osx + env: TARGET=x86_64-apple-ios + name: "x86_64-apple-ios + SSE2" + script: ci/run.sh + osx_image: xcode9.4 + stage: osx-tier2 + - os: osx + env: TARGET=armv7-apple-ios NORUN=1 + name: "armv7-apple-ios [Build only]" + script: ci/run.sh + osx_image: xcode9.4 + stage: osx-tier2 + - os: osx + env: TARGET=aarch64-apple-ios NORUN=1 + name: "aarch64-apple-ios [Build only]" + script: ci/run.sh + osx_image: xcode9.4 + stage: osx-tier2 + # BENCHMARKS: + - name: "Benchmarks - x86_64-unknown-linux-gnu" + install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh + script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh + stage: tools + - name: "Benchmarks - x86_64-apple-darwin" + install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh + script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh + os: osx + osx_image: xcode9.4 + stage: tools + # TOOLS: + - name: "Documentation" + install: cargo install mdbook + script: ci/dox.sh + stage: tools + - name: "rustfmt" + install: true + before_script: rustup component add rustfmt-preview + script: ci/all.sh check_fmt || true + stage: tools + - name: "clippy" + install: true + before_script: rustup component add clippy-preview + script: ci/all.sh clippy + stage: tools + + allow_failures: + # FIXME: ISPC cannot be found? 
+ - name: "Benchmarks - x86_64-apple-darwin" + # FIXME: TBD + - env: TARGET=powerpc-unknown-linux-gnu + - env: TARGET=powerpc64-unknown-linux-gnu + - env: TARGET=powerpc64le-unknown-linux-gnu + - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" + - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" + #- env: TARGET=i686-unknown-freebsd NORUN=1 + #- env: TARGET=x86_64-unknown-freebsd NORUN=1 + #- env: TARGET=x86_64-unknown-netbsd NORUN=1 + #- env: TARGET=x86_64-sun-solaris NORUN=1 + + # FIXME: TBD + - env: TARGET=arm-linux-androideabi + - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" + - env: TARGET=aarch64-linux-android + - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" + + # FIXME: iOS + # https://github.com/rust-lang-nursery/packed_simd/issues/26 + - env: TARGET=i386-apple-ios + - env: TARGET=x86_64-apple-ios + + # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182 + - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" + - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" + - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" + + # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183 + - env: TARGET=wasm32-unknown-unknown + +install: travis_retry rustup target add $TARGET +before_script: cargo generate-lockfile +script: travis_wait 50 ci/run-docker.sh +after_script: sleep 5 + +env: + global: + secure: 
"lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo=" +branches: + only: + - staging # bors r+ + - trying # bors try + - master +notifications: + email: + on_success: never diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml new file mode 100644 index 000000000000..3db9354c9407 --- /dev/null +++ b/third_party/rust/packed_simd/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "packed_simd" +version = "0.3.3" +authors = ["Gonzalo Brito Gadeschi "] +description = "Portable Packed SIMD vectors" +documentation = "https://docs.rs/crate/packed_simd/" +homepage = "https://github.com/rust-lang-nursery/packed_simd" +repository = "https://github.com/rust-lang-nursery/packed_simd" +keywords = ["simd", "vector", "portability"] +categories = ["hardware-support", "concurrency", "no-std", "data-structures"] +license = "MIT/Apache-2.0" +build = "build.rs" +edition = "2018" + +[badges] +appveyor = { repository = "rust-lang-nursery/packed_simd" } +travis-ci = { repository = "rust-lang-nursery/packed_simd" } +codecov = { repository = "rust-lang-nursery/packed_simd" } +is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" } +is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } +maintenance = { status = "experimental" } + +[dependencies] +cfg-if = "^0.1.6" +core_arch = { 
version = "^0.1.3", optional = true } + +[features] +default = [] +into_bits = [] +libcore_neon = [] + +[dev-dependencies] +paste = "^0.1.3" +arrayvec = { version = "^0.4", default-features = false } + +[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] +version = "^0.1.2" +optional = true + +[target.wasm32-unknown-unknown.dev-dependencies] +wasm-bindgen = "=0.2.19" +wasm-bindgen-test = "=0.2.19" \ No newline at end of file diff --git a/third_party/rust/simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE similarity index 100% rename from third_party/rust/simd/LICENSE-APACHE rename to third_party/rust/packed_simd/LICENSE-APACHE diff --git a/third_party/rust/simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT similarity index 93% rename from third_party/rust/simd/LICENSE-MIT rename to third_party/rust/packed_simd/LICENSE-MIT index bf6c304f7774..39d4bdb5acd3 100644 --- a/third_party/rust/simd/LICENSE-MIT +++ b/third_party/rust/packed_simd/LICENSE-MIT @@ -1,25 +1,25 @@ -Copyright (c) 2014 Huon Wilson +Copyright (c) 2014 The Rust Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. \ No newline at end of file +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml new file mode 100644 index 000000000000..6d302dc85cf6 --- /dev/null +++ b/third_party/rust/packed_simd/bors.toml @@ -0,0 +1,3 @@ +status = [ + "continuous-integration/travis-ci/push" +] \ No newline at end of file diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs new file mode 100644 index 000000000000..85639ff9d085 --- /dev/null +++ b/third_party/rust/packed_simd/build.rs @@ -0,0 +1,8 @@ +fn main() { + println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); + let target = std::env::var("TARGET") + .expect("TARGET environment variable not defined"); + if target.contains("neon") { + println!("cargo:rustc-cfg=libcore_neon"); + } +} diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh new file mode 100644 index 000000000000..273562d4a9bb --- /dev/null +++ b/third_party/rust/packed_simd/ci/all.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# +# Performs an operation on all targets + +set -ex + +: "${1?The all.sh script requires one argument.}" + +op=$1 + +cargo_clean() { + cargo clean +} + +cargo_check_fmt() { + cargo fmt --all -- --check +} + +cargo_fmt() { + cargo fmt --all +} + +cargo_clippy() { + cargo clippy --all -- -D clippy::pedantic +} + +CMD="-1" + +case $op in + clean*) + CMD=cargo_clean + ;; + check_fmt*) + CMD=cargo_check_fmt + ;; + fmt*) + CMD=cargo_fmt + ;; + clippy) + CMD=cargo_clippy + ;; + *) + echo "Unknown operation: \"${op}\"" + exit 1 + ;; +esac + +echo "Operation is: ${CMD}" + +# On src/ +$CMD + +# Check examples/ +for dir in examples/*/ +do + dir=${dir%*/} + ( + cd "${dir%*/}" + 
$CMD + ) +done + +( + cd verify/verify + $CMD +) + +( + cd micro_benchmarks + $CMD +) diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh new file mode 100644 index 000000000000..818e78446ae8 --- /dev/null +++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env sh +# Copyright 2016 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +set -ex + +curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip +unzip -q android-ndk-r15b-linux-x86_64.zip + +case "$1" in + aarch64) + arch=arm64 + ;; + + i686) + arch=x86 + ;; + + *) + arch=$1 + ;; +esac; + +android-ndk-r15b/build/tools/make_standalone_toolchain.py \ + --unified-headers \ + --install-dir "/android/ndk-${1}" \ + --arch "${arch}" \ + --api 24 + +rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh new file mode 100644 index 000000000000..6b5ac09ab04a --- /dev/null +++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env sh +# Copyright 2016 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +set -ex + +# Prep the SDK and emulator +# +# Note that the update process requires that we accept a bunch of licenses, and +# we can't just pipe `yes` into it for some reason, so we take the same strategy +# located in https://github.com/appunite/docker by just wrapping it in a script +# which apparently magically accepts the licenses. + +mkdir sdk +curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O +unzip -d sdk sdk-tools-linux-3859397.zip + +case "$1" in + arm | armv7) + abi=armeabi-v7a + ;; + + aarch64) + abi=arm64-v8a + ;; + + i686) + abi=x86 + ;; + + x86_64) + abi=x86_64 + ;; + + *) + echo "invalid arch: $1" + exit 1 + ;; +esac; + +# --no_https avoids + # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found +yes | ./sdk/tools/bin/sdkmanager --licenses --no_https +yes | ./sdk/tools/bin/sdkmanager --no_https \ + "emulator" \ + "platform-tools" \ + "platforms;android-24" \ + "system-images;android-24;default;$abi" + +echo "no" | + ./sdk/tools/bin/avdmanager create avd \ + --name "${1}" \ + --package "system-images;android-24;default;$abi" diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh new file mode 100644 index 000000000000..9eabd7c8d94f --- /dev/null +++ b/third_party/rust/packed_simd/ci/android-sysimage.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Copyright 2017 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +set -ex + +URL=https://dl.google.com/android/repository/sys-img/android + +main() { + local arch="${1}" + local name="${2}" + local dest=/system + local td + td="$(mktemp -d)" + + apt-get install --no-install-recommends e2tools + + pushd "${td}" + curl --retry 5 -O "${URL}/${name}" + unzip -q "${name}" + + local system + system="$(find . -name system.img)" + mkdir -p ${dest}/{bin,lib,lib64} + + # Extract android linker and libraries to /system + # This allows android executables to be run directly (or with qemu) + if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then + e2cp -p "${system}:/bin/linker64" "${dest}/bin/" + e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/" + e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/" + e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/" + else + e2cp -p "${system}:/bin/linker" "${dest}/bin/" + e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/" + e2cp -p "${system}:/lib/libc.so" "${dest}/lib/" + e2cp -p "${system}:/lib/libm.so" "${dest}/lib/" + fi + + # clean up + apt-get purge --auto-remove -y e2tools + + popd + + rm -rf "${td}" +} + +main "${@}" diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh new file mode 100644 index 000000000000..3635b9e371d1 --- /dev/null +++ b/third_party/rust/packed_simd/ci/benchmark.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Runs all benchmarks. Controlled by the following environment variables: +# +# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc) +# NORUN={1} - only builds the benchmarks + +set -ex + +if [[ ${NORUN} != 1 ]]; then + # Most benchmarks require hyperfine; require it upfront. + hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; } +fi + + +# If the ispc benchmark feature is enabled, ispc must be in the path of the +# benchmarks. 
+if echo "$FEATURES" | grep -q "ispc"; then + hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; } +fi + +# An example with a benchmark.sh is a benchmark: +for dir in examples/*/ +do + dir=${dir%*/} + cd ${dir%*/} + if [ -f "benchmark.sh" ]; then + ./benchmark.sh + fi + cd - +done + diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs new file mode 100644 index 000000000000..c0fe52c35659 --- /dev/null +++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs @@ -0,0 +1,176 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is a script to deploy and execute a binary on an iOS simulator. +// The primary use of this is to be able to run unit tests on the simulator and +// retrieve the results. +// +// To do this through Cargo instead, use Dinghy +// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy +// test. + +use std::env; +use std::fs::{self, File}; +use std::io::Write; +use std::path::Path; +use std::process; +use std::process::Command; + +macro_rules! 
t { + ($e:expr) => (match $e { + Ok(e) => e, + Err(e) => panic!("{} failed with: {}", stringify!($e), e), + }) +} + +// Step one: Wrap as an app +fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) { + println!("Packaging simulator app"); + drop(fs::remove_dir_all("ios_simulator_app")); + t!(fs::create_dir("ios_simulator_app")); + t!(fs::copy(test_binary_path, + Path::new("ios_simulator_app").join(crate_name))); + + let mut f = t!(File::create("ios_simulator_app/Info.plist")); + t!(f.write_all(format!(r#" + + + + + CFBundleExecutable + {} + CFBundleIdentifier + com.rust.unittests + + + "#, crate_name).as_bytes())); +} + +// Step two: Start the iOS simulator +fn start_simulator() { + println!("Looking for iOS simulator"); + let output = t!(Command::new("xcrun").arg("simctl").arg("list").output()); + assert!(output.status.success()); + let mut simulator_exists = false; + let mut simulator_booted = false; + let mut found_rust_sim = false; + let stdout = t!(String::from_utf8(output.stdout)); + for line in stdout.lines() { + if line.contains("rust_ios") { + if found_rust_sim { + panic!("Duplicate rust_ios simulators found. Please \ + double-check xcrun simctl list."); + } + simulator_exists = true; + simulator_booted = line.contains("(Booted)"); + found_rust_sim = true; + } + } + + if simulator_exists == false { + println!("Creating iOS simulator"); + Command::new("xcrun") + .arg("simctl") + .arg("create") + .arg("rust_ios") + .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE") + .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2") + .check_status(); + } else if simulator_booted == true { + println!("Shutting down already-booted simulator"); + Command::new("xcrun") + .arg("simctl") + .arg("shutdown") + .arg("rust_ios") + .check_status(); + } + + println!("Starting iOS simulator"); + // We can't uninstall the app (if present) as that will hang if the + // simulator isn't completely booted; just erase the simulator instead. 
+ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status(); + Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status(); +} + +// Step three: Install the app +fn install_app_to_simulator() { + println!("Installing app to simulator"); + Command::new("xcrun") + .arg("simctl") + .arg("install") + .arg("booted") + .arg("ios_simulator_app/") + .check_status(); +} + +// Step four: Run the app +fn run_app_on_simulator() { + println!("Running app"); + let output = t!(Command::new("xcrun") + .arg("simctl") + .arg("launch") + .arg("--console") + .arg("booted") + .arg("com.rust.unittests") + .output()); + + println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout)); + println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr)); + + let stdout = String::from_utf8_lossy(&output.stdout); + let failed = stdout.lines() + .find(|l| l.contains("FAILED")) + .map(|l| l.contains("FAILED")) + .unwrap_or(false); + + let passed = stdout.lines() + .find(|l| l.contains("test result: ok")) + .map(|l| l.contains("test result: ok")) + .unwrap_or(false); + + println!("Shutting down simulator"); + Command::new("xcrun") + .arg("simctl") + .arg("shutdown") + .arg("rust_ios") + .check_status(); + if !(passed && !failed) { + panic!("tests didn't pass"); + } +} + +trait CheckStatus { + fn check_status(&mut self); +} + +impl CheckStatus for Command { + fn check_status(&mut self) { + println!("\trunning: {:?}", self); + assert!(t!(self.status()).success()); + } +} + +fn main() { + let args: Vec = env::args().collect(); + if args.len() != 2 { + println!("Usage: {} ", args[0]); + process::exit(-1); + } + + let test_binary_path = Path::new(&args[1]); + let crate_name = test_binary_path.file_name().unwrap(); + + package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path); + start_simulator(); + install_app_to_simulator(); + run_app_on_simulator(); +} diff --git 
a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile new file mode 100644 index 000000000000..27bde89c5a8d --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile @@ -0,0 +1,47 @@ +FROM ubuntu:16.04 + +RUN dpkg --add-architecture i386 && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + file \ + make \ + curl \ + ca-certificates \ + python \ + unzip \ + expect \ + openjdk-9-jre \ + libstdc++6:i386 \ + libpulse0 \ + gcc \ + libc6-dev + +WORKDIR /android/ +COPY android* /android/ + +ENV ANDROID_ARCH=aarch64 +ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools + +RUN sh /android/android-install-ndk.sh $ANDROID_ARCH +RUN sh /android/android-install-sdk.sh $ANDROID_ARCH +RUN mv /root/.android /tmp +RUN chmod 777 -R /tmp/.android +RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* + +ENV PATH=$PATH:/rust/bin \ + CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \ + CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \ + OBJDUMP=aarch64-linux-android-objdump \ + HOME=/tmp + +ADD runtest-android.rs /tmp/runtest.rs +ENTRYPOINT [ \ + "bash", \ + "-c", \ + # set SHELL so android can detect a 64bits system, see + # http://stackoverflow.com/a/41789144 + "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \ + rustc /tmp/runtest.rs -o /tmp/runtest && \ + exec \"$@\"", \ + "--" \ +] diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..68261a2f033d --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + ca-certificates \ + 
libc6-dev \ + gcc-aarch64-linux-gnu \ + libc6-dev-arm64-cross \ + qemu-user \ + make \ + file + +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \ + OBJDUMP=aarch64-linux-gnu-objdump diff --git a/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile new file mode 100644 index 000000000000..995a9e30e65e --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile @@ -0,0 +1,47 @@ +FROM ubuntu:16.04 + +RUN dpkg --add-architecture i386 && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + file \ + make \ + curl \ + ca-certificates \ + python \ + unzip \ + expect \ + openjdk-9-jre \ + libstdc++6:i386 \ + libpulse0 \ + gcc \ + libc6-dev + +WORKDIR /android/ +COPY android* /android/ + +ENV ANDROID_ARCH=arm +ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools + +RUN sh /android/android-install-ndk.sh $ANDROID_ARCH +RUN sh /android/android-install-sdk.sh $ANDROID_ARCH +RUN mv /root/.android /tmp +RUN chmod 777 -R /tmp/.android +RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* + +ENV PATH=$PATH:/rust/bin \ + CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ + CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ + OBJDUMP=arm-linux-androideabi-objdump \ + HOME=/tmp + +ADD runtest-android.rs /tmp/runtest.rs +ENTRYPOINT [ \ + "bash", \ + "-c", \ + # set SHELL so android can detect a 64bits system, see + # http://stackoverflow.com/a/41789144 + "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ + rustc /tmp/runtest.rs -o /tmp/runtest && \ + exec \"$@\"", \ + "--" \ +] diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile 
b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile new file mode 100644 index 000000000000..cb4de6a57eaa --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + ca-certificates \ + libc6-dev \ + libc6-armel-cross \ + libc6-dev-armel-cross \ + binutils-arm-linux-gnueabi \ + gcc-arm-linux-gnueabi \ + qemu-user \ + make \ + file +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \ + OBJDUMP=arm-linux-gnueabi-objdump diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 000000000000..c7bd61f0a796 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + ca-certificates \ + libc6-dev \ + gcc-arm-linux-gnueabihf \ + libc6-dev-armhf-cross \ + qemu-user \ + make \ + file +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ + OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 000000000000..e01b87afdf56 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + ca-certificates \ + libc6-dev \ + gcc-arm-linux-gnueabihf \ + libc6-dev-armhf-cross \ + 
qemu-user \ + make \ + file +ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ + OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..857974a858f1 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc-multilib \ + libc6-dev \ + file \ + make \ + ca-certificates diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..857974a858f1 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc-multilib \ + libc6-dev \ + file \ + make \ + ca-certificates diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..4711cead372a --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user ca-certificates \ + gcc-mips-linux-gnu libc6-dev-mips-cross \ + qemu-system-mips \ + qemu-user \ + make \ + file + +ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ + CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \ + OBJDUMP=mips-linux-gnu-objdump \ No newline at end of file diff --git 
a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 000000000000..1422e8c80924 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user ca-certificates \ + gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \ + qemu-system-mips64 qemu-user + +ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ + CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \ + OBJDUMP=mips64-linux-gnuabi64-objdump \ No newline at end of file diff --git a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 000000000000..d94deb5b2013 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user ca-certificates \ + gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \ + qemu-system-mips64el + +ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ + CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \ + OBJDUMP=mips64el-linux-gnuabi64-objdump \ No newline at end of file diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile new file mode 100644 index 000000000000..40ac50675bd9 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile @@ -0,0 +1,25 @@ +FROM ubuntu:18.10 + +RUN apt-get 
update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + gcc \ + libc6-dev \ + make \ + qemu-user \ + qemu-system-mips \ + bzip2 \ + curl \ + file + +RUN mkdir /toolchain + +# Note that this originally came from: +# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 +RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \ + tar xjf - -C /toolchain --strip-components=2 + +ENV PATH=$PATH:/rust/bin:/toolchain/bin \ + CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \ + CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \ + CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain" \ No newline at end of file diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..43b174ed87fc --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user ca-certificates \ + gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ + qemu-system-ppc \ + make \ + file + +ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ + CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \ + OBJDUMP=powerpc-linux-gnu-objdump diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..7757ad28a42d --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get 
install -y --no-install-recommends \ + gcc \ + ca-certificates \ + libc6-dev \ + gcc-powerpc64-linux-gnu \ + libc6-dev-ppc64-cross \ + qemu-user \ + qemu-system-ppc \ + make \ + file + +ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ + CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \ + CC=powerpc64-linux-gnu-gcc \ + OBJDUMP=powerpc64-linux-gnu-objdump diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..0b0c214fdf1b --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -0,0 +1,11 @@ +FROM ubuntu:17.10 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user ca-certificates \ + gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ + qemu-system-ppc file make + +ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ + CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \ + CC=powerpc64le-linux-gnu-gcc \ + OBJDUMP=powerpc64le-linux-gnu-objdump diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..c645b0bcc2b8 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:18.10 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + cmake \ + gcc \ + libc6-dev \ + g++-s390x-linux-gnu \ + libc6-dev-s390x-cross \ + qemu-user \ + make \ + file + +ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \ + CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \ + 
CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \ + CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \ + OBJDUMP=s390x-linux-gnu-objdump \ No newline at end of file diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..fe12af14da6f --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,18 @@ +FROM debian:stretch + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates \ + gcc libc6-dev \ + gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \ + qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \ + p7zip-full cpio + +COPY linux-sparc64.sh / +RUN bash /linux-sparc64.sh + +COPY test-runner-linux / + +ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \ + CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \ + CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \ + PATH=$PATH:/rust/bin \ No newline at end of file diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile new file mode 100644 index 000000000000..c1da77109c12 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile @@ -0,0 +1,47 @@ +FROM ubuntu:16.04 + +RUN dpkg --add-architecture i386 && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + file \ + make \ + curl \ + ca-certificates \ + python \ + unzip \ + expect \ + openjdk-9-jre \ + libstdc++6:i386 \ + libpulse0 \ + gcc \ + libc6-dev + +WORKDIR /android/ +COPY android* /android/ + +ENV ANDROID_ARCH=arm +ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools + +RUN sh /android/android-install-ndk.sh $ANDROID_ARCH +RUN sh /android/android-install-sdk.sh 
$ANDROID_ARCH +RUN mv /root/.android /tmp +RUN chmod 777 -R /tmp/.android +RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* + +ENV PATH=$PATH:/rust/bin \ + CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ + CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ + OBJDUMP=arm-linux-androideabi-objdump \ + HOME=/tmp + +ADD runtest-android.rs /tmp/runtest.rs +ENTRYPOINT [ \ + "bash", \ + "-c", \ + # set SHELL so android can detect a 64bits system, see + # http://stackoverflow.com/a/41789144 + "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ + rustc /tmp/runtest.rs -o /tmp/runtest && \ + exec \"$@\"", \ + "--" \ +] diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 000000000000..696cb6c3fb52 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + ca-certificates \ + libc6-dev \ + gcc-arm-linux-gnueabihf \ + libc6-dev-armhf-cross \ + qemu-user \ + make \ + file +ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ + CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ + OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile new file mode 100644 index 000000000000..f905cf1a36eb --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -0,0 +1,37 @@ +FROM ubuntu:18.04 + +RUN apt-get update -y && apt-get install -y --no-install-recommends \ + ca-certificates \ + clang \ + cmake \ + curl \ + git \ + libc6-dev \ + 
make \ + python \ + xz-utils + +# Install `wasm2wat` +RUN git clone --recursive https://github.com/WebAssembly/wabt +RUN make -C wabt -j$(nproc) +ENV PATH=$PATH:/wabt/bin + +# Install `wasm-bindgen-test-runner` +RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.19/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl.tar.gz \ + | tar xzf - +ENV PATH=$PATH:/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner + +# Install `node` +RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf - +ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin + +# We use a shim linker that removes `--strip-debug` when passed to LLD. While +# this typically results in invalid debug information in release mode it doesn't +# result in an invalid names section which is what we're interested in. +COPY lld-shim.rs / +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim + +# Rustc isn't available until this container starts, so defer compilation of the +# shim. +ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@" diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile new file mode 100644 index 000000000000..d52dd45b12bf --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile @@ -0,0 +1,29 @@ +FROM ubuntu:16.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + gcc \ + libc-dev \ + python \ + unzip \ + file \ + make + +WORKDIR /android/ +ENV ANDROID_ARCH=x86_64 +COPY android-install-ndk.sh /android/ +RUN sh /android/android-install-ndk.sh $ANDROID_ARCH + +# We do not run x86_64-linux-android tests on an android emulator. +# See ci/android-sysimage.sh for informations about how tests are run. 
+COPY android-sysimage.sh /android/ +RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip + +ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \ + CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \ + CC_x86_64_linux_android=x86_64-linux-android-gcc \ + CXX_x86_64_linux_android=x86_64-linux-android-g++ \ + OBJDUMP=x86_64-linux-android-objdump \ + HOME=/tmp diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile new file mode 100644 index 000000000000..a6bbe6653928 --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile @@ -0,0 +1,16 @@ +FROM ubuntu:18.04 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc6-dev \ + file \ + make \ + ca-certificates \ + wget \ + bzip2 \ + cmake \ + libclang-dev \ + clang + +RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2 +RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2 +ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --" diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000000..e6b000d0516e --- /dev/null +++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:17.10 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc6-dev \ + file \ + make \ + ca-certificates \ + cmake \ + libclang-dev \ + clang diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh new file mode 100644 index 000000000000..1743366407e3 --- /dev/null +++ b/third_party/rust/packed_simd/ci/dox.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -ex + +rm -rf target/doc +mkdir -p 
target/doc + +# Build API documentation +cargo doc --features=into_bits + +# Build Performance Guide +# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780 +# mdbook build perf-guide -d target/doc/perf-guide +cd perf-guide +mdbook build +cd - +cp -r perf-guide/book target/doc/perf-guide + +# If we're on travis, not a PR, and on the right branch, publish! +if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then + pip install ghp_import --install-option="--prefix=$HOME/.local" + $HOME/.local/bin/ghp-import -n target/doc + git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages +fi diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh new file mode 100644 index 000000000000..972abeec569e --- /dev/null +++ b/third_party/rust/packed_simd/ci/linux-s390x.sh @@ -0,0 +1,18 @@ +set -ex + +mkdir -m 777 /qemu +cd /qemu + +curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img +curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian +curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian + +mv kernel.debian kernel +mv initrd.debian initrd.gz + +mkdir init +cd init +gunzip -c ../initrd.gz | cpio -id +rm ../initrd.gz +cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/ +chmod a+w . 
diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh new file mode 100644 index 000000000000..4452b120e1b6 --- /dev/null +++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh @@ -0,0 +1,17 @@ +set -ex + +mkdir -m 777 /qemu +cd /qemu + +curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso +7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz +7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64 +mv sparc64 kernel +rm debian-9.0-sparc64-NETINST-1.iso + +mkdir init +cd init +gunzip -c ../initrd.gz | cpio -id +rm ../initrd.gz +cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/ +chmod a+w . diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs new file mode 100644 index 000000000000..10263869e8dc --- /dev/null +++ b/third_party/rust/packed_simd/ci/lld-shim.rs @@ -0,0 +1,11 @@ +use std::os::unix::prelude::*; +use std::process::Command; +use std::env; + +fn main() { + let args = env::args() + .skip(1) + .filter(|s| s != "--strip-debug") + .collect::>(); + panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec()); +} diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh new file mode 100644 index 000000000000..f70639b6f89b --- /dev/null +++ b/third_party/rust/packed_simd/ci/max_line_width.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env sh + +set -x + +export success=true + +find . 
-iname '*.rs' | while read -r file; do + result=$(grep '.\{79\}' "${file}" | grep --invert 'http') + if [ "${result}" = "" ] + then + : + else + echo "file \"${file}\": $result" + exit 1 + fi +done + diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh new file mode 100644 index 000000000000..abdd6852fc3a --- /dev/null +++ b/third_party/rust/packed_simd/ci/run-docker.sh @@ -0,0 +1,38 @@ +# Small script to run tests for a target (or all targets) inside all the +# respective docker images. + +set -ex + +run() { + echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}" + docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/ + mkdir -p target + target=$(echo "${TARGET}" | sed 's/-emulated//') + echo "Running docker" + docker run \ + --user `id -u`:`id -g` \ + --rm \ + --init \ + --volume $HOME/.cargo:/cargo \ + --env CARGO_HOME=/cargo \ + --volume `rustc --print sysroot`:/rust:ro \ + --env TARGET=$target \ + --env NORUN \ + --env NOVERIFY \ + --env RUSTFLAGS \ + --volume `pwd`:/checkout:ro \ + --volume `pwd`/target:/checkout/target \ + --workdir /checkout \ + --privileged \ + packed_simd \ + bash \ + -c 'PATH=$PATH:/rust/bin exec ci/run.sh' +} + +if [ -z "${TARGET}" ]; then + for d in `ls ci/docker/`; do + run $d + done +else + run ${TARGET} +fi diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh new file mode 100644 index 000000000000..7bb825883680 --- /dev/null +++ b/third_party/rust/packed_simd/ci/run.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +set -ex + +: ${TARGET?"The TARGET environment variable must be set."} + +# Tests are all super fast anyway, and they fault often enough on travis that +# having only one thread increases debuggability to be worth it. 
+#export RUST_TEST_THREADS=1 +#export RUST_BACKTRACE=full +#export RUST_TEST_NOCAPTURE=1 + +# Some appveyor builds run out-of-memory; this attempts to mitigate that: +# https://github.com/rust-lang-nursery/packed_simd/issues/39 +# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1" +# export CARGO_BUILD_JOBS=1 + +export CARGO_SUBCMD=test +if [[ "${NORUN}" == "1" ]]; then + export CARGO_SUBCMD=build +fi + +if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then + export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0" + rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest + export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest + export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest +fi + +# The source directory is read-only. Need to copy internal crates to the target +# directory for their Cargo.lock to be properly written. +mkdir target || true + +rustc --version +cargo --version +echo "TARGET=${TARGET}" +echo "HOST=${HOST}" +echo "RUSTFLAGS=${RUSTFLAGS}" +echo "NORUN=${NORUN}" +echo "NOVERIFY=${NOVERIFY}" +echo "CARGO_SUBCMD=${CARGO_SUBCMD}" +echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}" +echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}" +echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}" +echo "RUST_BACKTRACE=${RUST_BACKTRACE}" +echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}" + +cargo_test() { + cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}" + if [ "${NORUN}" != "1" ] + then + if [ "$TARGET" != "wasm32-unknown-unknown" ] + then + cmd="$cmd -- --quiet" + fi + fi + mkdir target || true + ${cmd} 2>&1 | tee > target/output + if [[ ${PIPESTATUS[0]} != 0 ]]; then + cat target/output + return 1 + fi +} + +cargo_test_impl() { + ORIGINAL_RUSTFLAGS=${RUSTFLAGS} + RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@} + RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@} + RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test 
${@} + RUSTFLAGS=${ORIGINAL_RUSTFLAGS} +} + +# Debug run: +if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then + # Run wasm32-unknown-unknown in release mode only + cargo_test_impl +fi + +if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then + # use sleef on linux and windows x86_64 builds + cargo_test_impl --release --features=into_bits,core_arch,sleef-sys +else + cargo_test_impl --release --features=into_bits,core_arch +fi + +# Verify code generation +if [[ "${NOVERIFY}" != "1" ]]; then + cp -r verify/verify target/verify + export STDSIMD_ASSERT_INSTR_LIMIT=30 + if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then + export STDSIMD_ASSERT_INSTR_LIMIT=50 + fi + cargo_test --release --manifest-path=target/verify/Cargo.toml +fi + +. ci/run_examples.sh diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh new file mode 100644 index 000000000000..5b26b18afb20 --- /dev/null +++ b/third_party/rust/packed_simd/ci/run_examples.sh @@ -0,0 +1,51 @@ +# Runs all examples. + +# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55 +# All examples fail to build for `armv7-apple-ios`. 
+if [[ ${TARGET} == "armv7-apple-ios" ]]; then + exit 0 +fi + +# FIXME: travis exceeds 50 minutes on these targets +# Skipping the examples is an attempt at preventing travis from timing-out +if [[ ${TARGET} == "arm-linux-androidabi" ]] || [[ ${TARGET} == "aarch64-linux-androidabi" ]] \ + || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then + exit 0 +fi + +if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then + exit 0 +fi + +cp -r examples/aobench target/aobench +cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features +cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit + +cp -r examples/dot_product target/dot_product +cargo_test --manifest-path=target/dot_product/Cargo.toml --release + +cp -r examples/fannkuch_redux target/fannkuch_redux +cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release + +# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56 +if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then + cp -r examples/mandelbrot target/mandelbrot + cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release +fi + +cp -r examples/matrix_inverse target/matrix_inverse +cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release + +cp -r examples/nbody target/nbody +cargo_test --manifest-path=target/nbody/Cargo.toml --release + +cp -r examples/spectral_norm target/spectral_norm +cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release + +if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then + cp -r examples/stencil target/stencil + cargo_test --manifest-path=target/stencil/Cargo.toml --release +fi + +cp -r examples/triangle_xform target/triangle_xform +cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs new file mode 100644 index 000000000000..ed1cd80c834a --- /dev/null +++ 
b/third_party/rust/packed_simd/ci/runtest-android.rs @@ -0,0 +1,45 @@ +use std::env; +use std::process::Command; +use std::path::{Path, PathBuf}; + +fn main() { + let args = env::args_os() + .skip(1) + .filter(|arg| arg != "--quiet") + .collect::>(); + assert_eq!(args.len(), 1); + let test = PathBuf::from(&args[0]); + let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap()); + + let status = Command::new("adb") + .arg("wait-for-device") + .status() + .expect("failed to run: adb wait-for-device"); + assert!(status.success()); + + let status = Command::new("adb") + .arg("push") + .arg(&test) + .arg(&dst) + .status() + .expect("failed to run: adb pushr"); + assert!(status.success()); + + let output = Command::new("adb") + .arg("shell") + .arg(&dst) + .output() + .expect("failed to run: adb shell"); + assert!(status.success()); + + println!("status: {}\nstdout ---\n{}\nstderr ---\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr)); + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut lines = stdout.lines().filter(|l| l.starts_with("test result")); + if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) { + panic!("failed to find successful test run"); + } +} diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh new file mode 100644 index 000000000000..ddc4765d5ceb --- /dev/null +++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -ex + +# Get latest ISPC binary for the target and put it in the path +git clone https://github.com/gnzlbg/ispc-binaries +cp ispc-binaries/ispc-${TARGET} ispc + +# Rust-bindgen requires RUSTFMT +rustup component add rustfmt-preview diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux new file mode 100644 index 000000000000..0654f63bfdb9 --- /dev/null +++ 
b/third_party/rust/packed_simd/ci/test-runner-linux @@ -0,0 +1,24 @@ +#!/bin/sh + +set -e + +arch=$1 +prog=$2 + +cd /qemu/init +cp -f $2 prog +find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz +cd .. + +timeout 30s qemu-system-$arch \ + -m 1024 \ + -nographic \ + -kernel kernel \ + -initrd initrd.gz \ + -append init=/prog > output || true + +# remove kernel messages +tr -d '\r' < output | egrep -v '^\[' + +# if the output contains a failure, return error +! grep FAILED output > /dev/null diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md new file mode 100644 index 000000000000..93fa92783740 --- /dev/null +++ b/third_party/rust/packed_simd/contributing.md @@ -0,0 +1,67 @@ +# Contributing to `packed_simd` + +Welcome! If you are reading this document, it means you are interested in contributing +to the `packed_simd` crate. + +## Reporting issues + +All issues with this crate are tracked using GitHub's [Issue Tracker]. + +You can use issues to bring bugs to the attention of the maintainers, to discuss +certain problems encountered with the crate, or to request new features (although +feature requests should be limited to things mentioned in the [RFC]). + +One thing to keep in mind is to always use the **latest** nightly toolchain when +working on this crate. Due to the nature of this project, we use a lot of unstable +features, meaning breakage happens often. + +[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues +[RFC]: https://github.com/rust-lang/rfcs/pull/2366 + +### LLVM issues + +The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation, +and quite a few LLVM bugs have been discovered during the development of this project. 
+ +If you encounter issues with incorrect/suboptimal codegen, which you do not encounter +when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/), +it is likely the issue is with LLVM, or this crate's interaction with it. + +You should first open an issue **in this repo** to help us track the problem, and we +will help determine what is the exact cause of the problem. +If LLVM is indeed the cause, the issue will be reported upstream to the +[LLVM bugtracker](https://bugs.llvm.org/). + +## Submitting Pull Requests + +New code is submitted to the crate using GitHub's [pull request] mechanism. +You should first fork this repository, make your changes (preferably in a new +branch), then use GitHub's web UI to create a new PR. + +[pull request]: https://help.github.com/articles/about-pull-requests/ + +### Examples + +The `examples` directory contains code showcasing SIMD code written with this crate, +usually in comparison to scalar or ISPC code. If you have a project / idea which +uses SIMD, we'd love to add it to the examples list. + +Every example should include a small `README`, describing the example code's purpose. +If your example could potentially work as a benchmark, then add a `benchmark.sh` +script to allow running the example benchmark code in CI. See an existing example's +[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample. + +Don't forget to update the crate's top-level `README` with a link to your example. + +### Perf guide + +The objective of the [performance guide][perf-guide] is to be a comprehensive +resource detailing the process of optimizing Rust code with SIMD support. + +If you believe a certain section could be reworded, or if you have any tips & tricks +related to SIMD which you'd like to share, please open a PR. + +[mdBook] is used to manage the formatting of the guide as a book. 
+ +[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ +[mdBook]: https://github.com/rust-lang-nursery/mdBook diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore new file mode 100644 index 000000000000..5a0bf0317d75 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/.gitignore @@ -0,0 +1 @@ +/book diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml new file mode 100644 index 000000000000..69ba3053ca25 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/book.toml @@ -0,0 +1,12 @@ +[book] +authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"] +multilingual = false +src = "src" +title = "Rust SIMD Performance Guide" +description = "This book describes how to write performant SIMD code in Rust." + +[build] +create-missing = false + +[output.html] +additional-css = ["./src/ascii.css"] diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md new file mode 100644 index 000000000000..1e76898865c5 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md @@ -0,0 +1,21 @@ +# Summary + +[Introduction](./introduction.md) + +- [Floating-point Math](./float-math/fp.md) + - [Short-vector Math Library](./float-math/svml.md) + - [Approximate functions](./float-math/approx.md) + - [Fused multiply-accumulate](./float-math/fma.md) + +- [Target features](./target-feature/features.md) + - [Using `RUSTFLAGS`](./target-feature/rustflags.md) + - [Using the `target_feature` attribute](./target-feature/attribute.md) + - [Interaction with inlining](./target-feature/inlining.md) + - [Detecting features at runtime](./target-feature/runtime.md) + +- [Bounds checking](./bound_checks.md) +- [Vertical and horizontal operations](./vert-hor-ops.md) + +- [Performance profiling](./prof/profiling.md) + - [Profiling on Linux](./prof/linux.md) + - [Using 
machine code analyzers](./prof/mca.md) diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css new file mode 100644 index 000000000000..4c02651195f9 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css @@ -0,0 +1,4 @@ +code { + /* "Source Code Pro" breaks ASCII art */ + font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace; +} diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md new file mode 100644 index 000000000000..2eeedb5ac829 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md @@ -0,0 +1,22 @@ +# Bounds checking + +Reading and writing packed vectors to/from slices is checked by default. +Independently of the configuration options used, the safe functions: + +* `Simd<[T; N]>::from_slice_aligned(& s[..])` +* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])` + +always check that: + +* the slice is big enough to hold the vector +* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T; + N]>` (this alignment is often much larger than that of `T`). + +There are `_unaligned` versions that use unaligned load and stores, as well as +`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions = +false` / `debug = false`. That is, the `_unchecked` methods do still assert size +and alignment in debug builds and could also do so in release builds depending +on the configuration options. + +These assertions do often significantly impact performance and you should be +aware of them. 
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md new file mode 100644 index 000000000000..2237c67ec4b3 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md @@ -0,0 +1,8 @@ +# Approximate functions + + diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md new file mode 100644 index 000000000000..357748383d63 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md @@ -0,0 +1,6 @@ +# Fused Multiply Add + + diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md new file mode 100644 index 000000000000..711fcc4fd598 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md @@ -0,0 +1,3 @@ +# Floating-point math + +This chapter contains information pertaining to working with floating-point numbers. diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md new file mode 100644 index 000000000000..266c2531cc04 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md @@ -0,0 +1,7 @@ +# Short Vector Math Library + + diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md new file mode 100644 index 000000000000..7243e19c8a54 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md @@ -0,0 +1,26 @@ +# Introduction + +## What is SIMD + + + +## History of SIMD in Rust + + + +## Discover packed_simd + + + +Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately, +not trivial. There are many pitfalls that one should be aware of, and some idioms +that help avoid those pitfalls. 
+ +This book attempts to document these best practices and provides practical examples +on how to apply the tips to _your_ code. diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md new file mode 100644 index 000000000000..96c7d67bc476 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md @@ -0,0 +1,107 @@ +# Performance profiling on Linux + +## Using `perf` + +[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler +for Linux, featuring support for various hardware Performance Monitoring Units, +as well as integration with the kernel's performance events framework. + +We will only look at how the `perf` command can be used to profile SIMD code. +Full system profiling is outside of the scope of this book. + +### Recording + +The first step is to record a program's execution during an average workload. +It helps if you can isolate the parts of your program which have performance +issues, and set up a benchmark which can be easily (re)run. + +Build the benchmark binary in release mode, after having enabled debug info: + +```sh +$ cargo build --release +Finished release [optimized + debuginfo] target(s) in 0.02s +``` + +Then use the `perf record` subcommand: + +```sh +$ perf record --call-graph=dwarf ./target/release/my-program +[ perf record: Woken up 10 times to write data ] +[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ] +``` + +Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use +`--call-graph=lbr` if you have a processor with support for Last Branch Record +(i.e. Intel Haswell and newer). + +`perf` will, by default, record the count of CPU cycles it takes to execute +various parts of your program. You can use the `-e` command line option +to enable other performance events, such as `cache-misses`. Use `perf list` +to get a list of all hardware counters supported by your CPU. 
+ +### Viewing the report + +The next step is getting a bird's eye view of the program's execution. +`perf` provides an `ncurses`-based interface which will get you started. + +Use `perf report` to open a visualization of your program's performance: + +```sh +perf report --hierarchy -M intel +``` + +`--hierarchy` will display a tree-like structure of where your program spent +most of its time. `-M intel` enables disassembly output with Intel syntax, which +is subjectively more readable than the default AT&T syntax. + +Here is the output from profiling the `nbody` benchmark: + +``` +- 100,00% nbody + - 94,18% nbody + + 93,48% [.] nbody_lib::simd::advance + + 0,70% [.] nbody_lib::run + + 5,06% libc-2.28.so +``` + +If you move with the arrow keys to any node in the tree, you can then press `a` +to have `perf` _annotate_ that node. This means it will: + +- disassemble the function + +- associate every instruction with the percentage of time which was spent executing it + +- interleave the disassembly with the source code, + assuming it found the debug symbols + (you can use `s` to toggle this behaviour) + +`perf` will, by default, open the instruction which it identified as being the +hottest spot in the function: + +``` +0,76 │ movapd xmm2,xmm0 +0,38 │ movhlps xmm2,xmm0 + │ addpd xmm2,xmm0 + │ unpcklpd xmm1,xmm2 +12,50 │ sqrtpd xmm0,xmm1 +1,52 │ mulpd xmm0,xmm1 +``` + +In this case, `sqrtpd` will be highlighted in red, since that's the instruction +which the CPU spends most of its time executing. + +## Using Valgrind + +Valgrind is a set of tools which initially helped C/C++ programmers find unsafe +memory accesses in their code. 
Nowadays the project also has + +- a heap profiler called `massif` + +- a cache utilization profiler called `cachegrind` + +- a call-graph performance profiler called `callgrind` + + diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md new file mode 100644 index 000000000000..65ddf1a4eb3a --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md @@ -0,0 +1,100 @@ +# Machine code analysis tools + +## The microarchitecture of modern CPUs + +While you might have heard of Instruction Set Architectures, such as `x86` or +`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_), +refers to the internal details of an actual family of CPUs, such as Intel's +_Haswell_ or AMD's _Jaguar_. + +Replacing scalar code with SIMD code will improve performance on all CPUs +supporting the required vector extensions. +However, due to microarchitectural differences, the actual speed-up at +runtime might vary. + +**Example**: a simple example arises when optimizing for AMD K8 CPUs. +The assembly generated for an empty function should look like this: + +```asm +nop +ret +``` + +The `nop` is used to align the `ret` instruction for better performance. +However, the compiler will actually generate the following code: + +```asm +repz ret +``` + +The `repz` instruction will repeat the following instruction until a certain +condition is met. Of course, in this situation, the function will simply immediately +return, and the `ret` instruction is still aligned. +However, AMD K8's branch predictor performs better with the latter code. + +For those looking to absolutely maximize performance for a certain target µ-arch, +you will have to read some CPU manuals, or ask the compiler to do it for you +with `-C target-cpu`. 
+ +### Summary of CPU internals + +Modern processors are able to execute instructions out-of-order for better performance, +by utilizing tricks such as [branch prediction], [instruction pipelining], +or [superscalar execution]. + +[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor +[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining +[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor + +SIMD instructions are also subject to these optimizations, meaning it can get pretty +difficult to determine where the slowdown happens. +For example, if the profiler reports a store operation is slow, one of two things +could be happening: + +- the store is limited by the CPU's memory bandwidth, which is actually an ideal + scenario, all things considered; + +- memory bandwidth is nowhere near its peak, but the value to be stored is at the + end of a long chain of operations, and this store is where the profiler + encountered the pipeline stall; + +Since most profilers are simple tools which don't understand the subtleties of +instruction scheduling, you + +## Analyzing the machine code + +Certain tools have knowledge of internal CPU microarchitecture, i.e. they know + +- how many physical [register files] a CPU actually has + +- what is the latency / throughput of an instruction + +- what [µ-ops] are generated for a set of instructions + +and many other architectural details. + +[register files]: https://en.wikipedia.org/wiki/Register_file +[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation + +These tools are therefore able to provide accurate information as to why some +instructions are inefficient, and where the bottleneck is. + +The disadvantage is that the output of these tools requires advanced knowledge +of the target architecture to understand, i.e. they **cannot** point out what +the cause of the issue is explicitly. 
+ +## Intel's Architecture Code Analyzer (IACA) + +[IACA] is a free tool offered by Intel for analyzing the performance of various +computational kernels. + +Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches. + +[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer + +## llvm-mca + + diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md new file mode 100644 index 000000000000..02ba78d2f22f --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md @@ -0,0 +1,14 @@ +# Performance profiling + +While the rest of the book provides practical advice on how to improve the performance +of SIMD code, this chapter is dedicated to [**performance profiling**][profiling]. +Profiling consists of recording a program's execution in order to identify program +hotspots. + +**Important**: most profilers require debug information in order to accurately +link the program hotspots back to the corresponding source code lines. Rust will +disable debug info generation by default for optimized builds, but you can change +that [in your `Cargo.toml`][cargo-ref]. 
+ +[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming) +[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md new file mode 100644 index 000000000000..ee670fea5bd8 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md @@ -0,0 +1,5 @@ +# The `target_feature` attribute + + diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md new file mode 100644 index 000000000000..b93030ca6708 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md @@ -0,0 +1,13 @@ +# Enabling target features + +Not all processors of a certain architecture will have SIMD processing units, +and using a SIMD instruction which is not supported will trigger undefined behavior. + +To allow building safe, portable programs, the Rust compiler will **not**, by default, +generate any sort of vector instructions, unless it can statically determine +they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed. +The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of +which features are enabled by default on various platforms, refer to the target +specifications [in the compiler's source code][targets]. 
+ +[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md new file mode 100644 index 000000000000..86705102a74b --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md @@ -0,0 +1,5 @@ +# Inlining + + diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md new file mode 100644 index 000000000000..5b55c61c268a --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md @@ -0,0 +1,31 @@ +# Target features in practice + +Using `RUSTFLAGS` will allow the crate being compiled, as well as all its +transitive dependencies to use certain target features. + +A technique used to avoid undefined behavior at runtime is to compile and +ship multiple binaries, each compiled with a certain set of features. +This might not be feasible in some cases, and can quickly get out of hand +as more and more vector extensions are added to an architecture. + +Rust can be more flexible: you can build a single binary/library which automatically +picks the best supported vector instructions depending on the host machine. +The trick consists of monomorphizing parts of the code during building, and then +using run-time feature detection to select the right code path when running. + + + +**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing +SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause +performance issues. + +The solution is to compile all code, even the code written with 128-bit vectors, +with the AVX target feature enabled. This will cause the compiler to prefix the +generated instructions with the [VEX] prefix. 
+ +[VEX]: https://en.wikipedia.org/wiki/VEX_prefix diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md new file mode 100644 index 000000000000..47ddcc8660db --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md @@ -0,0 +1,5 @@ +# Detecting host features at runtime + + diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md new file mode 100644 index 000000000000..e2e806e085b6 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md @@ -0,0 +1,77 @@ +# Using RUSTFLAGS + +One of the easiest ways to benefit from SIMD is to allow the compiler +to generate code using certain vector instruction extensions. + +The environment variable `RUSTFLAGS` can be used to pass options for code +generation to the Rust compiler. These flags will affect **all** compiled crates. + +There are two flags which can be used to enable specific vector extensions: + +## target-feature + +- Syntax: `-C target-feature=` + +- Provides the compiler with a comma-separated set of instruction extensions + to enable. + + **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions + for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and + [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions). + +- To list target triples for all targets supported by Rust, use: + + ```sh + rustc --print target-list + ``` + +- To list all supported target features for a certain target triple, use: + + ```sh + rustc --target=${TRIPLE} --print target-features + ``` + +- Note that all CPU features are independent, and will have to be enabled individually. 
+ + **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though + all CPUs which support AVX2 also support FMA. To enable both, one has to use + `-C target-feature=+avx2,+fma` + +- Some features also depend on other features, which need to be enabled for the + target instructions to be generated. + + **Example**: Unless `v7` is specified as the target CPU (see below), to enable + NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`. + +## target-cpu + +- Syntax: `-C target-cpu=<cpu>` + +- Sets the identifier of a CPU family / model for which to build and optimize the code. + + **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'` + +- To list all supported target CPUs for a certain target triple, use: + + ```sh + rustc --target=${TRIPLE} --print target-cpus + ``` + + **Example**: + + ```sh + rustc --target=i686-pc-windows-msvc --print target-cpus + ``` + +- The compiler will translate this into a list of target features. Therefore, + individual feature checks (`#[cfg(target_feature = "...")]`) will still + work properly. + +- It will cause the code generator to optimize the generated code for that + specific CPU model. + +- Using `native` as the CPU model will cause Rust to generate and optimize code + for the CPU running the compiler. It is useful when building programs which you + plan to only use locally. This should never be used when the generated programs + are meant to be run on other computers, such as when packaging for distribution + or cross-compiling. diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md new file mode 100644 index 000000000000..d0dd1be12a19 --- /dev/null +++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md @@ -0,0 +1,76 @@ +# Vertical and horizontal operations + +In SIMD terminology, each vector has a certain "width" (number of lanes). 
+A vector processor is able to perform two kinds of operations on a vector: + +- Vertical operations: + operate on two vectors of the same width, result has same width + +**Example**: vertical addition of two `f32x4` vectors + + %0 == | 2 | -3.5 | 0 | 7 | + + + + + + %1 == | 4 | 1.5 | -1 | 0 | + = = = = + %0 + %1 == | 6 | -2 | -1 | 7 | + +- Horizontal operations: + reduce the elements of two vectors in some way, + the result's elements combine information from the two original ones + +**Example**: horizontal addition of two `i64x2` vectors + + %0 == | 1 | 3 | + └─+───┘ + └───────┐ + │ + %1 == | 4 | -1 | │ + └─+──┘ │ + └───┐ │ + │ │ + ┌─────│───┘ + ▼ ▼ + %0 + %1 == | 4 | 3 | + +## Performance consideration of horizontal operations + +The result of vertical operations, like vector negation: `-a`, for a given lane, +does not depend on the result of the operation for the other lanes. The result +of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on +the value of all vector lanes. + +In virtually all architectures vertical operations are fast, while horizontal +operations are, by comparison, very slow. + +Consider the following two functions for computing the sum of all `f32` values +in a slice: + +```rust +fn fast_sum(x: &[f32]) -> f32 { + assert!(x.len() % 4 == 0); + let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.] + for i in (0..x.len()).step_by(4) { + sum += f32x4::from_slice_unaligned(&x[i..]); + } + sum.sum() +} + +fn slow_sum(x: &[f32]) -> f32 { + assert!(x.len() % 4 == 0); + let mut sum: f32 = 0.; + for i in (0..x.len()).step_by(4) { + sum += f32x4::from_slice_unaligned(&x[i..]).sum(); + } + sum +} +``` + +The inner loop over the slice is where the bulk of the work actually happens. +There, the `fast_sum` function performs vertical operations into a vector, doing +a single horizontal reduction at the end, while the `slow_sum` function performs +horizontal vector operations inside of the loop. 
+ +On all widely-used architectures, `fast_sum` is a large constant factor faster +than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On +the particular machine tested there the algorithm using the horizontal vector +addition is 2.7x slower than the one using vertical vector operations! diff --git a/third_party/rust/packed_simd/readme.md b/third_party/rust/packed_simd/readme.md new file mode 100644 index 000000000000..3b27a2bba0d6 --- /dev/null +++ b/third_party/rust/packed_simd/readme.md @@ -0,0 +1,182 @@ +# `Simd<[T; N]>` + +## Implementation of [Rust RFC #2366: `std::simd`][rfc2366] + +[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs] + +> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization. + +**WARNING**: this crate only supports the most recent nightly Rust toolchain. + +## Documentation + +* [API docs (`master` branch)][master_docs] +* [Performance guide][perf_guide] +* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to + https://github.com/rust-lang-nursery/packed_simd/issues/110 +* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale, + discussion, etc. + +## Examples + +Most of the examples come with both a scalar and a vectorized implementation. 
+ +* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench) +* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux) +* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse) +* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot) +* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody) +* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing) +* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm) +* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform) +* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil) +* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product) + +## Cargo features + +* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait + implementations for the vector types. These allow reinterpreting the bits of a + vector type as those of another vector type safely by just using the + `.into_bits()` method. + +* `core_arch` (default: disabled): enable this feature to recompile `core::arch` + for the target-features enabled. `packed_simd` includes optimizations for some + target feature combinations that are enabled by this feature. Note, however, + that this is an unstable dependency, that rustc might break at any time. + +* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF] + short-vector math library when profitable via the [`sleef-sys`][sleef_sys] + crate. [SLEEF] is licensed under the [Boost Software License + v1.0][boost_license], an extremely permissive license, and can be statically + linked without issues. 
+ +## Performance + +The following [ISPC] examples are also part of `packed_simd`'s +[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/) +directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s +Single-Program-Multiple-Data (SPMD) programming model. The performance results +on different hardware are shown in the `readme.md` of each example. The following +list summarizes the performance ranges, where `+` means speed-up and `-` +slowdown: + +* `aobench`: `[-1.02x, +1.53x]`, +* `stencil`: `[+1.06x, +1.72x]`, +* `mandelbrot`: `[-1.74x, +1.2x]`, +* `options_pricing`: + * `black_scholes`: `+1.0x` + * `binomial_put`: `+1.4x` + + While SPMD is not the intended use case for `packed_simd`, it is possible to + combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming + model in Rust. Writing performant code is not as straightforward as with + [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one + can easily match and often out-perform [ISPC]'s "default performance". + +## Platform support + +The following table describes the supported platforms: `build` shows whether the +library compiles without issues for a given target, while `run` shows whether +the full testsuite passes on the target. 
+ +| Linux targets: | build | run | +|-----------------------------------|-----------|---------| +| `i586-unknown-linux-gnu` | ✓ | ✓ | +| `i686-unknown-linux-gnu` | ✓ | ✓ | +| `x86_64-unknown-linux-gnu` | ✓ | ✓ | +| `arm-unknown-linux-gnueabi` | ✗ | ✗ | +| `arm-unknown-linux-gnueabihf` | ✓ | ✓ | +| `armv7-unknown-linux-gnueabi` | ✓ | ✓ | +| `aarch64-unknown-linux-gnu` | ✓ | ✓ | +| `mips-unknown-linux-gnu` | ✓ | ✓ | +| `mipsel-unknown-linux-musl` | ✓ | ✓ | +| `mips64-unknown-linux-gnuabi64` | ✓ | ✓ | +| `mips64el-unknown-linux-gnuabi64` | ✓ | ✓ | +| `powerpc-unknown-linux-gnu` | ✗ | ✗ | +| `powerpc64-unknown-linux-gnu` | ✗ | ✗ | +| `powerpc64le-unknown-linux-gnu` | ✗ | ✗ | +| `s390x-unknown-linux-gnu` | ✓ | ✓* | +| `sparc64-unknown-linux-gnu` | ✓ | ✓* | +| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ | +| **MacOSX targets:** | **build** | **run** | +| `x86_64-apple-darwin` | ✓ | ✓ | +| `i686-apple-darwin` | ✓ | ✓ | +| **Windows targets:** | **build** | **run** | +| `x86_64-pc-windows-msvc` | ✓ | ✓ | +| `i686-pc-windows-msvc` | ✓ | ✓ | +| `x86_64-pc-windows-gnu` | ✗ | ✗ | +| `i686-pc-windows-gnu` | ✗ | ✗ | +| **WebAssembly targets:** | **build** | **run** | +| `wasm32-unknown-unknown` | ✓ | ✓ | +| **Android targets:** | **build** | **run** | +| `x86_64-linux-android` | ✓ | ✓ | +| `arm-linux-androideabi` | ✓ | ✓ | +| `aarch64-linux-android` | ✓ | ✗ | +| `thumbv7neon-linux-androideabi` | ✓ | ✓ | +| **iOS targets:** | **build** | **run** | +| `i386-apple-ios` | ✓ | ✗ | +| `x86_64-apple-ios` | ✓ | ✗ | +| `armv7-apple-ios` | ✓ | ✗** | +| `aarch64-apple-ios` | ✓ | ✗** | +| **xBSD targets:** | **build** | **run** | +| `i686-unknown-freebsd` | ✗ | ✗** | +| `x86_64-unknown-freebsd` | ✗ | ✗** | +| `x86_64-unknown-netbsd` | ✗ | ✗** | +| **Solaris targets:** | **build** | **run** | +| `x86_64-sun-solaris` | ✗ | ✗** | + +[*] most of the test suite passes correctly on these platform but +there are correctness bugs open in the issue tracker. 
+ +[**] it is currently not easily possible to run these platforms on CI. + +## Machine code verification + +The +[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify) +crate's tests disassemble the portable packed vector APIs at run-time and +compare the generated machine code against the desired one to make sure that +this crate remains efficient. + +## License + +This project is licensed under either of + +* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + ([LICENSE-APACHE](LICENSE-APACHE)) + +* [MIT License](http://opensource.org/licenses/MIT) + ([LICENSE-MIT](LICENSE-MIT)) + +at your option. + +## Contributing + +We welcome all people who want to contribute. +Please see the [contributing instructions] for more information. + +Contributions in any form (issues, pull requests, etc.) to this project +must adhere to Rust's [Code of Conduct]. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
+ +[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd +[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd +[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true +[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg +[crates.io]: https://crates.io/crates/packed_simd +[docs]: https://docs.rs/packed_simd/badge.svg +[docs.rs]: https://docs.rs/packed_simd/ +[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/ +[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ +[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366 +[ISPC]: https://ispc.github.io/ +[rayon]: https://crates.io/crates/rayon +[boost_license]: https://www.boost.org/LICENSE_1_0.txt +[SLEEF]: https://sleef.org/ +[sleef_sys]: https://crates.io/crates/sleef-sys +[contributing instructions]: contributing.md +[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml new file mode 100644 index 000000000000..5b400a4ce440 --- /dev/null +++ b/third_party/rust/packed_simd/rustfmt.toml @@ -0,0 +1,7 @@ +max_width = 79 +use_small_heuristics = "Max" +wrap_comments = true +comment_width = 79 +fn_args_density = "Compressed" +edition = "2018" +error_on_line_overflow = true \ No newline at end of file diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs new file mode 100644 index 000000000000..9959a052ae96 --- /dev/null +++ b/third_party/rust/packed_simd/src/api.rs @@ -0,0 +1,301 @@ +//! 
Implements the Simd<[T; N]> APIs + +crate mod cast; +#[macro_use] +mod cmp; +#[macro_use] +mod default; +#[macro_use] +mod fmt; +#[macro_use] +mod from; +#[macro_use] +mod hash; +#[macro_use] +mod math; +#[macro_use] +mod minimal; +#[macro_use] +mod ops; +#[macro_use] +mod ptr; +#[macro_use] +mod reductions; +#[macro_use] +mod select; +#[macro_use] +mod shuffle; +#[macro_use] +mod shuffle1_dyn; +#[macro_use] +mod slice; +#[macro_use] +mod swap_bytes; +#[macro_use] +mod bit_manip; + +#[cfg(feature = "into_bits")] +crate mod into_bits; + +macro_rules! impl_i { + ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident + | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* + | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { + impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | $($elem_ids),* | $(#[$doc])*); + impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) + ); + impl_ops_scalar_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) + ); + impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_int_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt + ); + impl_reduction_integer_arithmetic!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + ); + impl_reduction_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + ); + impl_reduction_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) + ); + impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_lower_hex!([$elem_ty; $elem_n]: 
$tuple_id | $test_tt); + impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); + impl_from_vectors!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* + ); + impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_partial_eq!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1) + ); + impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); + impl_cmp_vertical!( + [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt + ); + impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); + + test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); + test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + } +} + +macro_rules! 
impl_u { + ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident + | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* + | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { + impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | $($elem_ids),* | $(#[$doc])*); + impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) + ); + impl_ops_scalar_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) + ); + impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_int_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt + ); + impl_reduction_integer_arithmetic!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + ); + impl_reduction_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + ); + impl_reduction_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) + ); + impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); + impl_from_vectors!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* + ); + impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | 
$test_tt); + impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_partial_eq!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0) + ); + impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); + impl_cmp_vertical!( + [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt + ); + impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); + + test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); + test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + } +} + +macro_rules! impl_f { + ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident + | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* + | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { + impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | $($elem_ids),* | $(#[$doc])*); + impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_ops_vector_float_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt + ); + impl_reduction_float_arithmetic!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_reduction_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + ); + impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.)); + impl_from_vectors!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* + ); + impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_partial_eq!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.) 
+ ); + impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + + impl_float_consts!([$elem_ty; $elem_n]: $tuple_id); + impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty); + + // floating-point math + impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_vertical!( + [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.) + | $test_tt + ); + + test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt); + test_reduction_float_min_max!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt + ); + test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + } +} + +macro_rules! 
impl_m { + ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident + | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* + | $(#[$doc:meta])*) => { + impl_minimal_mask!( + [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | $($elem_ids),* | $(#[$doc])* + ); + impl_ops_vector_mask_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) + ); + impl_ops_scalar_mask_bitwise!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) + ); + impl_reduction_bitwise!( + [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt + | (|x|{ x != 0 }) | (true, false) + ); + impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt); + impl_from_array!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt + | (crate::$elem_ty::new(true), true) + ); + impl_from_vectors!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* + ); + impl_default!([bool; $elem_n]: $tuple_id | $test_tt); + impl_cmp_partial_eq!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) + ); + impl_cmp_eq!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) + ); + impl_cmp_vertical!( + [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false) + | $test_tt + ); + impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_cmp_ord!( + [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true) + ); + impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + + test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + } +} + +macro_rules! 
impl_const_p { + ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, + $usize_ty:ident, $isize_ty:ident + | $test_tt:tt | $($elem_ids:ident),* + | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { + impl_minimal_p!( + [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty + | ref_ | $test_tt | $($elem_ids),* + | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* + ); + impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); + } +} + +macro_rules! impl_mut_p { + ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, + $usize_ty:ident, $isize_ty:ident + | $test_tt:tt | $($elem_ids:ident),* + | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { + impl_minimal_p!( + [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty + | ref_mut_ | $test_tt | $($elem_ids),* + | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* + ); + impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); + impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); + } +} diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs new file mode 100644 index 000000000000..3d3c4eb8850a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/bit_manip.rs @@ -0,0 +1,128 @@ +//! Bit manipulations. + +macro_rules! impl_bit_manip { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the number of ones in the binary representation of + /// the lanes of `self`. + #[inline] + pub fn count_ones(self) -> Self { + super::codegen::bit_manip::BitManip::ctpop(self) + } + + /// Returns the number of zeros in the binary representation of + /// the lanes of `self`. + #[inline] + pub fn count_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::ctpop(!self) + } + + /// Returns the number of leading zeros in the binary + /// representation of the lanes of `self`. 
+ #[inline] + pub fn leading_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::ctlz(self) + } + + /// Returns the number of trailing zeros in the binary + /// representation of the lanes of `self`. + #[inline] + pub fn trailing_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::cttz(self) + } + } + + test_if! { + $test_tt: + paste::item_with_macros! { + #[allow(overflowing_literals)] + pub mod [<$id _bit_manip>] { + use super::*; + + const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; + + macro_rules! test_func { + ($x:expr, $func:ident) => {{ + let mut actual = $x; + for i in 0..$id::lanes() { + actual = actual.replace( + i, + $x.extract(i).$func() as $elem_ty + ); + } + let expected = $x.$func(); + assert_eq!(actual, expected); + }}; + } + + const BYTES: [u8; 64] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ]; + + fn load_bytes() -> $id { + let elems: &mut [$elem_ty] = unsafe { + slice::from_raw_parts_mut( + BYTES.as_mut_ptr() as *mut $elem_ty, + $id::lanes(), + ) + }; + $id::from_slice_unaligned(elems) + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn count_ones() { + test_func!($id::splat(0), count_ones); + test_func!($id::splat(!0), count_ones); + test_func!(load_bytes(), count_ones); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn count_zeros() { + test_func!($id::splat(0), count_zeros); + test_func!($id::splat(!0), count_zeros); + test_func!(load_bytes(), count_zeros); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn leading_zeros() { + test_func!($id::splat(0), leading_zeros); + test_func!($id::splat(1), 
leading_zeros); + // some implementations use `pshufb` which has unique + // behavior when the 8th bit is set. + test_func!($id::splat(0b1000_0010), leading_zeros); + test_func!($id::splat(!0), leading_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + leading_zeros + ); + test_func!(load_bytes(), leading_zeros); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn trailing_zeros() { + test_func!($id::splat(0), trailing_zeros); + test_func!($id::splat(1), trailing_zeros); + test_func!($id::splat(0b1000_0010), trailing_zeros); + test_func!($id::splat(!0), trailing_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + trailing_zeros + ); + test_func!(load_bytes(), trailing_zeros); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs new file mode 100644 index 000000000000..f1c32ca1a38b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast.rs @@ -0,0 +1,108 @@ +//! Implementation of `FromCast` and `Cast`. +#![allow(clippy::module_name_repetitions)] + +/// Numeric cast from `T` to `Self`. +/// +/// > Note: This is a temporary workaround until the conversion traits +/// > specified in [RFC2484] are implemented. +/// +/// Numeric cast between vectors with the same number of lanes, such that: +/// +/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` +/// -> `u32xN`) is a **no-op**, +/// +/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> +/// `u8xN`) will **truncate**, +/// +/// * casting from a smaller integer to a larger integer (e.g. 
`u8xN` -> +/// `u32xN`) will: +/// * **zero-extend** if the source is unsigned, or +/// * **sign-extend** if the source is signed, +/// +/// * casting from a float to an integer will **round the float towards zero**, +/// +/// * casting from an integer to float will produce the floating point +/// representation of the integer, **rounding to nearest, ties to even**, +/// +/// * casting from an `f32` to an `f64` is perfect and lossless, +/// +/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. +/// +/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 +pub trait FromCast<T>: crate::marker::Sized { + /// Numeric cast from `T` to `Self`. + fn from_cast(_: T) -> Self; +} + +/// Numeric cast from `Self` to `T`. +/// +/// > Note: This is a temporary workaround until the conversion traits +/// > specified in [RFC2484] are implemented. +/// +/// Numeric cast between vectors with the same number of lanes, such that: +/// +/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` +/// -> `u32xN`) is a **no-op**, +/// +/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> +/// `u8xN`) will **truncate**, +/// +/// * casting from a smaller integer to a larger integer (e.g. `u8xN` -> +/// `u32xN`) will: +/// * **zero-extend** if the source is unsigned, or +/// * **sign-extend** if the source is signed, +/// +/// * casting from a float to an integer will **round the float towards zero**, +/// +/// * casting from an integer to float will produce the floating point +/// representation of the integer, **rounding to nearest, ties to even**, +/// +/// * casting from an `f32` to an `f64` is perfect and lossless, +/// +/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. +/// +/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 +pub trait Cast<T>: crate::marker::Sized { + /// Numeric cast from `self` to `T`. 
+ fn cast(self) -> T; +} + +/// Blanket impl: `U: FromCast<T>` implies `T: Cast<U>` (forwards to `U::from_cast`). 
+impl<T, U> Cast<U> for T +where + U: FromCast<T>, +{ + #[inline] + fn cast(self) -> U { + U::from_cast(self) + } +} + +/// `FromCast` and `Cast` are reflexive: casting a value to its own type returns it unchanged. +impl<T> FromCast<T> for T { + #[inline] + fn from_cast(t: Self) -> Self { + t + } +} + +#[macro_use] +mod macros; + +mod v16; +pub use self::v16::*; + +mod v32; +pub use self::v32::*; + +mod v64; +pub use self::v64::*; + +mod v128; +pub use self::v128::*; + +mod v256; +pub use self::v256::*; + +mod v512; +pub use self::v512::*; diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs new file mode 100644 index 000000000000..3bb29f0b80b7 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/macros.rs @@ -0,0 +1,82 @@ +//! Macros implementing `FromCast` + +macro_rules! impl_from_cast_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::cast::FromCast<$from_ty> for $id { + #[inline] + fn from_cast(x: $from_ty) -> Self { + use crate::llvm::simd_cast; + debug_assert_eq!($from_ty::lanes(), $id::lanes()); + Simd(unsafe { simd_cast(x.0) }) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _from_cast_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + assert_eq!($id::lanes(), $from_ty::lanes()); + } + } + } + } + }; +} + +macro_rules! impl_from_cast { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_!($id[$test_tt]: $from_ty); + )* + } +} + +macro_rules! impl_from_cast_mask_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::cast::FromCast<$from_ty> for $id { + #[inline] + fn from_cast(x: $from_ty) -> Self { + debug_assert_eq!($from_ty::lanes(), $id::lanes()); + x.ne($from_ty::default()) + .select($id::splat(true), $id::splat(false)) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _from_cast_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + assert_eq!($id::lanes(), $from_ty::lanes()); + + let x = $from_ty::default(); + let m: $id = x.cast(); + assert!(m.none()); + } + } + } + } + }; +} + +macro_rules! impl_from_cast_mask { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_mask_!($id[$test_tt]: $from_ty); + )* + } +} + +#[allow(unused)] +macro_rules! impl_into_cast { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_!($from_ty[$test_tt]: $id); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs new file mode 100644 index 000000000000..78c07f3a5597 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v128.rs @@ -0,0 +1,79 @@ +//! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast_mask!( + m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); + +impl_from_cast!( + i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, 
isizex4, usizex4, msizex4 +); +impl_from_cast!( + u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); + +impl_from_cast!( + isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 +); +impl_from_cast!( + usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 +); +impl_from_cast_mask!( + msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 +); + +// FIXME[test_v128]: 64-bit single element vectors into_cast impls +impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); 
+impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); +impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs new file mode 100644 index 000000000000..d292936baa41 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v16.rs @@ -0,0 +1,17 @@ +//! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs new file mode 100644 index 000000000000..0a669e0beebe --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v256.rs @@ -0,0 +1,81 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 256-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); +impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); +impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); + +impl_from_cast!( + i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, + i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, + i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast_mask!( + m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, + i32x16, u32x16, f32x16, m32x16 +); + +impl_from_cast!( + i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, 
f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 +); + +impl_from_cast!( + isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 +); +impl_from_cast!( + usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 +); +impl_from_cast_mask!( + msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs new file mode 100644 index 000000000000..65050cdacb4e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v32.rs @@ -0,0 +1,30 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 32-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs new file mode 100644 index 000000000000..9ae1caed35e2 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v512.rs @@ -0,0 +1,68 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 512-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); +impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); +impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); + +impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); +impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); +impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); + +impl_from_cast!( + i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 +); +impl_from_cast!( + f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 +); +impl_from_cast_mask!( + m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 +); + +impl_from_cast!( + i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m128x4[test_v512]: 
i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 +); +impl_from_cast!( + usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 +); +impl_from_cast_mask!( + msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs new file mode 100644 index 000000000000..0e2f78f7335b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v64.rs @@ -0,0 +1,47 @@ +//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors +#![rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, 
usizex4, msizex4 +); + +impl_from_cast!( + i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs new file mode 100644 index 000000000000..6d5301ddddbd --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp.rs @@ -0,0 +1,16 @@ +//! Implement cmp traits for vector types + +#[macro_use] +mod partial_eq; + +#[macro_use] +mod eq; + +#[macro_use] +mod partial_ord; + +#[macro_use] +mod ord; + +#[macro_use] +mod vertical; diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs new file mode 100644 index 000000000000..3c55d0dce57e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs @@ -0,0 +1,27 @@ +//! Implements `Eq` for vector types. + +macro_rules! impl_cmp_eq { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::cmp::Eq for $id {} + impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_eq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo(_: E) {} + let a = $id::splat($false); + foo(a); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs new file mode 100644 index 000000000000..e54ba3bfde9a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs @@ -0,0 +1,43 @@ +//! Implements `Ord` for vector types. + +macro_rules! impl_cmp_ord { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl $id { + /// Returns a wrapper that implements `Ord`. + #[inline] + pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> { + LexicographicallyOrdered(*self) + } + } + + impl crate::cmp::Ord for LexicographicallyOrdered<$id> { + #[inline] + fn cmp(&self, other: &Self) -> crate::cmp::Ordering { + match self.partial_cmp(other) { + Some(x) => x, + None => unsafe { crate::hint::unreachable_unchecked() }, + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_ord>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo(_: E) {} + let a = $id::splat($false); + foo(a.partial_lex_ord()); + foo(a.lex_ord()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs new file mode 100644 index 000000000000..1712a0de56cb --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs @@ -0,0 +1,67 @@ +//! Implements `PartialEq` for vector types. + +macro_rules! 
impl_cmp_partial_eq { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq<$id> for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + $id::eq(*self, *other).all() + } + #[inline] + fn ne(&self, other: &Self) -> bool { + $id::ne(*self, *other).any() + } + } + + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq> + for LexicographicallyOrdered<$id> + { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + #[inline] + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _cmp_PartialEq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_eq() { + let a = $id::splat($false); + let b = $id::splat($true); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + + if $id::lanes() > 1 { + let a = $id::splat($false).replace(0, $true); + let b = $id::splat($true); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs new file mode 100644 index 000000000000..a2292918bae1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs @@ -0,0 +1,234 @@ +//! Implements `PartialOrd` for vector types. +//! +//! This implements a lexicographical order. + +macro_rules! impl_cmp_partial_ord { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns a wrapper that implements `PartialOrd`. 
+ #[inline] + pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> { + LexicographicallyOrdered(*self) + } + } + + impl crate::cmp::PartialOrd> + for LexicographicallyOrdered<$id> + { + #[inline] + fn partial_cmp( + &self, other: &Self, + ) -> Option { + if PartialEq::eq(self, other) { + Some(crate::cmp::Ordering::Equal) + } else if PartialOrd::lt(self, other) { + Some(crate::cmp::Ordering::Less) + } else if PartialOrd::gt(self, other) { + Some(crate::cmp::Ordering::Greater) + } else { + None + } + } + #[inline] + fn lt(&self, other: &Self) -> bool { + let m_lt = self.0.lt(other.0); + let m_eq = self.0.eq(other.0); + for i in 0..$id::lanes() { + if m_eq.extract(i) { + continue; + } + return m_lt.extract(i); + } + false + } + #[inline] + fn le(&self, other: &Self) -> bool { + self.lt(other) | PartialEq::eq(self, other) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + self.gt(other) | PartialEq::eq(self, other) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + let m_gt = self.0.gt(other.0); + let m_eq = self.0.eq(other.0); + for i in 0..$id::lanes() { + if m_eq.extract(i) { + continue; + } + return m_gt.extract(i); + } + false + } + } + }; +} + +macro_rules! test_cmp_partial_ord_int { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_PartialOrd>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_lex_ord() { + use crate::testing::utils::{test_cmp}; + // constant values + let a = $id::splat(0); + let b = $id::splat(1); + + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0] + let mut a = $id::splat(0); + let mut b = $id::splat(0); + for i in 0..$id::lanes() { + a = a.replace(i, i as $elem_ty); + b = b.replace(i, ($id::lanes() - i) as $elem_ty); + } + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4] + let mut b = a; + b = b.replace( + $id::lanes() - 1, + a.extract($id::lanes() - 1) + 1 as $elem_ty + ); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + if $id::lanes() > 2 { + // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3] + let b = a; + let mut a = $id::splat(0); + a = a.replace(1, 1 as $elem_ty); + test_cmp(a.partial_lex_ord(), 
b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] + let mut b = a; + b = b.replace( + 2, a.extract($id::lanes() - 1) + 1 as $elem_ty + ); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + } + } + } + } + } + }; +} + +macro_rules! test_cmp_partial_ord_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_PartialOrd>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_lex_ord() { + use crate::testing::utils::{test_cmp}; + use crate::cmp::Ordering; + + // constant values + let a = $id::splat(false); + let b = $id::splat(true); + + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + // variable values: + // a = [false, false, false, false]; + // b = [false, false, false, true] + let a = $id::splat(false); + let mut b = $id::splat(false); + b = b.replace($id::lanes() - 1, true); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + // variable values: + // a = [true, true, true, false]; + // b = [true, true, true, true] + let mut a = $id::splat(true); + let b = $id::splat(true); + a = a.replace($id::lanes() - 1, false); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + if $id::lanes() > 2 { + // variable values + // a = [false, true, false, false]; + // b = [false, true, true, true] + let mut a = $id::splat(false); + let mut b = $id::splat(true); + a = a.replace(1, true); + b = b.replace(0, false); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); 
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs new file mode 100644 index 000000000000..ea4a0d1a3467 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs @@ -0,0 +1,114 @@ +//! Vertical (lane-wise) vector comparisons returning vector masks. + +macro_rules! impl_cmp_vertical { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident, + $mask_ty:ident, + $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt + ) => { + impl $id { + /// Lane-wise equality comparison. + #[inline] + pub fn eq(self, other: Self) -> $mask_ty { + use crate::llvm::simd_eq; + Simd(unsafe { simd_eq(self.0, other.0) }) + } + + /// Lane-wise inequality comparison. + #[inline] + pub fn ne(self, other: Self) -> $mask_ty { + use crate::llvm::simd_ne; + Simd(unsafe { simd_ne(self.0, other.0) }) + } + + /// Lane-wise less-than comparison. + #[inline] + pub fn lt(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_gt, simd_lt}; + if $is_mask { + Simd(unsafe { simd_gt(self.0, other.0) }) + } else { + Simd(unsafe { simd_lt(self.0, other.0) }) + } + } + + /// Lane-wise less-than-or-equals comparison. + #[inline] + pub fn le(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_ge, simd_le}; + if $is_mask { + Simd(unsafe { simd_ge(self.0, other.0) }) + } else { + Simd(unsafe { simd_le(self.0, other.0) }) + } + } + + /// Lane-wise greater-than comparison. + #[inline] + pub fn gt(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_gt, simd_lt}; + if $is_mask { + Simd(unsafe { simd_lt(self.0, other.0) }) + } else { + Simd(unsafe { simd_gt(self.0, other.0) }) + } + } + + /// Lane-wise greater-than-or-equals comparison. 
+ #[inline] + pub fn ge(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_ge, simd_le}; + if $is_mask { + Simd(unsafe { simd_le(self.0, other.0) }) + } else { + Simd(unsafe { simd_ge(self.0, other.0) }) + } + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_vertical>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cmp() { + let a = $id::splat($false); + let b = $id::splat($true); + + let r = a.lt(b); + let e = $mask_ty::splat(true); + assert!(r == e); + let r = a.le(b); + assert!(r == e); + + let e = $mask_ty::splat(false); + let r = a.gt(b); + assert!(r == e); + let r = a.ge(b); + assert!(r == e); + let r = a.eq(b); + assert!(r == e); + + let mut a = a; + let mut b = b; + let mut e = e; + for i in 0..$id::lanes() { + if i % 2 == 0 { + a = a.replace(i, $false); + b = b.replace(i, $true); + e = e.replace(i, true); + } else { + a = a.replace(i, $true); + b = b.replace(i, $false); + e = e.replace(i, false); + } + } + let r = a.lt(b); + assert!(r == e); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs new file mode 100644 index 000000000000..843d51bcc4bb --- /dev/null +++ b/third_party/rust/packed_simd/src/api/default.rs @@ -0,0 +1,28 @@ +//! Implements `Default` for vector types. + +macro_rules! impl_default { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl Default for $id { + #[inline] + fn default() -> Self { + Self::splat($elem_ty::default()) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _default>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn default() { + let a = $id::default(); + for i in 0..$id::lanes() { + assert_eq!(a.extract(i), $elem_ty::default()); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs new file mode 100644 index 000000000000..f3f55c401548 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt.rs @@ -0,0 +1,12 @@ +//! Implements formatting APIs + +#[macro_use] +mod debug; +#[macro_use] +mod lower_hex; +#[macro_use] +mod upper_hex; +#[macro_use] +mod octal; +#[macro_use] +mod binary; diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs new file mode 100644 index 000000000000..b60769082d51 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs @@ -0,0 +1,56 @@ +//! Implement Octal formatting + +macro_rules! impl_fmt_binary { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Binary for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _fmt_binary>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn binary() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#b}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#b}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs new file mode 100644 index 000000000000..ad0b8a59a1f0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs @@ -0,0 +1,62 @@ +//! Implement debug formatting + +macro_rules! impl_fmt_debug_tests { + ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _fmt_debug>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn debug() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::default(); + let mut s = TinyString::new(); + write!(&mut s, "{:?}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:?}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} + +macro_rules! impl_fmt_debug { + ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Debug for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs new file mode 100644 index 000000000000..5a7aa14b5b8a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs @@ -0,0 +1,56 @@ +//! Implement `LowerHex` formatting + +macro_rules! 
impl_fmt_lower_hex { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::LowerHex for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_lower_hex>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn lower_hex() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#x}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#x}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs new file mode 100644 index 000000000000..83ac8abc7dae --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs @@ -0,0 +1,56 @@ +//! Implement Octal formatting + +macro_rules! 
impl_fmt_octal { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Octal for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_octal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn octal() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#o}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#o}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs new file mode 100644 index 000000000000..aa88f673abf0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs @@ -0,0 +1,56 @@ +//! Implement `UpperHex` formatting + +macro_rules!
impl_fmt_upper_hex { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::UpperHex for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_upper_hex>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn upper_hex() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#X}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#X}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs new file mode 100644 index 000000000000..c30c4d6e216d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from.rs @@ -0,0 +1,7 @@ +//! 
Implementations of the `From` and `Into` traits + +#[macro_use] +mod from_array; + +#[macro_use] +mod from_vector; diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs new file mode 100644 index 000000000000..964d1501df6a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from/from_array.rs @@ -0,0 +1,121 @@ +//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. + +macro_rules! impl_from_array { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | ($non_default_array:expr, $non_default_vec:expr)) => { + impl From<[$elem_ty; $elem_count]> for $id { + #[inline] + fn from(array: [$elem_ty; $elem_count]) -> Self { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { array }.vec } + } + } + + impl From<$id> for [$elem_ty; $elem_count] { + #[inline] + fn from(vec: $id) -> Self { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { vec }.array } + } + } + + // FIXME: `Into::into` is not inline, but due to + // the blanket impl in `std`, which is not + // marked `default`, we cannot override it here with + // specialization. + /* + impl Into<[$elem_ty; $elem_count]> for $id { + #[inline] + fn into(self) -> [$elem_ty; $elem_count] { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { vec: self }.array } + } + } + + impl Into<$id> for [$elem_ty; $elem_count] { + #[inline] + fn into(self) -> $id { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { array: self }.vec } + } + } + */ + + test_if! { + $test_tt: + paste::item! { + mod [<$id _from>] { + use super::*; + #[test] + fn array() { + let vec: $id = Default::default(); + + // FIXME: Workaround for arrays with more than 32 + // elements. + // + // Safe because we never take a reference to any + // uninitialized element. 
+ union W { + array: [$elem_ty; $elem_count], + other: () + } + let mut array = W { other: () }; + for i in 0..$elem_count { + let default: $elem_ty = Default::default(); + // note: array.other is the active member and + // initialized so we can take a reference to it: + let p = unsafe { + &mut array.other as *mut () as *mut $elem_ty + }; + // note: default is a valid bit-pattern for + // $elem_ty: + unsafe { + crate::ptr::write(p.wrapping_add(i), default) + }; + } + // note: the array variant of the union is properly + // initialized: + let mut array = unsafe { + array.array + }; + + array[0] = $non_default_array; + let vec = vec.replace(0, $non_default_vec); + + let vec_from_array = $id::from(array); + assert_eq!(vec_from_array, vec); + let array_from_vec + = <[$elem_ty; $elem_count]>::from(vec); + // FIXME: Workaround for arrays with more than 32 + // elements. + for i in 0..$elem_count { + assert_eq!(array_from_vec[i], array[i]); + } + + let vec_from_into_array: $id = array.into(); + assert_eq!(vec_from_into_array, vec); + let array_from_into_vec: [$elem_ty; $elem_count] + = vec.into(); + // FIXME: Workaround for arrays with more than 32 + // elements. + for i in 0..$elem_count { + assert_eq!(array_from_into_vec[i], array[i]); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs new file mode 100644 index 000000000000..55f70016d51d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs @@ -0,0 +1,67 @@ +//! Implements `From` and `Into` for vector types. + +macro_rules! 
impl_from_vector { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | $source:ident) => { + impl From<$source> for $id { + #[inline] + fn from(source: $source) -> Self { + fn static_assert_same_number_of_lanes<T, U>() + where + T: crate::sealed::Simd, + U: crate::sealed::Simd<LanesType = T::LanesType>, + { + } + use crate::llvm::simd_cast; + static_assert_same_number_of_lanes::<$id, $source>(); + Simd(unsafe { simd_cast(source.0) }) + } + } + + // FIXME: `Into::into` is not inline, but due to the blanket impl in + // `std`, which is not marked `default`, we cannot override it here + // with specialization. + + /* + impl Into<$id> for $source { + #[inline] + fn into(self) -> $id { + unsafe { simd_cast(self) } + } + } + */ + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _from_ $source>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from() { + assert_eq!($id::lanes(), $source::lanes()); + let source: $source = Default::default(); + let vec: $id = Default::default(); + + let e = $id::from(source); + assert_eq!(e, vec); + + let e: $id = source.into(); + assert_eq!(e, vec); + } + } + } + } + }; +} + +macro_rules! impl_from_vectors { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | $($source:ident),*) => { + $( + impl_from_vector!( + [$elem_ty; $elem_count]: $id | $test_tt | $source + ); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs new file mode 100644 index 000000000000..08d42496ea8b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/hash.rs @@ -0,0 +1,47 @@ +//! Implements `Hash` for vector types. + +macro_rules!
impl_hash { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::hash::Hash for $id { + #[inline] + fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { + unsafe { + union A { + data: [$elem_ty; $id::lanes()], + vec: $id, + } + A { vec: *self }.data.hash(state) + } + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _hash>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn hash() { + use crate::hash::{Hash, Hasher}; + #[allow(deprecated)] + use crate::hash::{SipHasher13}; + type A = [$elem_ty; $id::lanes()]; + let a: A = [42 as $elem_ty; $id::lanes()]; + assert_eq!( + crate::mem::size_of::<A>(), + crate::mem::size_of::<$id>() + ); + #[allow(deprecated)] + let mut a_hash = SipHasher13::new(); + let mut v_hash = a_hash.clone(); + a.hash(&mut a_hash); + + let v = $id::splat(42 as $elem_ty); + v.hash(&mut v_hash); + assert_eq!(a_hash.finish(), v_hash.finish()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs new file mode 100644 index 000000000000..f2cc1bae5397 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits.rs @@ -0,0 +1,59 @@ +//! Implementation of `FromBits` and `IntoBits`. + +/// Safe lossless bitwise conversion from `T` to `Self`. +pub trait FromBits<T>: crate::marker::Sized { + /// Safe lossless bitwise transmute from `T` to `Self`. + fn from_bits(t: T) -> Self; +} + +/// Safe lossless bitwise conversion from `Self` to `T`. +pub trait IntoBits<T>: crate::marker::Sized { + /// Safe lossless bitwise transmute from `self` to `T`. + fn into_bits(self) -> T; +} + +/// `FromBits` implies `IntoBits`.
+impl<T, U> IntoBits<U> for T +where + U: FromBits<T>, +{ + #[inline] + fn into_bits(self) -> U { + debug_assert!( + crate::mem::size_of::<Self>() == crate::mem::size_of::<U>() + ); + U::from_bits(self) + } +} + +/// `FromBits` and `IntoBits` are reflexive +impl<T> FromBits<T> for T { + #[inline] + fn from_bits(t: Self) -> Self { + t + } +} + +#[macro_use] +mod macros; + +mod v16; +pub use self::v16::*; + +mod v32; +pub use self::v32::*; + +mod v64; +pub use self::v64::*; + +mod v128; +pub use self::v128::*; + +mod v256; +pub use self::v256::*; + +mod v512; +pub use self::v512::*; + +mod arch_specific; +pub use self::arch_specific::*; diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs new file mode 100644 index 000000000000..6cc2fa37b728 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs @@ -0,0 +1,190 @@ +//! `FromBits` and `IntoBits` between portable vector types and the +//! architecture-specific vector types. +#![rustfmt::skip] + +// FIXME: MIPS FromBits/IntoBits + +#[allow(unused)] +use crate::*; + +/// This macro implements FromBits for the portable and the architecture +/// specific vector types. +/// +/// The "leaf" case is at the bottom, and the most generic case is at the top. +/// The generic case is split into smaller cases recursively. +macro_rules!
impl_arch { + ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], + $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | + from: $($from_ty:ident),* | into: $($into_ty:ident),* | + test: $test_tt:tt) => { + impl_arch!( + [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | + from: $($from_ty),* | + into: $($into_ty),* | + test: $test_tt + ); + impl_arch!( + $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | + from: $($from_ty),* | + into: $($into_ty),* | + test: $test_tt + ); + }; + ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | + from: $($from_ty:ident),* | into: $($into_ty:ident),* | + test: $test_tt:tt) => { + // note: if target is "arm", "+v7,+neon" must be enabled + // and the std library must be recompiled with them + #[cfg(any( + not(target_arch = "arm"), + all(target_feature = "v7", target_feature = "neon", + any(feature = "core_arch", libcore_neon))) + )] + // note: if target is "powerpc", "altivec" must be enabled + // and the std library must be recompiled with it + #[cfg(any( + not(target_arch = "powerpc"), + all(target_feature = "altivec", feature = "core_arch"), + ))] + #[cfg(target_arch = $arch_tt)] + use crate::arch::$arch::{ + $($arch_ty),* + }; + + #[cfg(any( + not(target_arch = "arm"), + all(target_feature = "v7", target_feature = "neon", + any(feature = "core_arch", libcore_neon))) + )] + #[cfg(any( + not(target_arch = "powerpc"), + all(target_feature = "altivec", feature = "core_arch"), + ))] + #[cfg(target_arch = $arch_tt)] + impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | + test: $test_tt); + }; + ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* + | $($into_ty:ident),* | test: $test_tt:tt) => { + impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | + test: $test_tt); + impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | + test: $test_tt); + }; + ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* | + test: $test_tt:tt) => { + 
impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); + impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); + }; +} + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 64-bit wide vector types: + +// FIXME: 64-bit single element types +// FIXME: arm/aarch float16x4_t missing +impl_arch!( + [x86["x86"]: __m64], [x86_64["x86_64"]: __m64], + [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, + poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, + uint64x1_t], + [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, + poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, + float64x1_t] | + from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | + into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | + test: test_v64 +); + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 128-bit wide vector types: + +// FIXME: arm/aarch float16x8_t missing +// FIXME: ppc vector_pixel missing +// FIXME: ppc64 vector_Float16 missing +// FIXME: ppc64 vector_signed_long_long missing +// FIXME: ppc64 vector_unsigned_long_long missing +// FIXME: ppc64 vector_bool_long_long missing +// FIXME: ppc64 vector_signed___int128 missing +// FIXME: ppc64 vector_unsigned___int128 missing +impl_arch!( + [x86["x86"]: __m128, __m128i, __m128d], + [x86_64["x86_64"]: __m128, __m128i, __m128d], + [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, + poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], + [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, + uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, + uint64x2_t, float64x2_t], + [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, + vector_signed_short, vector_unsigned_short, vector_signed_int, + vector_unsigned_int, vector_float], + [powerpc64["powerpc64"]: 
vector_signed_char, vector_unsigned_char, + vector_signed_short, vector_unsigned_short, vector_signed_int, + vector_unsigned_int, vector_float, vector_signed_long, + vector_unsigned_long, vector_double] | + from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, + i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, + i128x1, u128x1 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_char], + [powerpc64["powerpc64"]: vector_bool_char] | + from: m8x16, m16x8, m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_short], + [powerpc64["powerpc64"]: vector_bool_short] | + from: m16x8, m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_int], + [powerpc64["powerpc64"]: vector_bool_int] | + from: m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8, m32x4 | + test: test_v128 +); + +impl_arch!( + [powerpc64["powerpc64"]: vector_bool_long] | + from: m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8, m32x4, m64x2 | + test: test_v128 +); + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 256-bit wide vector types + +impl_arch!( + [x86["x86"]: __m256, __m256i, __m256d], + [x86_64["x86_64"]: __m256, __m256i, __m256d] | + from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, + i32x8, u32x8, f32x8, m32x8, + i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | + into: i8x32, u8x32, i16x16, u16x16, i32x8, 
u32x8, f32x8, + i64x4, u64x4, f64x4, i128x2, u128x2 | + test: test_v256 +); + +//////////////////////////////////////////////////////////////////////////////// +// FIXME: Implementations for the 512-bit wide vector types diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs new file mode 100644 index 000000000000..8cec5b00479f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs @@ -0,0 +1,74 @@ +//! Macros implementing `FromBits` + +macro_rules! impl_from_bits_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::into_bits::FromBits<$from_ty> for $id { + #[inline] + fn from_bits(x: $from_ty) -> Self { + unsafe { crate::mem::transmute(x) } + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _from_bits_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + use crate::{ + ptr::{read_unaligned}, + mem::{size_of, zeroed} + }; + use crate::IntoBits; + assert_eq!(size_of::<$id>(), + size_of::<$from_ty>()); + // This is safe because we never create a reference to + // uninitialized memory: + let a: $from_ty = unsafe { zeroed() }; + + let b_0: $id = crate::FromBits::from_bits(a); + let b_1: $id = a.into_bits(); + + // Check that these are byte-wise equal, that is, + // that the bit patterns are identical: + for i in 0..size_of::<$id>() { + // This is safe because we only read initialized + // memory in bounds. Also, taking a reference to + // `b_i` is ok because the fields are initialized. + unsafe { + let b_0_v: u8 = read_unaligned( + (&b_0 as *const $id as *const u8) + .wrapping_add(i) + ); + let b_1_v: u8 = read_unaligned( + (&b_1 as *const $id as *const u8) + .wrapping_add(i) + ); + assert_eq!(b_0_v, b_1_v); + } + } + } + } + } + } + }; +} + +macro_rules!
impl_from_bits { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_bits_!($id[$test_tt]: $from_ty); + )* + } +} + +#[allow(unused)] +macro_rules! impl_into_bits { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_bits_!($from_ty[$test_tt]: $id); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs new file mode 100644 index 000000000000..804dbf282d53 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs @@ -0,0 +1,28 @@ +//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1); + +impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1); + +impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m32x4[test_v128]: m64x2, m128x1); + +impl_from_bits!(i64x2[test_v128]: 
i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m64x2[test_v128]: m128x1); + +impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); +impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); +// note: m128x1 cannot be constructed from all the other masks bit patterns in here + diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs new file mode 100644 index 000000000000..1162a62e5bd1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs @@ -0,0 +1,9 @@ +//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); +impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); +// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs new file mode 100644 index 000000000000..cc7a6646b535 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs @@ -0,0 +1,27 @@ +//! 
`FromBits` and `IntoBits` implementations for portable 256-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); + +impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); + +impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); + +impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m64x4[test_v256]: m128x2); + +impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, 
m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); +impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); +// note: m128x2 cannot be constructed from all the other masks bit patterns in here diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs new file mode 100644 index 000000000000..2c183ecf1c77 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs @@ -0,0 +1,13 @@ +//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); +impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); +impl_from_bits!(m8x4[test_v32]: m16x2); + +impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); +impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); +// note: m16x2 cannot be constructed from all m8x4 bit patterns diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs new file mode 100644 index 000000000000..8dec6a7f63a0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs @@ -0,0 +1,27 @@ +//! 
`FromBits` and `IntoBits` implementations for portable 512-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); + +impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); + +impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); + +impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m64x8[test_v512]: m128x4); + +impl_from_bits!(i128x4[test_v512]: 
i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); +impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4); +// note: m128x4 cannot be constructed from all the other masks bit patterns in here diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs new file mode 100644 index 000000000000..8999d98e13f8 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs @@ -0,0 +1,18 @@ +//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors +#![rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); + +impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(m16x4[test_v64]: m32x2); + +impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); +impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); +impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); +// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs new file mode 100644 index 000000000000..e7a8d256baf5 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math.rs @@ -0,0 +1,4 @@ +//! 
Implements vertical math operations + +#[macro_use] +mod float; diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs new file mode 100644 index 000000000000..c0ec46e91789 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float.rs @@ -0,0 +1,61 @@ +//! Implements vertical floating-point math operations. + +#[macro_use] +mod abs; + +#[macro_use] +mod consts; + +#[macro_use] +mod cos; + +#[macro_use] +mod exp; + +#[macro_use] +mod powf; + +#[macro_use] +mod ln; + +#[macro_use] +mod mul_add; + +#[macro_use] +mod mul_adde; + +#[macro_use] +mod recpre; + +#[macro_use] +mod rsqrte; + +#[macro_use] +mod sin; + +#[macro_use] +mod sqrt; + +#[macro_use] +mod sqrte; + +macro_rules! impl_float_category { + ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { + impl $id { + #[inline] + pub fn is_nan(self) -> $mask_ty { + self.ne(self) + } + + #[inline] + pub fn is_infinite(self) -> $mask_ty { + self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) + } + + #[inline] + pub fn is_finite(self) -> $mask_ty { + !(self.is_nan() | self.is_infinite()) + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs new file mode 100644 index 000000000000..1865bdb68ec6 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs @@ -0,0 +1,31 @@ +//! Implements vertical (lane-wise) floating-point `abs`. + +macro_rules! impl_math_float_abs { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Absolute value. + #[inline] + pub fn abs(self) -> Self { + use crate::codegen::math::float::abs::Abs; + Abs::abs(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_abs>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn abs() { + let o = $id::splat(1 as $elem_ty); + assert_eq!(o, o.abs()); + + let mo = $id::splat(-1 as $elem_ty); + assert_eq!(o, mo.abs()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs new file mode 100644 index 000000000000..89f93a6d692b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs @@ -0,0 +1,86 @@ +macro_rules! impl_float_consts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { + impl $id { + /// Machine epsilon value. + pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); + + /// Smallest finite value. + pub const MIN: $id = $id::splat(core::$elem_ty::MIN); + + /// Smallest positive normal value. + pub const MIN_POSITIVE: $id = + $id::splat(core::$elem_ty::MIN_POSITIVE); + + /// Largest finite value. + pub const MAX: $id = $id::splat(core::$elem_ty::MAX); + + /// Not a Number (NaN). + pub const NAN: $id = $id::splat(core::$elem_ty::NAN); + + /// Infinity (∞). + pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); + + /// Negative infinity (-∞). 
+ pub const NEG_INFINITY: $id = + $id::splat(core::$elem_ty::NEG_INFINITY); + + /// Archimedes' constant (π) + pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); + + /// π/2 + pub const FRAC_PI_2: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_2); + + /// π/3 + pub const FRAC_PI_3: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_3); + + /// π/4 + pub const FRAC_PI_4: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_4); + + /// π/6 + pub const FRAC_PI_6: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_6); + + /// π/8 + pub const FRAC_PI_8: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_8); + + /// 1/π + pub const FRAC_1_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_1_PI); + + /// 2/π + pub const FRAC_2_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_2_PI); + + /// 2/sqrt(π) + pub const FRAC_2_SQRT_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); + + /// sqrt(2) + pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); + + /// 1/sqrt(2) + pub const FRAC_1_SQRT_2: $id = + $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); + + /// Euler's number (e) + pub const E: $id = $id::splat(core::$elem_ty::consts::E); + + /// log2(e) + pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); + + /// log10(e) + pub const LOG10_E: $id = + $id::splat(core::$elem_ty::consts::LOG10_E); + + /// ln(2) + pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); + + /// ln(10) + pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs new file mode 100644 index 000000000000..e5b8f46036c7 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `cos`. + +macro_rules! impl_math_float_cos { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Cosine. 
+ #[inline] + pub fn cos(self) -> Self { + use crate::codegen::math::float::cos::Cos; + Cos::cos(self) + } + + /// Cosine of `self * PI`. + #[inline] + pub fn cos_pi(self) -> Self { + use crate::codegen::math::float::cos_pi::CosPi; + CosPi::cos_pi(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_cos>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cos() { + use crate::$elem_ty::consts::PI; + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let p = $id::splat(PI as $elem_ty); + let ph = $id::splat(PI as $elem_ty / 2.); + let z_r = $id::splat((PI as $elem_ty / 2.).cos()); + let o_r = $id::splat((PI as $elem_ty).cos()); + + assert_eq!(o, z.cos()); + assert_eq!(z_r, ph.cos()); + assert_eq!(o_r, p.cos()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs new file mode 100644 index 000000000000..e3356d853a83 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs @@ -0,0 +1,33 @@ +//! Implements vertical (lane-wise) floating-point `exp`. + +macro_rules! impl_math_float_exp { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the exponential function of `self`: `e^(self)`. + #[inline] + pub fn exp(self) -> Self { + use crate::codegen::math::float::exp::Exp; + Exp::exp(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_exp>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn exp() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(o, z.exp()); + + let e = $id::splat(crate::f64::consts::E as $elem_ty); + let tol = $id::splat(2.4e-4 as $elem_ty); + assert!((e - o.exp()).abs().le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs new file mode 100644 index 000000000000..5ceb9173ae05 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs @@ -0,0 +1,33 @@ +//! Implements vertical (lane-wise) floating-point `ln`. + +macro_rules! impl_math_float_ln { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the natural logarithm of `self`. + #[inline] + pub fn ln(self) -> Self { + use crate::codegen::math::float::ln::Ln; + Ln::ln(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_ln>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ln() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(z, o.ln()); + + let e = $id::splat(crate::f64::consts::E as $elem_ty); + let tol = $id::splat(2.4e-4 as $elem_ty); + assert!((o - e.ln()).abs().le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs new file mode 100644 index 000000000000..4b170ee2b755 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `mul_add`. + +macro_rules! 
impl_math_float_mul_add { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Fused multiply add: `self * y + z` + #[inline] + pub fn mul_add(self, y: Self, z: Self) -> Self { + use crate::codegen::math::float::mul_add::MulAdd; + MulAdd::mul_add(self, y, z) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_mul_add>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn mul_add() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let t3 = $id::splat(3 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + assert_eq!(z, z.mul_add(z, z)); + assert_eq!(o, o.mul_add(o, z)); + assert_eq!(o, o.mul_add(z, o)); + assert_eq!(o, z.mul_add(o, o)); + + assert_eq!(t, o.mul_add(o, o)); + assert_eq!(t, o.mul_add(t, z)); + assert_eq!(t, t.mul_add(o, z)); + + assert_eq!(f, t.mul_add(t, z)); + assert_eq!(f, t.mul_add(o, t)); + assert_eq!(t3, t.mul_add(o, o)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs new file mode 100644 index 000000000000..c5b27110f2d7 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs @@ -0,0 +1,48 @@ +//! Implements vertical (lane-wise) floating-point `mul_adde`. + +macro_rules! impl_math_float_mul_adde { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Fused multiply add estimate: ~= `self * y + z` + /// + /// While fused multiply-add (`fma`) has infinite precision, + /// `mul_adde` has _at worst_ the same precision of a multiply followed by an add. + /// This might be more efficient on architectures that do not have an `fma` instruction. 
+ #[inline] + pub fn mul_adde(self, y: Self, z: Self) -> Self { + use crate::codegen::math::float::mul_adde::MulAddE; + MulAddE::mul_adde(self, y, z) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_mul_adde>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn mul_adde() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let t3 = $id::splat(3 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + assert_eq!(z, z.mul_adde(z, z)); + assert_eq!(o, o.mul_adde(o, z)); + assert_eq!(o, o.mul_adde(z, o)); + assert_eq!(o, z.mul_adde(o, o)); + + assert_eq!(t, o.mul_adde(o, o)); + assert_eq!(t, o.mul_adde(t, z)); + assert_eq!(t, t.mul_adde(o, z)); + + assert_eq!(f, t.mul_adde(t, z)); + assert_eq!(f, t.mul_adde(o, t)); + assert_eq!(t3, t.mul_adde(o, o)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs new file mode 100644 index 000000000000..83dc9ff9c05e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs @@ -0,0 +1,36 @@ +//! Implements vertical (lane-wise) floating-point `powf`. + +macro_rules! impl_math_float_powf { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Raises `self` number to the floating point power of `x`. + #[inline] + pub fn powf(self, x: Self) -> Self { + use crate::codegen::math::float::powf::Powf; + Powf::powf(self, x) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_powf>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn powf() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + assert_eq!(o, o.powf(z)); + assert_eq!(o, t.powf(z)); + assert_eq!(o, o.powf(o)); + assert_eq!(t, t.powf(o)); + + let f = $id::splat(4 as $elem_ty); + assert_eq!(f, t.powf(t)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs new file mode 100644 index 000000000000..127f0b2ff674 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs @@ -0,0 +1,36 @@ +//! Implements vertical (lane-wise) floating-point `recpre`. + +macro_rules! impl_math_float_recpre { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reciprocal estimate: `~= 1. / self`. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn recpre(self) -> Self { + $id::splat(1.) / self + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_recpre>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn recpre() { + let tol = $id::splat(2.4e-4 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let error = (o - o.recpre()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = 0.5; + let error = (e - t.recpre()).abs(); + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs new file mode 100644 index 000000000000..c77977f7b1cd --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs @@ -0,0 +1,40 @@ +//! Implements vertical (lane-wise) floating-point `rsqrte`. 
+ +macro_rules! impl_math_float_rsqrte { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn rsqrte(self) -> Self { + unsafe { + use crate::llvm::simd_fsqrt; + $id::splat(1.) / Simd(simd_fsqrt(self.0)) + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_rsqrte>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn rsqrte() { + use crate::$elem_ty::consts::SQRT_2; + let tol = $id::splat(2.4e-4 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let error = (o - o.rsqrte()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = 1. / SQRT_2; + let error = (e - t.rsqrte()).abs(); + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs new file mode 100644 index 000000000000..49908319b126 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs @@ -0,0 +1,50 @@ +//! Implements vertical (lane-wise) floating-point `sin`. + +macro_rules! impl_math_float_sin { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Sine. + #[inline] + pub fn sin(self) -> Self { + use crate::codegen::math::float::sin::Sin; + Sin::sin(self) + } + + /// Sine of `self * PI`. + #[inline] + pub fn sin_pi(self) -> Self { + use crate::codegen::math::float::sin_pi::SinPi; + SinPi::sin_pi(self) + } + + /// Sine and cosine of `self * PI`. + #[inline] + pub fn sin_cos_pi(self) -> (Self, Self) { + use crate::codegen::math::float::sin_cos_pi::SinCosPi; + SinCosPi::sin_cos_pi(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_sin>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sin() { + use crate::$elem_ty::consts::PI; + let z = $id::splat(0 as $elem_ty); + let p = $id::splat(PI as $elem_ty); + let ph = $id::splat(PI as $elem_ty / 2.); + let o_r = $id::splat((PI as $elem_ty / 2.).sin()); + let z_r = $id::splat((PI as $elem_ty).sin()); + + assert_eq!(z, z.sin()); + assert_eq!(o_r, ph.sin()); + assert_eq!(z_r, p.sin()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs new file mode 100644 index 000000000000..ae624122d0e2 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs @@ -0,0 +1,35 @@ +//! Implements vertical (lane-wise) floating-point `sqrt`. + +macro_rules! impl_math_float_sqrt { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + #[inline] + pub fn sqrt(self) -> Self { + use crate::codegen::math::float::sqrt::Sqrt; + Sqrt::sqrt(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_sqrt>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sqrt() { + use crate::$elem_ty::consts::SQRT_2; + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(z, z.sqrt()); + assert_eq!(o, o.sqrt()); + + let t = $id::splat(2 as $elem_ty); + let e = $id::splat(SQRT_2); + assert_eq!(e, t.sqrt()); + + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs new file mode 100644 index 000000000000..f7ffad748d9c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `sqrte`. + +macro_rules! 
impl_math_float_sqrte { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Square-root estimate. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn sqrte(self) -> Self { + use crate::codegen::math::float::sqrte::Sqrte; + Sqrte::sqrte(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_sqrte>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sqrte() { + use crate::$elem_ty::consts::SQRT_2; + let tol = $id::splat(2.4e-4 as $elem_ty); + + let z = $id::splat(0 as $elem_ty); + let error = (z - z.sqrte()).abs(); + assert!(error.le(tol).all()); + + let o = $id::splat(1 as $elem_ty); + let error = (o - o.sqrte()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = $id::splat(SQRT_2 as $elem_ty); + let error = (e - t.sqrte()).abs(); + + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs new file mode 100644 index 000000000000..840d9e32585d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal.rs @@ -0,0 +1,6 @@ +#[macro_use] +mod iuf; +#[macro_use] +mod mask; +#[macro_use] +mod ptr; diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs new file mode 100644 index 000000000000..58ffabab994f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs @@ -0,0 +1,167 @@ +//! Minimal API of signed integer, unsigned integer, and floating-point +//! vectors. + +macro_rules! 
impl_minimal_iuf { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | + $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { + + $(#[$doc])* + pub type $id = Simd<[$elem_ty; $elem_count]>; + + impl sealed::Simd for $id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl $id { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: $elem_ty),*) -> Self { + Simd(codegen::$id($($elem_name as $ielem_ty),*)) + } + + /// Returns the number of vector lanes. + #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: $elem_ty) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value as $ielem_ty + }),*)) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> $elem_ty { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// # Precondition + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { + use crate::llvm::simd_extract; + let e: $ielem_ty = simd_extract(self.0, index as u32); + e as $elem_ty + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. 
+ #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Precondition + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: $elem_ty, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty)) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // lanes: + assert_eq!($elem_count, $id::lanes()); + + // splat and extract / extract_unchecked: + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + for i in 0..$id::lanes() { + assert_eq!(VAL, VEC.extract(i)); + assert_eq!( + VAL, unsafe { VEC.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = VEC.replace(0, 42 as $elem_ty); + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(42 as $elem_ty, new_vec.extract(i)); + } else { + assert_eq!(VAL, new_vec.extract(i)); + } + } + let new_vec = unsafe { + VEC.replace_unchecked(0, 42 as $elem_ty) + }; + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(42 as $elem_ty, new_vec.extract(i)); + } else { + assert_eq!(VAL, new_vec.extract(i)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", 
wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + let _ = VEC.extract($id::lanes()); + } + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + let _ = VEC.replace($id::lanes(), 42 as $elem_ty); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs new file mode 100644 index 000000000000..e65be95db12c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs @@ -0,0 +1,174 @@ +//! Minimal API of mask vectors. + +macro_rules! impl_minimal_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident + | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { + $(#[$doc])* + pub type $id = Simd<[$elem_ty; $elem_count]>; + + impl sealed::Simd for $id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl $id { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: bool),*) -> Self { + Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*)) + } + + /// Converts a boolean type into the type of the vector lanes. + #[inline] + #[allow(clippy::indexing_slicing)] + const fn bool_to_internal(x: bool) -> $ielem_ty { + [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize] + } + + /// Returns the number of vector lanes. 
+ #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: bool) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + Self::bool_to_internal(value) + }),*)) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> bool { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> bool { + use crate::llvm::simd_extract; + let x: $ielem_ty = simd_extract(self.0, index as u32); + x != 0 + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub fn replace(self, index: usize, new_value: bool) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: bool, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, + Self::bool_to_internal(new_value))) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // TODO: test new + + // lanes: + assert_eq!($elem_count, $id::lanes()); + + // splat and extract / extract_unchecked: + let vec = $id::splat(true); + for i in 0..$id::lanes() { + assert_eq!(true, vec.extract(i)); + assert_eq!(true, + unsafe { vec.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = vec.replace(0, false); + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(false, new_vec.extract(i)); + } else { + assert_eq!(true, new_vec.extract(i)); + } + } + let new_vec = unsafe { + vec.replace_unchecked(0, false) + }; + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(false, new_vec.extract(i)); + } else { + assert_eq!(true, new_vec.extract(i)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + let vec = $id::splat(false); + let _ = vec.extract($id::lanes()); + } + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + let vec = $id::splat(false); + let _ = vec.replace($id::lanes(), true); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs new file mode 100644 index 000000000000..75e5aad5c065 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs @@ -0,0 +1,1385 @@ +//! Minimal API of pointer vectors. + +macro_rules! 
impl_minimal_p { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident, + $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt + | $($elem_name:ident),+ | ($true:expr, $false:expr) | + $(#[$doc:meta])*) => { + + $(#[$doc])* + pub type $id = Simd<[$elem_ty; $elem_count]>; + + impl sealed::Simd for $id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl $id { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: $elem_ty),*) -> Self { + Simd(codegen::$id($($elem_name),*)) + } + + /// Returns the number of vector lanes. + #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: $elem_ty) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value + }),*)) + } + + /// Constructs a new instance with each element initialized to + /// `null`. + #[inline] + pub const fn null() -> Self { + Self::splat(crate::ptr::null_mut() as $elem_ty) + } + + /// Returns a mask that selects those lanes that contain `null` + /// pointers. + #[inline] + pub fn is_null(self) -> $mask_ty { + self.eq(Self::null()) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> $elem_ty { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// # Precondition + /// + /// If `index >= Self::lanes()` the behavior is undefined. 
+ #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { + use crate::llvm::simd_extract; + simd_extract(self.0, index as u32) + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + #[allow(clippy::not_unsafe_ptr_arg_deref)] + pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Precondition + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: $elem_ty, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, new_value)) + } + } + + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // lanes: + assert_eq!($elem_count, $id::::lanes()); + + // splat and extract / extract_unchecked: + let VAL7: <$id as sealed::Simd>::Element + = $ref!(7); + let VAL42: <$id as sealed::Simd>::Element + = $ref!(42); + let VEC: $id = $id::splat(VAL7); + for i in 0..$id::::lanes() { + assert_eq!(VAL7, VEC.extract(i)); + assert_eq!( + VAL7, unsafe { VEC.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = VEC.replace(0, VAL42); + for i in 0..$id::::lanes() { + if i == 0 { + assert_eq!(VAL42, new_vec.extract(i)); + } else { + assert_eq!(VAL7, new_vec.extract(i)); + } + } + let new_vec = unsafe { + VEC.replace_unchecked(0, VAL42) + }; + for i in 0..$id::::lanes() { + if i == 0 { + assert_eq!(VAL42, new_vec.extract(i)); + } else { + assert_eq!(VAL7, new_vec.extract(i)); + } + } + + let mut n = $id::::null(); + assert_eq!( + n, + $id::::splat(unsafe { crate::mem::zeroed() }) + ); + assert!(n.is_null().all()); + n = n.replace( + 0, unsafe { crate::mem::transmute(1_isize) } + ); + assert!(!n.is_null().all()); + if $id::::lanes() > 1 { + assert!(n.is_null().any()); + } else { + assert!(!n.is_null().any()); + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + let VAL: <$id as sealed::Simd>::Element + = $ref!(7); + let VEC: $id = $id::splat(VAL); + let _ = VEC.extract($id::::lanes()); + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + 
let VAL: <$id as sealed::Simd>::Element + = $ref!(7); + let VAL42: <$id as sealed::Simd>::Element + = $ref!(42); + let VEC: $id = $id::splat(VAL); + let _ = VEC.replace($id::::lanes(), VAL42); + } + } + } + } + + impl crate::fmt::Debug for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) + -> crate::fmt::Result { + write!( + f, + "{}<{}>(", + stringify!($id), + unsafe { crate::intrinsics::type_name::() } + )?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _fmt_debug>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn debug() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::::default(); + let mut s = TinyString::new(); + write!(&mut s, "{:?}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!( + s.starts_with(beg.as_str()), + "s = {} (should start with = {})", s, beg + ); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:?}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + + impl Default for $id { + #[inline] + fn default() -> Self { + // FIXME: ptrs do not implement default + Self::null() + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _default>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn default() { + let a = $id::::default(); + for i in 0..$id::::lanes() { + assert_eq!( + a.extract(i), unsafe { crate::mem::zeroed() } + ); + } + } + } + } + } + + impl $id { + /// Lane-wise equality comparison. + #[inline] + pub fn eq(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_eq; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_eq(a.0, b.0)) + } + } + + /// Lane-wise inequality comparison. + #[inline] + pub fn ne(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_ne; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_ne(a.0, b.0)) + } + } + + /// Lane-wise less-than comparison. + #[inline] + pub fn lt(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_lt; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_lt(a.0, b.0)) + } + } + + /// Lane-wise less-than-or-equals comparison. + #[inline] + pub fn le(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_le; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_le(a.0, b.0)) + } + } + + /// Lane-wise greater-than comparison. + #[inline] + pub fn gt(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_gt; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_gt(a.0, b.0)) + } + } + + /// Lane-wise greater-than-or-equals comparison. 
+ #[inline] + pub fn ge(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_ge; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_ge(a.0, b.0)) + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_vertical>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cmp() { + let a = $id::::null(); + let b = $id::::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + let r = a.lt(b); + let e = $mask_ty::splat(true); + assert!(r == e); + let r = a.le(b); + assert!(r == e); + + let e = $mask_ty::splat(false); + let r = a.gt(b); + assert!(r == e); + let r = a.ge(b); + assert!(r == e); + let r = a.eq(b); + assert!(r == e); + + let mut a = a; + let mut b = b; + let mut e = e; + for i in 0..$id::::lanes() { + if i % 2 == 0 { + a = a.replace( + i, + unsafe { crate::mem::transmute(0_isize) } + ); + b = b.replace( + i, + unsafe { crate::mem::transmute(1_isize) } + ); + e = e.replace(i, true); + } else { + a = a.replace( + i, + unsafe { crate::mem::transmute(1_isize) } + ); + b = b.replace( + i, + unsafe { crate::mem::transmute(0_isize) } + ); + e = e.replace(i, false); + } + } + let r = a.lt(b); + assert!(r == e); + } + } + } + } + + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq<$id> for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + $id::::eq(*self, *other).all() + } + #[inline] + fn ne(&self, other: &Self) -> bool { + $id::::ne(*self, *other).any() + } + } + + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq>> + for LexicographicallyOrdered<$id> + { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + #[inline] + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_PartialEq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_eq() { + let a = $id::::null(); + let b = $id::::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + + if $id::::lanes() > 1 { + let a = $id::::null().replace(0, unsafe { + crate::mem::transmute(1_isize) + }); + let b = $id::::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + } + } + } + } + } + + impl crate::cmp::Eq for $id {} + impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_eq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo(_: E) {} + let a = $id::::null(); + foo(a); + } + } + } + } + + impl From<[$elem_ty; $elem_count]> for $id { + #[inline] + fn from(array: [$elem_ty; $elem_count]) -> Self { + unsafe { + // FIXME: unnecessary zeroing; better than UB. + let mut u: Self = crate::mem::zeroed(); + crate::ptr::copy_nonoverlapping( + &array as *const [$elem_ty; $elem_count] as *const u8, + &mut u as *mut Self as *mut u8, + crate::mem::size_of::() + ); + u + } + } + } + impl Into<[$elem_ty; $elem_count]> for $id { + #[inline] + fn into(self) -> [$elem_ty; $elem_count] { + unsafe { + // FIXME: unnecessary zeroing; better than UB. + let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); + crate::ptr::copy_nonoverlapping( + &self as *const $id as *const u8, + &mut u as *mut [$elem_ty; $elem_count] as *mut u8, + crate::mem::size_of::() + ); + u + } + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _from>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn array() { + let values = [1_i32; $elem_count]; + + let mut vec: $id = Default::default(); + let mut array = [ + $id::::null().extract(0); $elem_count + ]; + + for i in 0..$elem_count { + let ptr = unsafe { + crate::mem::transmute( + &values[i] as *const i32 + ) + }; + vec = vec.replace(i, ptr); + array[i] = ptr; + } + + // FIXME: there is no impl of From<$id> for [$elem_ty; N] + // let a0 = From::from(vec); + // assert_eq!(a0, array); + #[allow(unused_assignments)] + let mut a1 = array; + a1 = vec.into(); + assert_eq!(a1, array); + + let v0: $id = From::from(array); + assert_eq!(v0, vec); + let v1: $id = array.into(); + assert_eq!(v1, vec); + } + } + } + } + + impl $id { + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::()` boundary. + #[inline] + pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + assert!( + target_ptr.align_offset(crate::mem::align_of::()) + == 0 + ); + Self::from_slice_aligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + Self::from_slice_unaligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::()` boundary, the behavior is undefined. 
+ #[inline] + pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) + -> Self { + #[allow(clippy::cast_ptr_alignment)] + *(slice.get_unchecked(0) as *const $elem_ty as *const Self) + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn from_slice_unaligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + use crate::mem::size_of; + let target_ptr = + slice.get_unchecked(0) as *const $elem_ty as *const u8; + let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); + let self_ptr = &mut x as *mut Self as *mut u8; + crate::ptr::copy_nonoverlapping( + target_ptr, + self_ptr, + size_of::(), + ); + x + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _slice_from_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_unaligned() { + let (null, non_null) = ptr_vals!($id); + + let mut unaligned = [ + non_null; $id::::lanes() + 1 + ]; + unaligned[0] = null; + let vec = $id::::from_slice_unaligned( + &unaligned[1..] + ); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_unaligned_fail() { + let (_null, non_null) = ptr_vals!($id); + let unaligned = [non_null; $id::::lanes() + 1]; + // the slice is not large enough => panic + let _vec = $id::::from_slice_unaligned( + &unaligned[2..] 
+ ); + } + + union A { + data: [<$id as sealed::Simd>::Element; + 2 * $id::::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_aligned() { + let (null, non_null) = ptr_vals!($id); + let mut aligned = A { + data: [null; 2 * $id::::lanes()], + }; + for i in + $id::::lanes()..(2 * $id::::lanes()) { + unsafe { + aligned.data[i] = non_null; + } + } + + let vec = unsafe { + $id::::from_slice_aligned( + &aligned.data[$id::::lanes()..] + ) + }; + for (index, &b) in unsafe { + aligned.data.iter().enumerate() + } { + if index < $id::::lanes() { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!( + b, vec.extract(index - $id::::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_lanes() { + let (_null, non_null) = ptr_vals!($id); + let aligned = A { + data: [non_null; 2 * $id::::lanes()], + }; + // the slice is not large enough => panic + let _vec = unsafe { + $id::::from_slice_aligned( + &aligned.data[2 * $id::::lanes()..] 
+ ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_align() { + unsafe { + let (null, _non_null) = ptr_vals!($id); + let aligned = A { + data: [null; 2 * $id::::lanes()], + }; + + // get a pointer to the front of data + let ptr = aligned.data.as_ptr(); + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id>() + ) == 0 { + // the pointer is properly aligned, so + // from_slice_aligned won't fail here (e.g. this + // can happen for i128x1). So we panic to make + // the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s = slice::from_raw_parts( + ptr, $id::::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let _vec = $id::::from_slice_aligned(s); + } + } + } + } + } + + impl $id { + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::()` boundary. + #[inline] + pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty; + assert!( + target_ptr.align_offset(crate::mem::align_of::()) + == 0 + ); + self.write_to_slice_aligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. 
+ #[inline] + pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + self.write_to_slice_unaligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::()` boundary, the behavior is + /// undefined. + #[inline] + pub unsafe fn write_to_slice_aligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + #[allow(clippy::cast_ptr_alignment)] + *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = + self; + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn write_to_slice_unaligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; + let self_ptr = &self as *const Self as *const u8; + crate::ptr::copy_nonoverlapping( + self_ptr, + target_ptr, + crate::mem::size_of::(), + ); + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _slice_write_to_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_unaligned() { + let (null, non_null) = ptr_vals!($id); + let mut unaligned = [null; $id::::lanes() + 1]; + let vec = $id::::splat(non_null); + vec.write_to_slice_unaligned(&mut unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_unaligned_fail() { + let (null, non_null) = ptr_vals!($id); + let mut unaligned = [null; $id::::lanes() + 1]; + let vec = $id::::splat(non_null); + // the slice is not large enough => panic + vec.write_to_slice_unaligned(&mut unaligned[2..]); + } + + union A { + data: [<$id as sealed::Simd>::Element; + 2 * $id::::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_aligned() { + let (null, non_null) = ptr_vals!($id); + let mut aligned = A { + data: [null; 2 * $id::::lanes()], + }; + let vec = $id::::splat(non_null); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[$id::::lanes()..] 
+ ) + }; + for (index, &b) in + unsafe { aligned.data.iter().enumerate() } { + if index < $id::::lanes() { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!( + b, vec.extract(index - $id::::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_lanes() { + let (null, non_null) = ptr_vals!($id); + let mut aligned = A { + data: [null; 2 * $id::::lanes()], + }; + let vec = $id::::splat(non_null); + // the slice is not large enough => panic + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[2 * $id::::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_align() { + let (null, non_null) = ptr_vals!($id); + unsafe { + let mut aligned = A { + data: [null; 2 * $id::::lanes()], + }; + + // get a pointer to the front of data + let ptr = aligned.data.as_mut_ptr(); + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id>() + ) == 0 { + // the pointer is properly aligned, so + // write_to_slice_aligned won't fail here (e.g. + // this can happen for i128x1). 
So we panic to + // make the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s = slice::from_raw_parts_mut( + ptr, $id::::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let vec = $id::::splat(non_null); + vec.write_to_slice_aligned(s); + } + } + } + } + } + + impl crate::hash::Hash for $id { + #[inline] + fn hash(&self, state: &mut H) { + let s: $usize_ty = unsafe { crate::mem::transmute(*self) }; + s.hash(state) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _hash>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn hash() { + use crate::hash::{Hash, Hasher}; + #[allow(deprecated)] + use crate::hash::{SipHasher13}; + + let values = [1_i32; $elem_count]; + + let mut vec: $id = Default::default(); + let mut array = [ + $id::::null().extract(0); + $elem_count + ]; + + for i in 0..$elem_count { + let ptr = unsafe { + crate::mem::transmute( + &values[i] as *const i32 + ) + }; + vec = vec.replace(i, ptr); + array[i] = ptr; + } + + #[allow(deprecated)] + let mut a_hash = SipHasher13::new(); + let mut v_hash = a_hash.clone(); + array.hash(&mut a_hash); + vec.hash(&mut v_hash); + assert_eq!(a_hash.finish(), v_hash.finish()); + } + } + } + } + + impl $id { + /// Calculates the offset from a pointer. + /// + /// `count` is in units of `T`; e.g. a count of `3` represents a + /// pointer offset of `3 * size_of::()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. + /// + /// * The computed offset, in bytes, cannot overflow an `isize`. 
+ /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum, in bytes, + /// must fit in a `usize`. + /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)` + /// is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function. + /// + /// Consider using `wrapping_offset` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + pub unsafe fn offset(self, count: $isize_ty) -> Self { + // FIXME: should use LLVM's `add nsw nuw` + self.wrapping_offset(count) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// `count` is in units of `T`; e.g. a count of `3` represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires `unsafe`). + /// + /// Always use `.offset(count)` instead when possible, because + /// `offset` allows the compiler to optimize better. 
+ #[inline] + pub fn wrapping_offset(self, count: $isize_ty) -> Self { + unsafe { + let x: $isize_ty = crate::mem::transmute(self); + // note: {+,*} currently perform `wrapping_{add, mul}` + crate::mem::transmute( + x + (count * crate::mem::size_of::() as isize) + ) + } + } + + /// Calculates the distance between two pointers. + /// + /// The returned value is in units of `T`: the distance in bytes is + /// divided by `mem::size_of::<T>()`. + /// + /// This function is the inverse of `offset`. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and other pointer must be either in bounds + /// or one byte past the end of the same allocated object. + /// + /// * The distance between the pointers, in bytes, cannot overflow + /// an `isize`. + /// + /// * The distance between the pointers, in bytes, must be an exact + /// multiple of the size of `T`. + /// + /// * The distance being in bounds cannot rely on "wrapping around" + /// the address space. + /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` + /// is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function. 
+ /// + /// Consider using `wrapping_offset_from` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + pub unsafe fn offset_from(self, origin: Self) -> $isize_ty { + // FIXME: should use LLVM's `sub nsw nuw`. + self.wrapping_offset_from(origin) + } + + /// Calculates the distance between two pointers (`self - origin`). + /// + /// The returned value is in units of `T`: the distance in bytes is + /// divided by `mem::size_of::<T>()`. + /// + /// If the address difference between the two pointers is not a + /// multiple of `mem::size_of::<T>()` then the result of the + /// division is rounded towards zero. + /// + /// Though this method is safe for any two pointers, note that its + /// result will be mostly useless if the two pointers aren't into + /// the same allocated object, for example if they point to two + /// different local variables. + #[inline] + pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty { + let x: $isize_ty = unsafe { crate::mem::transmute(self) }; + let y: $isize_ty = unsafe { crate::mem::transmute(origin) }; + // `self - origin`, so this inverts `wrapping_offset`; note: {-,/} currently perform wrapping_{sub, div} + (x - y) / (crate::mem::size_of::<T>() as isize) + } + + /// Calculates the offset from a pointer (convenience for + /// `.offset(count as isize)`). + /// + /// `count` is in units of `T`; e.g. a count of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. + /// + /// * The computed offset, in bytes, cannot overflow an `isize`. + /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum must fit + /// in a `usize`. 
+ /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always + /// safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function. + /// + /// Consider using `wrapping_add` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + #[allow(clippy::should_implement_trait)] + pub unsafe fn add(self, count: $usize_ty) -> Self { + self.offset(count.cast()) + } + + /// Calculates the offset from a pointer (convenience for + /// `.offset((count as isize).wrapping_neg())`). + /// + /// `count` is in units of `T`; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. + /// + /// * The computed offset cannot exceed `isize::MAX` **bytes**. + /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum must fit + /// in a `usize`. 
+ /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so + /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table + /// limitations or splitting the address space. However, some 32-bit + /// and 16-bit platforms may successfully serve a request for more + /// than `isize::MAX` bytes with things like Physical Address + /// Extension. As such, memory acquired directly from allocators or + /// memory mapped files *may* be too large to handle with this + /// function. + /// + /// Consider using `wrapping_sub` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + #[allow(clippy::should_implement_trait)] + pub unsafe fn sub(self, count: $usize_ty) -> Self { + let x: $isize_ty = count.cast(); + // note: - is currently wrapping_neg + self.offset(-x) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// (convenience for `.wrapping_offset(count as isize)`) + /// + /// `count` is in units of `T`; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires `unsafe`). + /// + /// Always use `.add(count)` instead when possible, because `add` + /// allows the compiler to optimize better. + #[inline] + pub fn wrapping_add(self, count: $usize_ty) -> Self { + self.wrapping_offset(count.cast()) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. 
+ /// (convenience for `.wrapping_offset((count as + /// isize).wrapping_neg())`) + /// + /// `count` is in units of `T`; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires `unsafe`). + /// + /// Always use `.sub(count)` instead when possible, because `sub` + /// allows the compiler to optimize better. + #[inline] + pub fn wrapping_sub(self, count: $usize_ty) -> Self { + let x: $isize_ty = count.cast(); + self.wrapping_offset(-1 * x) + } + } + + impl $id { + /// Shuffle vector elements according to `indices`. + #[inline] + pub fn shuffle1_dyn(self, indices: I) -> Self + where + Self: codegen::shuffle1_dyn::Shuffle1Dyn, + { + codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + let (null, non_null) = ptr_vals!($id); + + // alternating = [non_null, null, non_null, null, ...] + let mut alternating = $id::::splat(null); + for i in 0..$id::::lanes() { + if i % 2 == 0 { + alternating = alternating.replace(i, non_null); + } + } + + type Indices = <$id + as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices; + // even = [0, 0, 2, 2, 4, 4, ...] + let even = { + let mut v = Indices::splat(0); + for i in 0..$id::::lanes() { + if i % 2 == 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 - 1).into()); + } + } + v + }; + // odd = [1, 1, 3, 3, 5, 5, ...] 
+ let odd = { + let mut v = Indices::splat(0); + for i in 0..$id::::lanes() { + if i % 2 != 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 + 1).into()); + } + } + v + }; + + assert_eq!( + alternating.shuffle1_dyn(even), + $id::::splat(non_null) + ); + if $id::::lanes() > 1 { + assert_eq!( + alternating.shuffle1_dyn(odd), + $id::::splat(null) + ); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs new file mode 100644 index 000000000000..f71c98795da3 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops.rs @@ -0,0 +1,32 @@ +//! Implementation of the `ops` traits +#[macro_use] +mod vector_mask_bitwise; +#[macro_use] +mod scalar_mask_bitwise; + +#[macro_use] +mod vector_arithmetic; +#[macro_use] +mod scalar_arithmetic; + +#[macro_use] +mod vector_bitwise; +#[macro_use] +mod scalar_bitwise; + +#[macro_use] +mod vector_shifts; +#[macro_use] +mod scalar_shifts; + +#[macro_use] +mod vector_rotates; + +#[macro_use] +mod vector_neg; + +#[macro_use] +mod vector_int_min_max; + +#[macro_use] +mod vector_float_min_max; diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs new file mode 100644 index 000000000000..da1a2037eaaf --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs @@ -0,0 +1,203 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations. + +macro_rules! 
impl_ops_scalar_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Add<$elem_ty> for $id { + type Output = Self; + #[inline] + fn add(self, other: $elem_ty) -> Self { + self + $id::splat(other) + } + } + impl crate::ops::Add<$id> for $elem_ty { + type Output = $id; + #[inline] + fn add(self, other: $id) -> $id { + $id::splat(self) + other + } + } + + impl crate::ops::Sub<$elem_ty> for $id { + type Output = Self; + #[inline] + fn sub(self, other: $elem_ty) -> Self { + self - $id::splat(other) + } + } + impl crate::ops::Sub<$id> for $elem_ty { + type Output = $id; + #[inline] + fn sub(self, other: $id) -> $id { + $id::splat(self) - other + } + } + + impl crate::ops::Mul<$elem_ty> for $id { + type Output = Self; + #[inline] + fn mul(self, other: $elem_ty) -> Self { + self * $id::splat(other) + } + } + impl crate::ops::Mul<$id> for $elem_ty { + type Output = $id; + #[inline] + fn mul(self, other: $id) -> $id { + $id::splat(self) * other + } + } + + impl crate::ops::Div<$elem_ty> for $id { + type Output = Self; + #[inline] + fn div(self, other: $elem_ty) -> Self { + self / $id::splat(other) + } + } + impl crate::ops::Div<$id> for $elem_ty { + type Output = $id; + #[inline] + fn div(self, other: $id) -> $id { + $id::splat(self) / other + } + } + + impl crate::ops::Rem<$elem_ty> for $id { + type Output = Self; + #[inline] + fn rem(self, other: $elem_ty) -> Self { + self % $id::splat(other) + } + } + impl crate::ops::Rem<$id> for $elem_ty { + type Output = $id; + #[inline] + fn rem(self, other: $id) -> $id { + $id::splat(self) % other + } + } + + impl crate::ops::AddAssign<$elem_ty> for $id { + #[inline] + fn add_assign(&mut self, other: $elem_ty) { + *self = *self + other; + } + } + + impl crate::ops::SubAssign<$elem_ty> for $id { + #[inline] + fn sub_assign(&mut self, other: $elem_ty) { + *self = *self - other; + } + } + + impl crate::ops::MulAssign<$elem_ty> for $id { + #[inline] + fn mul_assign(&mut self, other: 
$elem_ty) { + *self = *self * other; + } + } + + impl crate::ops::DivAssign<$elem_ty> for $id { + #[inline] + fn div_assign(&mut self, other: $elem_ty) { + *self = *self / other; + } + } + + impl crate::ops::RemAssign<$elem_ty> for $id { + #[inline] + fn rem_assign(&mut self, other: $elem_ty) { + *self = *self % other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_arith>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_arithmetic() { + let zi = 0 as $elem_ty; + let oi = 1 as $elem_ty; + let ti = 2 as $elem_ty; + let fi = 4 as $elem_ty; + let z = $id::splat(zi); + let o = $id::splat(oi); + let t = $id::splat(ti); + let f = $id::splat(fi); + + // add + assert_eq!(zi + z, z); + assert_eq!(z + zi, z); + assert_eq!(oi + z, o); + assert_eq!(o + zi, o); + assert_eq!(ti + z, t); + assert_eq!(t + zi, t); + assert_eq!(ti + t, f); + assert_eq!(t + ti, f); + // sub + assert_eq!(zi - z, z); + assert_eq!(z - zi, z); + assert_eq!(oi - z, o); + assert_eq!(o - zi, o); + assert_eq!(ti - z, t); + assert_eq!(t - zi, t); + assert_eq!(fi - t, t); + assert_eq!(f - ti, t); + assert_eq!(f - o - o, t); + assert_eq!(f - oi - oi, t); + // mul + assert_eq!(zi * z, z); + assert_eq!(z * zi, z); + assert_eq!(zi * o, z); + assert_eq!(z * oi, z); + assert_eq!(zi * t, z); + assert_eq!(z * ti, z); + assert_eq!(oi * t, t); + assert_eq!(o * ti, t); + assert_eq!(ti * t, f); + assert_eq!(t * ti, f); + // div + assert_eq!(zi / o, z); + assert_eq!(z / oi, z); + assert_eq!(ti / o, t); + assert_eq!(t / oi, t); + assert_eq!(fi / o, f); + assert_eq!(f / oi, f); + assert_eq!(ti / t, o); + assert_eq!(t / ti, o); + assert_eq!(fi / t, t); + assert_eq!(f / ti, t); + // rem + assert_eq!(oi % o, z); + assert_eq!(o % oi, z); + assert_eq!(fi % t, z); + assert_eq!(f % ti, z); + + { + let mut v = z; + assert_eq!(v, z); + v += oi; // add_assign + assert_eq!(v, o); + v -= oi; // sub_assign + 
assert_eq!(v, z); + v = t; + v *= oi; // mul_assign + assert_eq!(v, t); + v *= ti; + assert_eq!(v, f); + v /= oi; // div_assign + assert_eq!(v, f); + v /= ti; + assert_eq!(v, t); + v %= ti; // rem_assign + assert_eq!(v, z); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs new file mode 100644 index 000000000000..88216769aec4 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs @@ -0,0 +1,162 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. + +macro_rules! impl_ops_scalar_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::BitXor<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: $elem_ty) -> Self { + self ^ $id::splat(other) + } + } + impl crate::ops::BitXor<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitxor(self, other: $id) -> $id { + $id::splat(self) ^ other + } + } + + impl crate::ops::BitAnd<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitand(self, other: $elem_ty) -> Self { + self & $id::splat(other) + } + } + impl crate::ops::BitAnd<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitand(self, other: $id) -> $id { + $id::splat(self) & other + } + } + + impl crate::ops::BitOr<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitor(self, other: $elem_ty) -> Self { + self | $id::splat(other) + } + } + impl crate::ops::BitOr<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitor(self, other: $id) -> $id { + $id::splat(self) | other + } + } + + impl crate::ops::BitAndAssign<$elem_ty> for $id { + #[inline] + fn bitand_assign(&mut self, other: $elem_ty) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign<$elem_ty> for $id { + #[inline] + fn bitor_assign(&mut self, other: $elem_ty) { + *self = *self | other; + } + } + impl 
crate::ops::BitXorAssign<$elem_ty> for $id { + #[inline] + fn bitxor_assign(&mut self, other: $elem_ty) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_bitwise>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_bitwise() { + let zi = 0 as $elem_ty; + let oi = 1 as $elem_ty; + let ti = 2 as $elem_ty; + let z = $id::splat(zi); + let o = $id::splat(oi); + let t = $id::splat(ti); + + // BitAnd: + assert_eq!(oi & o, o); + assert_eq!(o & oi, o); + assert_eq!(oi & z, z); + assert_eq!(o & zi, z); + assert_eq!(zi & o, z); + assert_eq!(z & oi, z); + assert_eq!(zi & z, z); + assert_eq!(z & zi, z); + + assert_eq!(ti & t, t); + assert_eq!(t & ti, t); + assert_eq!(ti & o, z); + assert_eq!(t & oi, z); + assert_eq!(oi & t, z); + assert_eq!(o & ti, z); + + // BitOr: + assert_eq!(oi | o, o); + assert_eq!(o | oi, o); + assert_eq!(oi | z, o); + assert_eq!(o | zi, o); + assert_eq!(zi | o, o); + assert_eq!(z | oi, o); + assert_eq!(zi | z, z); + assert_eq!(z | zi, z); + + assert_eq!(ti | t, t); + assert_eq!(t | ti, t); + assert_eq!(zi | t, t); + assert_eq!(z | ti, t); + assert_eq!(ti | z, t); + assert_eq!(t | zi, t); + + // BitXOR: + assert_eq!(oi ^ o, z); + assert_eq!(o ^ oi, z); + assert_eq!(zi ^ z, z); + assert_eq!(z ^ zi, z); + assert_eq!(zi ^ o, o); + assert_eq!(z ^ oi, o); + assert_eq!(oi ^ z, o); + assert_eq!(o ^ zi, o); + + assert_eq!(ti ^ t, z); + assert_eq!(t ^ ti, z); + assert_eq!(ti ^ z, t); + assert_eq!(t ^ zi, t); + assert_eq!(zi ^ t, t); + assert_eq!(z ^ ti, t); + + { + // AndAssign: + let mut v = o; + v &= ti; + assert_eq!(v, z); + } + { + // OrAssign: + let mut v = z; + v |= oi; + assert_eq!(v, o); + } + { + // XORAssign: + let mut v = z; + v ^= oi; + assert_eq!(v, o); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs 
b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs new file mode 100644 index 000000000000..523a85207b6b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs @@ -0,0 +1,140 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations on masks. + +macro_rules! impl_ops_scalar_mask_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::BitXor<bool> for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: bool) -> Self { + self ^ $id::splat(other) + } + } + impl crate::ops::BitXor<$id> for bool { + type Output = $id; + #[inline] + fn bitxor(self, other: $id) -> $id { + $id::splat(self) ^ other + } + } + + impl crate::ops::BitAnd<bool> for $id { + type Output = Self; + #[inline] + fn bitand(self, other: bool) -> Self { + self & $id::splat(other) + } + } + impl crate::ops::BitAnd<$id> for bool { + type Output = $id; + #[inline] + fn bitand(self, other: $id) -> $id { + $id::splat(self) & other + } + } + + impl crate::ops::BitOr<bool> for $id { + type Output = Self; + #[inline] + fn bitor(self, other: bool) -> Self { + self | $id::splat(other) + } + } + impl crate::ops::BitOr<$id> for bool { + type Output = $id; + #[inline] + fn bitor(self, other: $id) -> $id { + $id::splat(self) | other + } + } + + impl crate::ops::BitAndAssign<bool> for $id { + #[inline] + fn bitand_assign(&mut self, other: bool) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign<bool> for $id { + #[inline] + fn bitor_assign(&mut self, other: bool) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign<bool> for $id { + #[inline] + fn bitxor_assign(&mut self, other: bool) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_scalar_mask_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_mask_bitwise() { + let ti = true; + let fi = false; + let t = $id::splat(ti); + let f = $id::splat(fi); + assert!(t != f); + assert!(!(t == f)); + + // BitAnd: + assert_eq!(ti & f, f); + assert_eq!(t & fi, f); + assert_eq!(fi & t, f); + assert_eq!(f & ti, f); + assert_eq!(ti & t, t); + assert_eq!(t & ti, t); + assert_eq!(fi & f, f); + assert_eq!(f & fi, f); + + // BitOr: + assert_eq!(ti | f, t); + assert_eq!(t | fi, t); + assert_eq!(fi | t, t); + assert_eq!(f | ti, t); + assert_eq!(ti | t, t); + assert_eq!(t | ti, t); + assert_eq!(fi | f, f); + assert_eq!(f | fi, f); + + // BitXOR: + assert_eq!(ti ^ f, t); + assert_eq!(t ^ fi, t); + assert_eq!(fi ^ t, t); + assert_eq!(f ^ ti, t); + assert_eq!(ti ^ t, f); + assert_eq!(t ^ ti, f); + assert_eq!(fi ^ f, f); + assert_eq!(f ^ fi, f); + + { + // AndAssign: + let mut v = f; + v &= ti; + assert_eq!(v, f); + } + { + // OrAssign: + let mut v = f; + v |= ti; + assert_eq!(v, t); + } + { + // XORAssign: + let mut v = f; + v ^= ti; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs new file mode 100644 index 000000000000..9c164ad56c0b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs @@ -0,0 +1,107 @@ +//! Vertical (lane-wise) vector-scalar shifts operations. + +macro_rules! 
impl_ops_scalar_shifts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Shl<u32> for $id { + type Output = Self; + #[inline] + fn shl(self, other: u32) -> Self { + self << $id::splat(other as $elem_ty) + } + } + impl crate::ops::Shr<u32> for $id { + type Output = Self; + #[inline] + fn shr(self, other: u32) -> Self { + self >> $id::splat(other as $elem_ty) + } + } + + impl crate::ops::ShlAssign<u32> for $id { + #[inline] + fn shl_assign(&mut self, other: u32) { + *self = *self << other; + } + } + impl crate::ops::ShrAssign<u32> for $id { + #[inline] + fn shr_assign(&mut self, other: u32) { + *self = *self >> other; + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_shifts>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), + allow(unreachable_code, + unused_variables, + unused_mut) + )] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn ops_scalar_shifts() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + { + let zi = 0 as u32; + let oi = 1 as u32; + let ti = 2 as u32; + let maxi + = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; + + // shr + assert_eq!(z >> zi, z); + assert_eq!(z >> oi, z); + assert_eq!(z >> ti, z); + assert_eq!(z >> ti, z); + + #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 + return; + } + + assert_eq!(o >> zi, o); + assert_eq!(t >> zi, t); + assert_eq!(f >> zi, f); + assert_eq!(f >> maxi, z); + + assert_eq!(o >> oi, z); + assert_eq!(t >> oi, o); + assert_eq!(t >> ti, z); + assert_eq!(f >> oi, t); + assert_eq!(f >> ti, o); + assert_eq!(f >> maxi, z); + + // shl + assert_eq!(z << zi, z); + assert_eq!(o << zi, o); + assert_eq!(t << zi, t); + assert_eq!(f << zi, f); 
+ assert_eq!(f << maxi, z); + + assert_eq!(o << oi, t); + assert_eq!(o << ti, f); + assert_eq!(t << oi, f); + + { // shr_assign + let mut v = o; + v >>= oi; + assert_eq!(v, z); + } + { // shl_assign + let mut v = o; + v <<= oi; + assert_eq!(v, t); + } + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs new file mode 100644 index 000000000000..7057f52d0317 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs @@ -0,0 +1,148 @@ +//! Vertical (lane-wise) vector-vector arithmetic operations. + +macro_rules! impl_ops_vector_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Add for $id { + type Output = Self; + #[inline] + fn add(self, other: Self) -> Self { + use crate::llvm::simd_add; + unsafe { Simd(simd_add(self.0, other.0)) } + } + } + + impl crate::ops::Sub for $id { + type Output = Self; + #[inline] + fn sub(self, other: Self) -> Self { + use crate::llvm::simd_sub; + unsafe { Simd(simd_sub(self.0, other.0)) } + } + } + + impl crate::ops::Mul for $id { + type Output = Self; + #[inline] + fn mul(self, other: Self) -> Self { + use crate::llvm::simd_mul; + unsafe { Simd(simd_mul(self.0, other.0)) } + } + } + + impl crate::ops::Div for $id { + type Output = Self; + #[inline] + fn div(self, other: Self) -> Self { + use crate::llvm::simd_div; + unsafe { Simd(simd_div(self.0, other.0)) } + } + } + + impl crate::ops::Rem for $id { + type Output = Self; + #[inline] + fn rem(self, other: Self) -> Self { + use crate::llvm::simd_rem; + unsafe { Simd(simd_rem(self.0, other.0)) } + } + } + + impl crate::ops::AddAssign for $id { + #[inline] + fn add_assign(&mut self, other: Self) { + *self = *self + other; + } + } + + impl crate::ops::SubAssign for $id { + #[inline] + fn sub_assign(&mut self, other: Self) { + *self = *self - other; + } + } + + impl crate::ops::MulAssign for $id { + 
#[inline] + fn mul_assign(&mut self, other: Self) { + *self = *self * other; + } + } + + impl crate::ops::DivAssign for $id { + #[inline] + fn div_assign(&mut self, other: Self) { + *self = *self / other; + } + } + + impl crate::ops::RemAssign for $id { + #[inline] + fn rem_assign(&mut self, other: Self) { + *self = *self % other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_vector_arith>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_arithmetic() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + // add + assert_eq!(z + z, z); + assert_eq!(o + z, o); + assert_eq!(t + z, t); + assert_eq!(t + t, f); + // sub + assert_eq!(z - z, z); + assert_eq!(o - z, o); + assert_eq!(t - z, t); + assert_eq!(f - t, t); + assert_eq!(f - o - o, t); + // mul + assert_eq!(z * z, z); + assert_eq!(z * o, z); + assert_eq!(z * t, z); + assert_eq!(o * t, t); + assert_eq!(t * t, f); + // div + assert_eq!(z / o, z); + assert_eq!(t / o, t); + assert_eq!(f / o, f); + assert_eq!(t / t, o); + assert_eq!(f / t, t); + // rem + assert_eq!(o % o, z); + assert_eq!(f % t, z); + + { + let mut v = z; + assert_eq!(v, z); + v += o; // add_assign + assert_eq!(v, o); + v -= o; // sub_assign + assert_eq!(v, z); + v = t; + v *= o; // mul_assign + assert_eq!(v, t); + v *= t; + assert_eq!(v, f); + v /= o; // div_assign + assert_eq!(v, f); + v /= t; + assert_eq!(v, t); + v %= t; // rem_assign + assert_eq!(v, z); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs new file mode 100644 index 000000000000..7be9603fa261 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs @@ -0,0 +1,129 @@ +//! Vertical (lane-wise) vector-vector bitwise operations. + +macro_rules! 
impl_ops_vector_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::Not for $id { + type Output = Self; + #[inline] + fn not(self) -> Self { + Self::splat($true) ^ self + } + } + impl crate::ops::BitXor for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: Self) -> Self { + use crate::llvm::simd_xor; + unsafe { Simd(simd_xor(self.0, other.0)) } + } + } + impl crate::ops::BitAnd for $id { + type Output = Self; + #[inline] + fn bitand(self, other: Self) -> Self { + use crate::llvm::simd_and; + unsafe { Simd(simd_and(self.0, other.0)) } + } + } + impl crate::ops::BitOr for $id { + type Output = Self; + #[inline] + fn bitor(self, other: Self) -> Self { + use crate::llvm::simd_or; + unsafe { Simd(simd_or(self.0, other.0)) } + } + } + impl crate::ops::BitAndAssign for $id { + #[inline] + fn bitand_assign(&mut self, other: Self) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign for $id { + #[inline] + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign for $id { + #[inline] + fn bitxor_assign(&mut self, other: Self) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_bitwise() { + + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let m = $id::splat(!z.extract(0)); + + // Not: + assert_eq!(!z, m); + assert_eq!(!m, z); + + // BitAnd: + assert_eq!(o & o, o); + assert_eq!(o & z, z); + assert_eq!(z & o, z); + assert_eq!(z & z, z); + + assert_eq!(t & t, t); + assert_eq!(t & o, z); + assert_eq!(o & t, z); + + // BitOr: + assert_eq!(o | o, o); + assert_eq!(o | z, o); + assert_eq!(z | o, o); + assert_eq!(z | z, z); + + assert_eq!(t | t, t); + assert_eq!(z | t, t); + assert_eq!(t | z, t); + + // BitXOR: + assert_eq!(o ^ o, z); + assert_eq!(z ^ z, z); + assert_eq!(z ^ o, o); + assert_eq!(o ^ z, o); + + assert_eq!(t ^ t, z); + assert_eq!(t ^ z, t); + assert_eq!(z ^ t, t); + + { + // AndAssign: + let mut v = o; + v &= t; + assert_eq!(v, z); + } + { + // OrAssign: + let mut v = z; + v |= o; + assert_eq!(v, o); + } + { + // XORAssign: + let mut v = z; + v ^= o; + assert_eq!(v, o); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs new file mode 100644 index 000000000000..4126e87042f5 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs @@ -0,0 +1,69 @@ +//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. + +macro_rules! impl_ops_vector_float_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Minimum of two vectors. + /// + /// Returns a new vector containing the minimum value of each of + /// the input vector lanes. + #[inline] + pub fn min(self, x: Self) -> Self { + use crate::llvm::simd_fmin; + unsafe { Simd(simd_fmin(self.0, x.0)) } + } + + /// Maximum of two vectors. 
+ /// + /// Returns a new vector containing the maximum value of each of + /// the input vector lanes. + #[inline] + pub fn max(self, x: Self) -> Self { + use crate::llvm::simd_fmax; + unsafe { Simd(simd_fmax(self.0, x.0)) } + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_vector_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_max() { + let n = crate::$elem_ty::NAN; + let o = $id::splat(1. as $elem_ty); + let t = $id::splat(2. as $elem_ty); + + let mut m = o; // [1., 2., 1., 2., ...] + let mut on = o; + for i in 0..$id::lanes() { + if i % 2 == 0 { + m = m.replace(i, 2. as $elem_ty); + on = on.replace(i, n); + } + } + + assert_eq!(o.min(t), o); + assert_eq!(t.min(o), o); + assert_eq!(m.min(o), o); + assert_eq!(o.min(m), o); + assert_eq!(m.min(t), m); + assert_eq!(t.min(m), m); + + assert_eq!(o.max(t), t); + assert_eq!(t.max(o), t); + assert_eq!(m.max(o), m); + assert_eq!(o.max(m), m); + assert_eq!(m.max(t), t); + assert_eq!(t.max(m), t); + + assert_eq!(on.min(o), o); + assert_eq!(o.min(on), o); + assert_eq!(on.max(o), o); + assert_eq!(o.max(on), o); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs new file mode 100644 index 000000000000..36ea98e6bf32 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs @@ -0,0 +1,57 @@ +//! Vertical (lane-wise) vector `min` and `max` for integer vectors. + +macro_rules! impl_ops_vector_int_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Minimum of two vectors. + /// + /// Returns a new vector containing the minimum value of each of + /// the input vector lanes. + #[inline] + pub fn min(self, x: Self) -> Self { + self.lt(x).select(self, x) + } + + /// Maximum of two vectors. 
+ /// + /// Returns a new vector containing the maximum value of each of + /// the input vector lanes. + #[inline] + pub fn max(self, x: Self) -> Self { + self.gt(x).select(self, x) + } + } + test_if!{$test_tt: + paste::item! { + pub mod [<$id _ops_vector_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_max() { + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + + let mut m = o; + for i in 0..$id::lanes() { + if i % 2 == 0 { + m = m.replace(i, 2 as $elem_ty); + } + } + assert_eq!(o.min(t), o); + assert_eq!(t.min(o), o); + assert_eq!(m.min(o), o); + assert_eq!(o.min(m), o); + assert_eq!(m.min(t), m); + assert_eq!(t.min(m), m); + + assert_eq!(o.max(t), t); + assert_eq!(t.max(o), t); + assert_eq!(m.max(o), m); + assert_eq!(o.max(m), m); + assert_eq!(m.max(t), t); + assert_eq!(t.max(m), t); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs new file mode 100644 index 000000000000..295fc1ca81c9 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs @@ -0,0 +1,116 @@ +//! Vertical (lane-wise) vector-vector bitwise operations. + +macro_rules! 
impl_ops_vector_mask_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::Not for $id { + type Output = Self; + #[inline] + fn not(self) -> Self { + Self::splat($true) ^ self + } + } + impl crate::ops::BitXor for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: Self) -> Self { + use crate::llvm::simd_xor; + unsafe { Simd(simd_xor(self.0, other.0)) } + } + } + impl crate::ops::BitAnd for $id { + type Output = Self; + #[inline] + fn bitand(self, other: Self) -> Self { + use crate::llvm::simd_and; + unsafe { Simd(simd_and(self.0, other.0)) } + } + } + impl crate::ops::BitOr for $id { + type Output = Self; + #[inline] + fn bitor(self, other: Self) -> Self { + use crate::llvm::simd_or; + unsafe { Simd(simd_or(self.0, other.0)) } + } + } + impl crate::ops::BitAndAssign for $id { + #[inline] + fn bitand_assign(&mut self, other: Self) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign for $id { + #[inline] + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign for $id { + #[inline] + fn bitxor_assign(&mut self, other: Self) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_mask_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_mask_bitwise() { + let t = $id::splat(true); + let f = $id::splat(false); + assert!(t != f); + assert!(!(t == f)); + + // Not: + assert_eq!(!t, f); + assert_eq!(t, !f); + + // BitAnd: + assert_eq!(t & f, f); + assert_eq!(f & t, f); + assert_eq!(t & t, t); + assert_eq!(f & f, f); + + // BitOr: + assert_eq!(t | f, t); + assert_eq!(f | t, t); + assert_eq!(t | t, t); + assert_eq!(f | f, f); + + // BitXOR: + assert_eq!(t ^ f, t); + assert_eq!(f ^ t, t); + assert_eq!(t ^ t, f); + assert_eq!(f ^ f, f); + + { + // AndAssign: + let mut v = f; + v &= t; + assert_eq!(v, f); + } + { + // OrAssign: + let mut v = f; + v |= t; + assert_eq!(v, t); + } + { + // XORAssign: + let mut v = f; + v ^= t; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs new file mode 100644 index 000000000000..e2d91fd2fed6 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs @@ -0,0 +1,43 @@ +//! Vertical (lane-wise) vector `Neg`. + +macro_rules! impl_ops_vector_neg { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Neg for $id { + type Output = Self; + #[inline] + fn neg(self) -> Self { + Self::splat(-1 as $elem_ty) * self + } + } + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_neg>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn neg() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let nz = $id::splat(-(0 as $elem_ty)); + let no = $id::splat(-(1 as $elem_ty)); + let nt = $id::splat(-(2 as $elem_ty)); + let nf = $id::splat(-(4 as $elem_ty)); + + assert_eq!(-z, nz); + assert_eq!(-o, no); + assert_eq!(-t, nt); + assert_eq!(-f, nf); + + assert_eq!(z, -nz); + assert_eq!(o, -no); + assert_eq!(t, -nt); + assert_eq!(f, -nf); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs new file mode 100644 index 000000000000..6c794ecf4b93 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs @@ -0,0 +1,90 @@ +//! Vertical (lane-wise) vector rotates operations. +#![allow(unused)] + +macro_rules! impl_ops_vector_rotates { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Shifts the bits of each lane to the left by the specified + /// amount in the corresponding lane of `n`, wrapping the + /// truncated bits to the end of the resulting integer. + /// + /// Note: this is neither the same operation as `<<` nor equivalent + /// to `slice::rotate_left`. + #[inline] + pub fn rotate_left(self, n: $id) -> $id { + const LANE_WIDTH: $elem_ty = + crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; + // Protect against undefined behavior for over-long bit shifts + let n = n % LANE_WIDTH; + (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) + } + + /// Shifts the bits of each lane to the right by the specified + /// amount in the corresponding lane of `n`, wrapping the + /// truncated bits to the beginning of the resulting integer. 
+ /// + /// Note: this is neither the same operation as `>>` nor equivalent + /// to `slice::rotate_right`. + #[inline] + pub fn rotate_right(self, n: $id) -> $id { + const LANE_WIDTH: $elem_ty = + crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; + // Protect against undefined behavior for over-long bit shifts + let n = n % LANE_WIDTH; + (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) + } + } + + test_if!{ + $test_tt: + paste::item! { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/75 + #[cfg(not(any( + target_arch = "s390x", + target_arch = "sparc64", + )))] + pub mod [<$id _ops_vector_rotate>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn rotate_ops() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let max = $id::splat( + (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); + + // rotate_right + assert_eq!(z.rotate_right(z), z); + assert_eq!(z.rotate_right(o), z); + assert_eq!(z.rotate_right(t), z); + + assert_eq!(o.rotate_right(z), o); + assert_eq!(t.rotate_right(z), t); + assert_eq!(f.rotate_right(z), f); + assert_eq!(f.rotate_right(max), f << 1); + + assert_eq!(o.rotate_right(o), o << max); + assert_eq!(t.rotate_right(o), o); + assert_eq!(t.rotate_right(t), o << max); + assert_eq!(f.rotate_right(o), t); + assert_eq!(f.rotate_right(t), o); + + // rotate_left + assert_eq!(z.rotate_left(z), z); + assert_eq!(o.rotate_left(z), o); + assert_eq!(t.rotate_left(z), t); + assert_eq!(f.rotate_left(z), f); + assert_eq!(f.rotate_left(max), t); + + assert_eq!(o.rotate_left(o), t); + assert_eq!(o.rotate_left(t), f); + assert_eq!(t.rotate_left(o), f); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs new file mode 100644 index 000000000000..22e1fbc0ec76 --- 
/dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs @@ -0,0 +1,107 @@ +//! Vertical (lane-wise) vector-vector shifts operations. + +macro_rules! impl_ops_vector_shifts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Shl<$id> for $id { + type Output = Self; + #[inline] + fn shl(self, other: Self) -> Self { + use crate::llvm::simd_shl; + unsafe { Simd(simd_shl(self.0, other.0)) } + } + } + impl crate::ops::Shr<$id> for $id { + type Output = Self; + #[inline] + fn shr(self, other: Self) -> Self { + use crate::llvm::simd_shr; + unsafe { Simd(simd_shr(self.0, other.0)) } + } + } + impl crate::ops::ShlAssign<$id> for $id { + #[inline] + fn shl_assign(&mut self, other: Self) { + *self = *self << other; + } + } + impl crate::ops::ShrAssign<$id> for $id { + #[inline] + fn shr_assign(&mut self, other: Self) { + *self = *self >> other; + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_vector_shifts>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), + allow(unreachable_code, + unused_variables, + unused_mut) + )] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn ops_vector_shifts() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let max =$id::splat( + (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty + ); + + // shr + assert_eq!(z >> z, z); + assert_eq!(z >> o, z); + assert_eq!(z >> t, z); + assert_eq!(z >> t, z); + + #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { + // FIXME: rust produces bad codegen for shifts: + // https://github.com/rust-lang-nursery/packed_simd/issues/13 + return; + } + + assert_eq!(o >> z, o); + assert_eq!(t >> z, t); + assert_eq!(f >> z, f); + assert_eq!(f >> max, z); + + assert_eq!(o >> o, z); + 
assert_eq!(t >> o, o); + assert_eq!(t >> t, z); + assert_eq!(f >> o, t); + assert_eq!(f >> t, o); + assert_eq!(f >> max, z); + + // shl + assert_eq!(z << z, z); + assert_eq!(o << z, o); + assert_eq!(t << z, t); + assert_eq!(f << z, f); + assert_eq!(f << max, z); + + assert_eq!(o << o, t); + assert_eq!(o << t, f); + assert_eq!(t << o, f); + + { + // shr_assign + let mut v = o; + v >>= o; + assert_eq!(v, z); + } + { + // shl_assign + let mut v = o; + v <<= o; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs new file mode 100644 index 000000000000..d2e523a49faf --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ptr.rs @@ -0,0 +1,4 @@ +//! Vector of pointers + +#[macro_use] +mod gather_scatter; diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs new file mode 100644 index 000000000000..9d8e113bb44f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs @@ -0,0 +1,241 @@ +//! Implements masked gather and scatters for vectors of pointers + +macro_rules! impl_ptr_read { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident + | $test_tt:tt) => { + impl $id + where + [T; $elem_count]: sealed::SimdArray, + { + /// Reads selected vector elements from memory. + /// + /// Instantiates a new vector by reading the values from `self` for + /// those lanes whose `mask` is `true`, and using the elements of + /// `value` otherwise. + /// + /// No memory is accessed for those lanes of `self` whose `mask` is + /// `false`. + /// + /// # Safety + /// + /// This method is unsafe because it dereferences raw pointers. The + /// pointers must be aligned to `mem::align_of::()`. 
+ #[inline] + pub unsafe fn read( + self, mask: Simd<[M; $elem_count]>, + value: Simd<[T; $elem_count]>, + ) -> Simd<[T; $elem_count]> + where + M: sealed::Mask, + [M; $elem_count]: sealed::SimdArray, + { + use crate::llvm::simd_gather; + Simd(simd_gather(value.0, self.0, mask.0)) + } + } + + test_if! { + $test_tt: + paste::item! { + mod [<$id _read>] { + use super::*; + #[test] + fn read() { + let mut v = [0_i32; $elem_count]; + for i in 0..$elem_count { + v[i] = i as i32; + } + + let mut ptr = $id::::null(); + + for i in 0..$elem_count { + ptr = ptr.replace(i, unsafe { + crate::mem::transmute(&v[i] as *const i32) + }); + } + + // all mask elements are true: + let mask = $mask_ty::splat(true); + let def = Simd::<[i32; $elem_count]>::splat(42_i32); + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) + }; + assert_eq!( + r, + Simd::<[i32; $elem_count]>::from_slice_unaligned( + &v + ) + ); + + let mut mask = mask; + for i in 0..$elem_count { + if i % 2 != 0 { + mask = mask.replace(i, false); + } + } + + // even mask elements are true, odd ones are false: + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) + }; + let mut e = v; + for i in 0..$elem_count { + if i % 2 != 0 { + e[i] = 42; + } + } + assert_eq!( + r, + Simd::<[i32; $elem_count]>::from_slice_unaligned( + &e + ) + ); + + // all mask elements are false: + let mask = $mask_ty::splat(false); + let def = Simd::<[i32; $elem_count]>::splat(42_i32); + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) } + ; + assert_eq!(r, def); + } + } + } + } + }; +} + +macro_rules! impl_ptr_write { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident + | $test_tt:tt) => { + impl $id + where + [T; $elem_count]: sealed::SimdArray, + { + /// Writes selected vector elements to memory. + /// + /// Writes the lanes of `values` for which the mask is `true` to + /// their corresponding memory addresses in `self`. 
+ /// + /// No memory is accessed for those lanes of `self` whose `mask` is + /// `false`. + /// + /// Overlapping memory addresses of `self` are written to in order + /// from the lest-significant to the most-significant element. + /// + /// # Safety + /// + /// This method is unsafe because it dereferences raw pointers. The + /// pointers must be aligned to `mem::align_of::()`. + #[inline] + pub unsafe fn write( + self, mask: Simd<[M; $elem_count]>, + value: Simd<[T; $elem_count]>, + ) where + M: sealed::Mask, + [M; $elem_count]: sealed::SimdArray, + { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/85 + #[cfg(not(target_arch = "mips"))] + { + use crate::llvm::simd_scatter; + simd_scatter(value.0, self.0, mask.0) + } + #[cfg(target_arch = "mips")] + { + let m_ptr = + &mask as *const Simd<[M; $elem_count]> as *const M; + for i in 0..$elem_count { + let m = ptr::read(m_ptr.add(i)); + if m.test() { + let t_ptr = &self + as *const Simd<[*mut T; $elem_count]> + as *mut *mut T; + let v_ptr = &value as *const Simd<[T; $elem_count]> + as *const T; + ptr::write( + ptr::read(t_ptr.add(i)), + ptr::read(v_ptr.add(i)), + ); + } + } + } + } + } + + test_if! { + $test_tt: + paste::item! { + mod [<$id _write>] { + use super::*; + #[test] + fn write() { + // fourty_two = [42, 42, 42, ...] + let fourty_two + = Simd::<[i32; $elem_count]>::splat(42_i32); + + // This test will write to this array + let mut arr = [0_i32; $elem_count]; + for i in 0..$elem_count { + arr[i] = i as i32; + } + // arr = [0, 1, 2, ...] + + let mut ptr = $id::::null(); + for i in 0..$elem_count { + ptr = ptr.replace(i, unsafe { + crate::mem::transmute(arr.as_ptr().add(i)) + }); + } + // ptr = [&arr[0], &arr[1], ...] + + // write `fourty_two` to all elements of `v` + { + let backup = arr; + unsafe { + ptr.write($mask_ty::splat(true), fourty_two) + }; + assert_eq!(arr, [42_i32; $elem_count]); + arr = backup; // arr = [0, 1, 2, ...] 
+ } + + // write 42 to even elements of arr: + { + // set odd elements of the mask to false + let mut mask = $mask_ty::splat(true); + for i in 0..$elem_count { + if i % 2 != 0 { + mask = mask.replace(i, false); + } + } + // mask = [true, false, true, false, ...] + + // expected result r = [42, 1, 42, 3, 42, 5, ...] + let mut r = arr; + for i in 0..$elem_count { + if i % 2 == 0 { + r[i] = 42; + } + } + + let backup = arr; + unsafe { ptr.write(mask, fourty_two) }; + assert_eq!(arr, r); + arr = backup; // arr = [0, 1, 2, 3, ...] + } + + // write 42 to no elements of arr + { + let backup = arr; + unsafe { + ptr.write($mask_ty::splat(false), fourty_two) + }; + assert_eq!(arr, backup); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs new file mode 100644 index 000000000000..54d2f0cc7f08 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions.rs @@ -0,0 +1,12 @@ +//! Reductions + +#[macro_use] +mod float_arithmetic; +#[macro_use] +mod integer_arithmetic; +#[macro_use] +mod bitwise; +#[macro_use] +mod mask; +#[macro_use] +mod min_max; diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs new file mode 100644 index 000000000000..5bad4f474b16 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs @@ -0,0 +1,151 @@ +//! Implements portable horizontal bitwise vector reductions. +#![allow(unused)] + +macro_rules! impl_reduction_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $ielem_ty:ident | $test_tt:tt | + ($convert:expr) | + ($true:expr, $false:expr) + ) => { + impl $id { + /// Lane-wise bitwise `and` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. 
+ #[inline] + pub fn and(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_and; + let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x &= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `or` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn or(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_or; + let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x |= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `xor` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn xor(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_xor; + let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x ^= self.extract(i) as $elem_ty; + } + x + } + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction_bitwise>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn and() { + let v = $id::splat($false); + assert_eq!(v.and(), $false); + let v = $id::splat($true); + assert_eq!(v.and(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + if $id::lanes() > 1 { + assert_eq!(v.and(), $false); + } else { + assert_eq!(v.and(), $true); + } + let v = $id::splat($true); + let v = v.replace(0, $false); + assert_eq!(v.and(), $false); + + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn or() { + let v = $id::splat($false); + assert_eq!(v.or(), $false); + let v = $id::splat($true); + assert_eq!(v.or(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.or(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.or(), $true); + } else { + assert_eq!(v.or(), $false); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn xor() { + let v = $id::splat($false); + assert_eq!(v.xor(), $false); + let v = $id::splat($true); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $false); + } else { + assert_eq!(v.xor(), $true); + } + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.xor(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $true); + } else { + assert_eq!(v.xor(), $false); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs new file mode 100644 index 000000000000..dd722ae25fdd --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs @@ -0,0 +1,312 @@ +//! 
Implements portable horizontal float vector arithmetic reductions. + +macro_rules! impl_reduction_float_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Horizontal sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. + #[inline] + pub fn sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x += self.extract(i) as $elem_ty; + } + x + } + } + + /// Horizontal product of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. 
+ #[inline] + pub fn product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x *= self.extract(i) as $elem_ty; + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0.), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1.), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction_float_arith>] { + use super::*; + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + assert_eq!( + v.sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + assert_eq!( + v.product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unreachable_code)] + #[allow(unused_mut)] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn sum_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.sum().is_nan(), + "nan at {} => {} | {:?}", + i, + v.sum(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.sum().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.sum().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", 
wasm_bindgen_test)] + #[allow(unreachable_code)] + #[allow(unused_mut)] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn product_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.product().is_nan(), + "nan at {} => {} | {:?}", + i, + v.product(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.product().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.product().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn sum_roundoff() { + // Performs a tree-reduction + fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] + a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_sum(left) + tree_reduce_sum(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 0. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= 3.14 * c; + scalar_reduction += start; + v = v.replace(i, start); + } + let simd_reduction = v.sum(); + + let mut a = [0. 
as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_sum(&a); + + // tolerate 1 ULP difference: + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < 2, + "vector: {:?} | simd_reduction: {:?} | \ + tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn product_roundoff() { + // Performs a tree-reduction + fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] * a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_product(left) + * tree_reduce_product(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 1. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= 3.14 * c; + scalar_reduction *= start; + v = v.replace(i, start); + } + let simd_reduction = v.product(); + + let mut a = [0. 
as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_product(&a); + + // tolerate 1 ULP difference: + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < 2, + "vector: {:?} | simd_reduction: {:?} | \ + tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs new file mode 100644 index 000000000000..91dffad31032 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs @@ -0,0 +1,197 @@ +//! Implements portable horizontal integer vector arithmetic reductions. + +macro_rules! impl_reduction_integer_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident + | $test_tt:tt) => { + impl $id { + /// Horizontal wrapping sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. 
+ #[inline] + pub fn wrapping_sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + let v: $ielem_ty = unsafe { + simd_reduce_add_ordered(self.0, 0 as $ielem_ty) + }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_add(self.extract(i) as $elem_ty); + } + x + } + } + + /// Horizontal wrapping product of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. + #[inline] + pub fn wrapping_product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + let v: $ielem_ty = unsafe { + simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) + }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_mul(self.extract(i) as $elem_ty); + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1), |a, b| 
crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _reduction_int_arith>] { + use super::*; + + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } else { + assert_eq!( + v.wrapping_sum(), + 2 as $elem_ty + ); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum_overflow() { + let start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vwrapping_sum = v.wrapping_sum(); + + let mut wrapping_sum = start; + for _ in 1..$id::lanes() { + wrapping_sum = wrapping_sum.wrapping_add(start); + } + assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } else { + assert_eq!( + v.wrapping_product(), + 2 as $elem_ty + ); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + 
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product_overflow() { + let start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vmul = v.wrapping_product(); + + let mut mul = start; + for _ in 1..$id::lanes() { + mul = mul.wrapping_mul(start); + } + assert_eq!(mul, vmul, "v = {:?}", v); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs new file mode 100644 index 000000000000..0dd6a84e7e8d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs @@ -0,0 +1,89 @@ +//! Implements portable horizontal mask reductions. + +macro_rules! impl_reduction_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Are `all` vector lanes `true`? + #[inline] + pub fn all(self) -> bool { + unsafe { crate::codegen::reductions::mask::All::all(self) } + } + /// Is `any` vector lane `true`? + #[inline] + pub fn any(self) -> bool { + unsafe { crate::codegen::reductions::mask::Any::any(self) } + } + /// Are `all` vector lanes `false`? + #[inline] + pub fn none(self) -> bool { + !self.any() + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn all() { + let a = $id::splat(true); + assert!(a.all()); + let a = $id::splat(false); + assert!(!a.all()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.all()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.all()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn any() { + let a = $id::splat(true); + assert!(a.any()); + let a = $id::splat(false); + assert!(!a.any()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(a.any()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(a.any()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn none() { + let a = $id::splat(true); + assert!(!a.none()); + let a = $id::splat(false); + assert!(a.none()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.none()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.none()); + } + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs new file mode 100644 index 000000000000..c4d3aa10f15c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs @@ -0,0 +1,377 @@ +//! Implements portable horizontal vector min/max reductions. + +macro_rules! impl_reduction_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident + | $ielem_ty:ident | $test_tt:tt) => { + impl $id { + /// Largest vector element value. 
+ #[inline] + pub fn max_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + )))] + { + use crate::llvm::simd_reduce_max; + let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.max(self.extract(i)); + } + x + } + } + + /// Smallest vector element value. + #[inline] + pub fn min_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ),))] + { + use crate::llvm::simd_reduce_min; + let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on i586-unknown-linux-gnu + // https://github.com/rust-lang-nursery/packed_simd/issues/22 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.min(self.extract(i)); + } + x + } + } + } + test_if! {$test_tt: + paste::item! 
{ + pub mod [<$id _reduction_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn max_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.max_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.max_element(), 1 as $elem_ty); + } + let v = v.replace(0, 2 as $elem_ty); + assert_eq!(v.max_element(), 2 as $elem_ty); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn min_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + } + let v = $id::splat(1 as $elem_ty); + let v = v.replace(0, 2 as $elem_ty); + if $id::lanes() > 1 { + assert_eq!(v.min_element(), 1 as $elem_ty); + } else { + assert_eq!(v.min_element(), 2 as $elem_ty); + } + if $id::lanes() > 1 { + let v = $id::splat(2 as $elem_ty); + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 1 as $elem_ty); + } + } + } + } + } + }; +} + +macro_rules! test_reduction_float_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _reduction_min_max_nan>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.min(-3.), -3.); + assert_eq!((-3. 
as $elem_ty).min(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + all(target_arch = "x86", + not(target_feature = "sse2") + ), + target_arch = "powerpc64", + target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/5 + // + // If there is a NaN, the result should always + // the smallest element, but currently when the + // last element is NaN the current + // implementation incorrectly returns NaN. + // + // The targets mentioned above use different + // codegen that produces the correct result. + // + // These asserts detect if this behavior changes + assert!(v.min_element().is_nan(), + // FIXME: ^^^ should be -3. + "[A]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + // + // This is also broken: + for j in 0..i { + v = v.replace(j, n); + assert!(v.min_element().is_nan(), + // FIXME: ^^^ should be -3. + "[B]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. 
+ + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.min_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.min_element(), + v.min_element().is_nan() + ); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.min_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.min_element().is_nan(), + "[E]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.min_element(), + v.min_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.min_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).min_element().is_nan(), + "all nans | v={:?} | min={} | is_nan: {}", + $id::splat(n), $id::splat(n).min_element(), + $id::splat(n).min_element().is_nan()); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn max_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.max(-3.), -3.); + assert_eq!((-3. 
as $elem_ty).max(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + target_arch = "powerpc64", target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/5 + // + // If there is a NaN, the result should + // always the largest element, but currently + // when the last element is NaN the current + // implementation incorrectly returns NaN. + // + // The targets mentioned above use different + // codegen that produces the correct result. + // + // These asserts detect if this behavior + // changes + assert!(v.max_element().is_nan(), + // FIXME: ^^^ should be -3. + "[A]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + // + // This is also broken: + for j in 0..i { + v = v.replace(j, n); + assert!(v.max_element().is_nan(), + // FIXME: ^^^ should be -3. + "[B]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. 
+ + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.max_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.max_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.max_element().is_nan(), + "[E]: all nans | v={:?} | max={} | \ + is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.max_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).max_element().is_nan(), + "all nans | v={:?} | max={} | is_nan: {}", + $id::splat(n), $id::splat(n).max_element(), + $id::splat(n).max_element().is_nan()); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs new file mode 100644 index 000000000000..24525df56c73 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/select.rs @@ -0,0 +1,75 @@ +//! Implements mask's `select`. + +/// Implements mask select method +macro_rules! 
impl_select { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Selects elements of `a` and `b` using mask. + /// + /// The lanes of the result for which the mask is `true` contain + /// the values of `a`. The remaining lanes contain the values of + /// `b`. + #[inline] + pub fn select(self, a: Simd, b: Simd) -> Simd + where + T: sealed::SimdArray< + NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, + >, + { + use crate::llvm::simd_select; + Simd(unsafe { simd_select(self.0, a.0, b.0) }) + } + } + + test_select!(bool, $id, $id, (false, true) | $test_tt); + }; +} + +macro_rules! test_select { + ( + $elem_ty:ident, + $mask_ty:ident, + $vec_ty:ident,($small:expr, $large:expr) | + $test_tt:tt + ) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$vec_ty _select>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn select() { + let o = $small as $elem_ty; + let t = $large as $elem_ty; + + let a = $vec_ty::splat(o); + let b = $vec_ty::splat(t); + let m = a.lt(b); + assert_eq!(m.select(a, b), a); + + let m = b.lt(a); + assert_eq!(m.select(b, a), a); + + let mut c = a; + let mut d = b; + let mut m_e = $mask_ty::splat(false); + for i in 0..$vec_ty::lanes() { + if i % 2 == 0 { + let c_tmp = c.extract(i); + c = c.replace(i, d.extract(i)); + d = d.replace(i, c_tmp); + } else { + m_e = m_e.replace(i, true); + } + } + + let m = c.lt(d); + assert_eq!(m_e, m); + assert_eq!(m.select(c, d), a); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs new file mode 100644 index 000000000000..13a7fae5fcee --- /dev/null +++ b/third_party/rust/packed_simd/src/api/shuffle.rs @@ -0,0 +1,190 @@ +//! Implements portable vector shuffles with immediate indices. + +// FIXME: comprehensive tests +// https://github.com/rust-lang-nursery/packed_simd/issues/20 + +/// Shuffles vector elements. 
+/// +/// This macro returns a new vector that contains a shuffle of the elements in +/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, +/// [indices...])`) input vectors. +/// +/// The type of `vec0` and `vec1` must be equal, and the element type of the +/// resulting vector is the element type of the input vector. +/// +/// The number of `indices` must be a power-of-two in range `[0, 64)`, since +/// currently, the largest vector supported by the library has 64 lanes. The +/// length of the resulting vector equals the number of indices provided. +/// +/// The indices must be in range `[0, M * N)` where `M` is the number of input +/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. +/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, +/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of +/// `vec1`. +/// +/// # Examples +/// +/// Shuffling elements of two vectors: +/// +/// ``` +/// # #[macro_use] +/// # extern crate packed_simd; +/// # use packed_simd::*; +/// # fn main() { +/// // Shuffle allows reordering the elements: +/// let x = i32x4::new(1, 2, 3, 4); +/// let y = i32x4::new(5, 6, 7, 8); +/// let r = shuffle!(x, y, [4, 0, 5, 1]); +/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); +/// +/// // The resulting vector can als be smaller than the input: +/// let r = shuffle!(x, y, [1, 6]); +/// assert_eq!(r, i32x2::new(2, 7)); +/// +/// // Or larger: +/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); +/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); +/// // At most 2 * the number of lanes in the input vector. 
+/// # } +/// ``` +/// +/// Shuffling elements of one vector: +/// +/// ``` +/// # #[macro_use] +/// # extern crate packed_simd; +/// # use packed_simd::*; +/// # fn main() { +/// // Shuffle allows reordering the elements of a vector: +/// let x = i32x4::new(1, 2, 3, 4); +/// let r = shuffle!(x, [2, 1, 3, 0]); +/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); +/// +/// // The resulting vector can be smaller than the input: +/// let r = shuffle!(x, [1, 3]); +/// assert_eq!(r, i32x2::new(2, 4)); +/// +/// // Equal: +/// let r = shuffle!(x, [1, 3, 2, 0]); +/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); +/// +/// // Or larger: +/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); +/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); +/// // At most 2 * the number of lanes in the input vector. +/// # } +/// ``` +#[macro_export] +macro_rules! shuffle { + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector2( + $vec0.0, + $vec1.0, + [$l0, $l1], + )) + } + }}; + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector4( + $vec0.0, + $vec1.0, + [$l0, $l1, $l2, $l3], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector8( + $vec0.0, + $vec1.0, + [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector16( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, + ], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, 
$l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector32( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, + $l29, $l30, $l31, + ], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr, + $l32:expr, $l33:expr, $l34:expr, $l35:expr, + $l36:expr, $l37:expr, $l38:expr, $l39:expr, + $l40:expr, $l41:expr, $l42:expr, $l43:expr, + $l44:expr, $l45:expr, $l46:expr, $l47:expr, + $l48:expr, $l49:expr, $l50:expr, $l51:expr, + $l52:expr, $l53:expr, $l54:expr, $l55:expr, + $l56:expr, $l57:expr, $l58:expr, $l59:expr, + $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector64( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, + $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, + $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, + $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55, + $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, + ], + )) + } + }}; + ($vec:expr, [$($l:expr),*]) => { + match $vec { + v 
=> shuffle!(v, v, [$($l),*]) + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs new file mode 100644 index 000000000000..64536be6cba1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs @@ -0,0 +1,159 @@ +//! Shuffle vector elements according to a dynamic vector of indices. + +macro_rules! impl_shuffle1_dyn { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Shuffle vector elements according to `indices`. + #[inline] + pub fn shuffle1_dyn(self, indices: I) -> Self + where + Self: codegen::shuffle1_dyn::Shuffle1Dyn, + { + codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) + } + } + }; +} + +macro_rules! test_shuffle1_dyn { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + let increasing = { + let mut v = $id::splat(0 as $elem_ty); + for i in 0..$id::lanes() { + v = v.replace(i, i as $elem_ty); + } + v + }; + let decreasing = { + let mut v = $id::splat(0 as $elem_ty); + for i in 0..$id::lanes() { + v = v.replace( + i, + ($id::lanes() - 1 - i) as $elem_ty + ); + } + v + }; + + type Indices = < + $id as codegen::shuffle1_dyn::Shuffle1Dyn + >::Indices; + let increasing_ids: Indices = increasing.cast(); + let decreasing_ids: Indices = decreasing.cast(); + + assert_eq!( + increasing.shuffle1_dyn(increasing_ids), + increasing, + "(i,i)=>i" + ); + assert_eq!( + decreasing.shuffle1_dyn(increasing_ids), + decreasing, + "(d,i)=>d" + ); + assert_eq!( + increasing.shuffle1_dyn(decreasing_ids), + decreasing, + "(i,d)=>d" + ); + assert_eq!( + decreasing.shuffle1_dyn(decreasing_ids), + increasing, + "(d,d)=>i" + ); + + for i in 0..$id::lanes() { + let v_ids: Indices + = $id::splat(i as 
$elem_ty).cast(); + assert_eq!(increasing.shuffle1_dyn(v_ids), + $id::splat(increasing.extract(i)) + ); + assert_eq!(decreasing.shuffle1_dyn(v_ids), + $id::splat(decreasing.extract(i)) + ); + assert_eq!( + $id::splat(i as $elem_ty) + .shuffle1_dyn(increasing_ids), + $id::splat(i as $elem_ty) + ); + assert_eq!( + $id::splat(i as $elem_ty) + .shuffle1_dyn(decreasing_ids), + $id::splat(i as $elem_ty) + ); + } + } + } + } + } + }; +} + +macro_rules! test_shuffle1_dyn_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + // alternating = [true, false, true, false, ...] + let mut alternating = $id::splat(false); + for i in 0..$id::lanes() { + if i % 2 == 0 { + alternating = alternating.replace(i, true); + } + } + + type Indices = < + $id as codegen::shuffle1_dyn::Shuffle1Dyn + >::Indices; + // even = [0, 0, 2, 2, 4, 4, ..] + let even = { + let mut v = Indices::splat(0); + for i in 0..$id::lanes() { + if i % 2 == 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 - 1).into()); + } + } + v + }; + // odd = [1, 1, 3, 3, 5, 5, ...] + let odd = { + let mut v = Indices::splat(0); + for i in 0..$id::lanes() { + if i % 2 != 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 + 1).into()); + } + } + v + }; + + assert_eq!( + alternating.shuffle1_dyn(even), + $id::splat(true) + ); + if $id::lanes() > 1 { + assert_eq!( + alternating.shuffle1_dyn(odd), + $id::splat(false) + ); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs new file mode 100644 index 000000000000..526b848b5c06 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice.rs @@ -0,0 +1,7 @@ +//! 
Slice from/to methods + +#[macro_use] +mod from_slice; + +#[macro_use] +mod write_to_slice; diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs new file mode 100644 index 000000000000..109cd1f10b01 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs @@ -0,0 +1,216 @@ +//! Implements methods to read a vector type from a slice. + +macro_rules! impl_slice_from_slice { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::()` boundary. + #[inline] + pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + assert_eq!( + target_ptr + .align_offset(crate::mem::align_of::()), + 0 + ); + Self::from_slice_aligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + Self::from_slice_unaligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::()` boundary, the behavior is undefined. 
+ #[inline] + pub unsafe fn from_slice_aligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + debug_assert_eq!( + target_ptr.align_offset(crate::mem::align_of::()), + 0 + ); + + #[allow(clippy::cast_ptr_alignment)] + *(target_ptr as *const Self) + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn from_slice_unaligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + use crate::mem::size_of; + debug_assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked(0) as *const $elem_ty as *const u8; + let mut x = Self::splat(0 as $elem_ty); + let self_ptr = &mut x as *mut Self as *mut u8; + crate::ptr::copy_nonoverlapping( + target_ptr, + self_ptr, + size_of::(), + ); + x + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _slice_from_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_unaligned() { + let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; + unaligned[0] = 0 as $elem_ty; + let vec = $id::from_slice_unaligned(&unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_unaligned_fail() { + let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; + unaligned[0] = 0 as $elem_ty; + // the slice is not large enough => panic + let _vec = 
$id::from_slice_unaligned(&unaligned[2..]); + } + + union A { + data: [$elem_ty; 2 * $id::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_aligned() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + for i in $id::lanes()..(2 * $id::lanes()) { + unsafe { + aligned.data[i] = 42 as $elem_ty; + } + } + + let vec = unsafe { + $id::from_slice_aligned( + &aligned.data[$id::lanes()..] + ) + }; + for (index, &b) in + unsafe { aligned.data.iter().enumerate() } { + if index < $id::lanes() { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!( + b, vec.extract(index - $id::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_lanes() { + let aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let _vec = unsafe { + $id::from_slice_aligned( + &aligned.data[2 * $id::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_align() { + unsafe { + let aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + + // get a pointer to the front of data + let ptr: *const $elem_ty = aligned.data.as_ptr() + as *const $elem_ty; + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id>() + ) == 0 { + // the pointer is properly aligned, so + // from_slice_aligned won't fail here (e.g. this + // can happen for i128x1). 
So we panic to make + // the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s: &[$elem_ty] = slice::from_raw_parts( + ptr, $id::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let _vec = $id::from_slice_aligned(s); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs new file mode 100644 index 000000000000..fcb288da70fc --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs @@ -0,0 +1,211 @@ +//! Implements methods to write a vector type to a slice. + +macro_rules! impl_slice_write_to_slice { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::()` boundary. + #[inline] + pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty; + assert_eq!( + target_ptr + .align_offset(crate::mem::align_of::()), + 0 + ); + self.write_to_slice_aligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + self.write_to_slice_unaligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. 
+ /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::()` boundary, the behavior is + /// undefined. + #[inline] + pub unsafe fn write_to_slice_aligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; + debug_assert_eq!( + target_ptr.align_offset(crate::mem::align_of::()), + 0 + ); + + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + *(target_ptr as *mut Self) = self; + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Precondition + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn write_to_slice_unaligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; + let self_ptr = &self as *const Self as *const u8; + crate::ptr::copy_nonoverlapping( + self_ptr, + target_ptr, + crate::mem::size_of::(), + ); + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _slice_write_to_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_unaligned() { + let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_unaligned(&mut unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_unaligned_fail() { + let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_unaligned(&mut unaligned[2..]); + } + + union A { + data: [$elem_ty; 2 * $id::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_aligned() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let vec = $id::splat(42 as $elem_ty); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[$id::lanes()..] 
+ ); + for (idx, &b) in aligned.data.iter().enumerate() { + if idx < $id::lanes() { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!( + b, vec.extract(idx - $id::lanes()) + ); + } + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_lanes() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let vec = $id::splat(42 as $elem_ty); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[2 * $id::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_align() { + unsafe { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + + // get a pointer to the front of data + let ptr: *mut $elem_ty + = aligned.data.as_mut_ptr() as *mut $elem_ty; + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset(crate::mem::align_of::<$id>()) + == 0 { + // the pointer is properly aligned, so + // write_to_slice_aligned won't fail here (e.g. + // this can happen for i128x1). 
So we panic to + // make the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s: &mut [$elem_ty] + = slice::from_raw_parts_mut(ptr, $id::lanes()); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_aligned(s); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs new file mode 100644 index 000000000000..53bba25bd311 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs @@ -0,0 +1,192 @@ +//! Horizontal swap bytes + +macro_rules! impl_swap_bytes { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reverses the byte order of the vector. + #[inline] + pub fn swap_bytes(self) -> Self { + super::codegen::swap_bytes::SwapBytes::swap_bytes(self) + } + + /// Converts self to little endian from the target's endianness. + /// + /// On little endian this is a no-op. On big endian the bytes are + /// swapped. + #[inline] + pub fn to_le(self) -> Self { + #[cfg(target_endian = "little")] + { + self + } + #[cfg(not(target_endian = "little"))] + { + self.swap_bytes() + } + } + + /// Converts self to big endian from the target's endianness. + /// + /// On big endian this is a no-op. On little endian the bytes are + /// swapped. + #[inline] + pub fn to_be(self) -> Self { + #[cfg(target_endian = "big")] + { + self + } + #[cfg(not(target_endian = "big"))] + { + self.swap_bytes() + } + } + + /// Converts a vector from little endian to the target's endianness. + /// + /// On little endian this is a no-op. On big endian the bytes are + /// swapped. 
+ #[inline] + pub fn from_le(x: Self) -> Self { + #[cfg(target_endian = "little")] + { + x + } + #[cfg(not(target_endian = "little"))] + { + x.swap_bytes() + } + } + + /// Converts a vector from big endian to the target's endianness. + /// + /// On big endian this is a no-op. On little endian the bytes are + /// swapped. + #[inline] + pub fn from_be(x: Self) -> Self { + #[cfg(target_endian = "big")] + { + x + } + #[cfg(not(target_endian = "big"))] + { + x.swap_bytes() + } + } + } + + test_if! { + $test_tt: + paste::item_with_macros! { + pub mod [<$id _swap_bytes>] { + use super::*; + + const BYTES: [u8; 64] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ]; + + macro_rules! swap { + ($func: ident) => {{ + // catch possible future >512 vectors + assert!(mem::size_of::<$id>() <= 64); + + let mut actual = BYTES; + let elems: &mut [$elem_ty] = unsafe { + slice::from_raw_parts_mut( + actual.as_mut_ptr() as *mut $elem_ty, + $id::lanes(), + ) + }; + + let vec = $id::from_slice_unaligned(elems); + $id::$func(vec).write_to_slice_unaligned(elems); + + actual + }}; + } + + macro_rules! test_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = + BYTES.iter().rev() + .skip(64 - crate::mem::size_of::<$id>()); + assert!(actual.iter().zip(expected) + .all(|(x, y)| x == y)); + }}; + } + + macro_rules! 
test_no_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = BYTES.iter() + .take(mem::size_of::<$id>()); + + assert!(actual.iter().zip(expected) + .all(|(x, y)| x == y)); + }}; + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn swap_bytes() { + test_swap!(swap_bytes); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn to_le() { + #[cfg(target_endian = "little")] + { + test_no_swap!(to_le); + } + #[cfg(not(target_endian = "little"))] + { + test_swap!(to_le); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn to_be() { + #[cfg(target_endian = "big")] + { + test_no_swap!(to_be); + } + #[cfg(not(target_endian = "big"))] + { + test_swap!(to_be); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_le() { + #[cfg(target_endian = "little")] + { + test_no_swap!(from_le); + } + #[cfg(not(target_endian = "little"))] + { + test_swap!(from_le); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_be() { + #[cfg(target_endian = "big")] + { + test_no_swap!(from_be); + } + #[cfg(not(target_endian = "big"))] + { + test_swap!(from_be); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs new file mode 100644 index 000000000000..b7ccd838603f --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen.rs @@ -0,0 +1,59 @@ +//! Code-generation utilities + +crate mod bit_manip; +crate mod llvm; +crate mod math; +crate mod reductions; +crate mod shuffle; +crate mod shuffle1_dyn; +crate mod swap_bytes; + +macro_rules! 
impl_simd_array { + ([$elem_ty:ident; $elem_count:expr]: + $tuple_id:ident | $($elem_tys:ident),*) => { + #[derive(Copy, Clone)] + #[repr(simd)] + pub struct $tuple_id($(crate $elem_tys),*); + //^^^^^^^ leaked through SimdArray + + impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { + type Tuple = $tuple_id; + type T = $elem_ty; + const N: usize = $elem_count; + type NT = [u32; $elem_count]; + } + + impl crate::sealed::Simd for $tuple_id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + } +} + +crate mod pointer_sized_int; + +crate mod v16; +crate use self::v16::*; + +crate mod v32; +crate use self::v32::*; + +crate mod v64; +crate use self::v64::*; + +crate mod v128; +crate use self::v128::*; + +crate mod v256; +crate use self::v256::*; + +crate mod v512; +crate use self::v512::*; + +crate mod vSize; +crate use self::vSize::*; + +crate mod vPtr; +crate use self::vPtr::*; diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs new file mode 100644 index 000000000000..947266f5bce8 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs @@ -0,0 +1,354 @@ +//! LLVM bit manipulation intrinsics. 
+#![rustfmt::skip] + +use crate::*; + +#[allow(improper_ctypes, dead_code)] +extern "C" { + #[link_name = "llvm.ctlz.v2i8"] + fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; + #[link_name = "llvm.ctlz.v4i8"] + fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; + #[link_name = "llvm.ctlz.v8i8"] + fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; + #[link_name = "llvm.ctlz.v16i8"] + fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; + #[link_name = "llvm.ctlz.v32i8"] + fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; + #[link_name = "llvm.ctlz.v64i8"] + fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; + + #[link_name = "llvm.ctlz.v2i16"] + fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; + #[link_name = "llvm.ctlz.v4i16"] + fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; + #[link_name = "llvm.ctlz.v8i16"] + fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; + #[link_name = "llvm.ctlz.v16i16"] + fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; + #[link_name = "llvm.ctlz.v32i16"] + fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; + + #[link_name = "llvm.ctlz.v2i32"] + fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; + #[link_name = "llvm.ctlz.v4i32"] + fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; + #[link_name = "llvm.ctlz.v8i32"] + fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; + #[link_name = "llvm.ctlz.v16i32"] + fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; + + #[link_name = "llvm.ctlz.v2i64"] + fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; + #[link_name = "llvm.ctlz.v4i64"] + fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; + #[link_name = "llvm.ctlz.v8i64"] + fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; + + #[link_name = "llvm.ctlz.v1i128"] + fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; + #[link_name = "llvm.ctlz.v2i128"] + fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; + #[link_name = "llvm.ctlz.v4i128"] + fn 
ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; + + #[link_name = "llvm.cttz.v2i8"] + fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; + #[link_name = "llvm.cttz.v4i8"] + fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; + #[link_name = "llvm.cttz.v8i8"] + fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; + #[link_name = "llvm.cttz.v16i8"] + fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; + #[link_name = "llvm.cttz.v32i8"] + fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; + #[link_name = "llvm.cttz.v64i8"] + fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; + + #[link_name = "llvm.cttz.v2i16"] + fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; + #[link_name = "llvm.cttz.v4i16"] + fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; + #[link_name = "llvm.cttz.v8i16"] + fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; + #[link_name = "llvm.cttz.v16i16"] + fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; + #[link_name = "llvm.cttz.v32i16"] + fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; + + #[link_name = "llvm.cttz.v2i32"] + fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; + #[link_name = "llvm.cttz.v4i32"] + fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; + #[link_name = "llvm.cttz.v8i32"] + fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; + #[link_name = "llvm.cttz.v16i32"] + fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; + + #[link_name = "llvm.cttz.v2i64"] + fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; + #[link_name = "llvm.cttz.v4i64"] + fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; + #[link_name = "llvm.cttz.v8i64"] + fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; + + #[link_name = "llvm.cttz.v1i128"] + fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; + #[link_name = "llvm.cttz.v2i128"] + fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; + #[link_name = "llvm.cttz.v4i128"] + fn cttz_u128x4(x: u128x4, is_zero_undef: bool) 
-> u128x4; + + #[link_name = "llvm.ctpop.v2i8"] + fn ctpop_u8x2(x: u8x2) -> u8x2; + #[link_name = "llvm.ctpop.v4i8"] + fn ctpop_u8x4(x: u8x4) -> u8x4; + #[link_name = "llvm.ctpop.v8i8"] + fn ctpop_u8x8(x: u8x8) -> u8x8; + #[link_name = "llvm.ctpop.v16i8"] + fn ctpop_u8x16(x: u8x16) -> u8x16; + #[link_name = "llvm.ctpop.v32i8"] + fn ctpop_u8x32(x: u8x32) -> u8x32; + #[link_name = "llvm.ctpop.v64i8"] + fn ctpop_u8x64(x: u8x64) -> u8x64; + + #[link_name = "llvm.ctpop.v2i16"] + fn ctpop_u16x2(x: u16x2) -> u16x2; + #[link_name = "llvm.ctpop.v4i16"] + fn ctpop_u16x4(x: u16x4) -> u16x4; + #[link_name = "llvm.ctpop.v8i16"] + fn ctpop_u16x8(x: u16x8) -> u16x8; + #[link_name = "llvm.ctpop.v16i16"] + fn ctpop_u16x16(x: u16x16) -> u16x16; + #[link_name = "llvm.ctpop.v32i16"] + fn ctpop_u16x32(x: u16x32) -> u16x32; + + #[link_name = "llvm.ctpop.v2i32"] + fn ctpop_u32x2(x: u32x2) -> u32x2; + #[link_name = "llvm.ctpop.v4i32"] + fn ctpop_u32x4(x: u32x4) -> u32x4; + #[link_name = "llvm.ctpop.v8i32"] + fn ctpop_u32x8(x: u32x8) -> u32x8; + #[link_name = "llvm.ctpop.v16i32"] + fn ctpop_u32x16(x: u32x16) -> u32x16; + + #[link_name = "llvm.ctpop.v2i64"] + fn ctpop_u64x2(x: u64x2) -> u64x2; + #[link_name = "llvm.ctpop.v4i64"] + fn ctpop_u64x4(x: u64x4) -> u64x4; + #[link_name = "llvm.ctpop.v8i64"] + fn ctpop_u64x8(x: u64x8) -> u64x8; + + #[link_name = "llvm.ctpop.v1i128"] + fn ctpop_u128x1(x: u128x1) -> u128x1; + #[link_name = "llvm.ctpop.v2i128"] + fn ctpop_u128x2(x: u128x2) -> u128x2; + #[link_name = "llvm.ctpop.v4i128"] + fn ctpop_u128x4(x: u128x4) -> u128x4; +} + +crate trait BitManip { + fn ctpop(self) -> Self; + fn ctlz(self) -> Self; + fn cttz(self) -> Self; +} + +macro_rules! impl_bit_manip { + (inner: $ty:ident, $scalar:ty, $uty:ident, + $ctpop:ident, $ctlz:ident, $cttz:ident) => { + // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 + #[cfg(target_arch = "s390x")] + impl_bit_manip! 
{ scalar: $ty, $scalar } + #[cfg(not(target_arch = "s390x"))] + impl BitManip for $ty { + #[inline] + fn ctpop(self) -> Self { + let y: $uty = self.cast(); + unsafe { $ctpop(y).cast() } + } + + #[inline] + fn ctlz(self) -> Self { + let y: $uty = self.cast(); + // the ctxx intrinsics need compile-time constant + // `is_zero_undef` + unsafe { $ctlz(y, false).cast() } + } + + #[inline] + fn cttz(self) -> Self { + let y: $uty = self.cast(); + unsafe { $cttz(y, false).cast() } + } + } + }; + (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { + #[cfg(target_arch = "s390x")] + impl_bit_manip! { scalar: $ty, $scalar } + #[cfg(not(target_arch = "s390x"))] + impl BitManip for $ty { + #[inline] + fn ctpop(self) -> Self { + let y: $uty = self.cast(); + $uty::ctpop(y).cast() + } + + #[inline] + fn ctlz(self) -> Self { + let y: $uty = self.cast(); + $uty::ctlz(y).cast() + } + + #[inline] + fn cttz(self) -> Self { + let y: $uty = self.cast(); + $uty::cttz(y).cast() + } + } + }; + (scalar: $ty:ident, $scalar:ty) => { + impl BitManip for $ty { + #[inline] + fn ctpop(self) -> Self { + let mut ones = self; + for i in 0..Self::lanes() { + ones = ones + .replace(i, self.extract(i).count_ones() as $scalar); + } + ones + } + + #[inline] + fn ctlz(self) -> Self { + let mut lz = self; + for i in 0..Self::lanes() { + lz = lz.replace( + i, + self.extract(i).leading_zeros() as $scalar, + ); + } + lz + } + + #[inline] + fn cttz(self) -> Self { + let mut tz = self; + for i in 0..Self::lanes() { + tz = tz.replace( + i, + self.extract(i).trailing_zeros() as $scalar, + ); + } + tz + } + } + }; + ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, + $ctpop:ident, $ctlz:ident, $cttz:ident) => { + impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } + impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } + }; + (sized: $usize:ident, $uscalar:ty, $isize:ident, + $iscalar:ty, $ty:ident) => { + impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } + impl_bit_manip! 
{ sized_inner: $isize, $iscalar, $ty } + }; +} + +impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } +impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } +#[cfg(not(target_arch = "aarch64"))] // see below +impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } +impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } +impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } +impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } +impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } +impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } +impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } +impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } +impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } +impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } +impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } +impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } +impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } +impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } +impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } +impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } +impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } +impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } +impl_bit_manip! 
{ u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } + +#[cfg(target_arch = "aarch64")] +impl BitManip for u8x8 { + #[inline] + fn ctpop(self) -> Self { + let y: u8x8 = self.cast(); + unsafe { ctpop_u8x8(y).cast() } + } + + #[inline] + fn ctlz(self) -> Self { + let y: u8x8 = self.cast(); + unsafe { ctlz_u8x8(y, false).cast() } + } + + #[inline] + fn cttz(self) -> Self { + // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 + // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 + // intrinsics + let mut tz = self; + for i in 0..Self::lanes() { + tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); + } + tz + } +} +#[cfg(target_arch = "aarch64")] +impl BitManip for i8x8 { + #[inline] + fn ctpop(self) -> Self { + let y: u8x8 = self.cast(); + unsafe { ctpop_u8x8(y).cast() } + } + + #[inline] + fn ctlz(self) -> Self { + let y: u8x8 = self.cast(); + unsafe { ctlz_u8x8(y, false).cast() } + } + + #[inline] + fn cttz(self) -> Self { + // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 + // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 + // intrinsics + let mut tz = self; + for i in 0..Self::lanes() { + tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); + } + tz + } +} + +cfg_if! { + if #[cfg(target_pointer_width = "8")] { + impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 } + impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 } + impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 } + } else if #[cfg(target_pointer_width = "16")] { + impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 } + impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 } + impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 } + } else if #[cfg(target_pointer_width = "32")] { + impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 } + impl_bit_manip! 
{ sized: usizex4, usize, isizex4, isize, u32x4 } + impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 } + } else if #[cfg(target_pointer_width = "64")] { + impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 } + impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 } + impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 } + } else { + compile_error!("unsupported target_pointer_width"); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs new file mode 100644 index 000000000000..91c2b0758dcf --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/llvm.rs @@ -0,0 +1,99 @@ +//! LLVM's platform intrinsics +#![allow(dead_code)] + +use crate::sealed::Shuffle; +#[allow(unused_imports)] // FIXME: spurious warning? +use crate::sealed::Simd; + +// Shuffle intrinsics: expanded in users' crates, therefore public. +extern "platform-intrinsic" { + // FIXME: Passing this intrinsics an `idx` array with an index that is + // out-of-bounds will produce a monomorphization-time error. 
+ // https://github.com/rust-lang-nursery/packed_simd/issues/21 + pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 2], Output = U>; + + pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 4], Output = U>; + + pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 8], Output = U>; + + pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 16], Output = U>; + + pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 32], Output = U>; + + pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U + where + T: Simd, + ::Element: Shuffle<[u32; 64], Output = U>; +} + +pub use self::simd_shuffle16 as __shuffle_vector16; +pub use self::simd_shuffle2 as __shuffle_vector2; +pub use self::simd_shuffle32 as __shuffle_vector32; +pub use self::simd_shuffle4 as __shuffle_vector4; +pub use self::simd_shuffle64 as __shuffle_vector64; +pub use self::simd_shuffle8 as __shuffle_vector8; + +extern "platform-intrinsic" { + crate fn simd_eq(x: T, y: T) -> U; + crate fn simd_ne(x: T, y: T) -> U; + crate fn simd_lt(x: T, y: T) -> U; + crate fn simd_le(x: T, y: T) -> U; + crate fn simd_gt(x: T, y: T) -> U; + crate fn simd_ge(x: T, y: T) -> U; + + crate fn simd_insert(x: T, idx: u32, val: U) -> T; + crate fn simd_extract(x: T, idx: u32) -> U; + + crate fn simd_cast(x: T) -> U; + + crate fn simd_add(x: T, y: T) -> T; + crate fn simd_sub(x: T, y: T) -> T; + crate fn simd_mul(x: T, y: T) -> T; + crate fn simd_div(x: T, y: T) -> T; + crate fn simd_rem(x: T, y: T) -> T; + crate fn simd_shl(x: T, y: T) -> T; + crate fn simd_shr(x: T, y: T) -> T; + crate fn simd_and(x: T, y: T) -> T; + crate fn simd_or(x: T, y: T) -> T; + crate fn simd_xor(x: T, y: T) -> T; + + crate fn simd_reduce_add_unordered(x: T) -> U; + crate fn simd_reduce_mul_unordered(x: T) -> U; + crate fn 
simd_reduce_add_ordered(x: T, acc: U) -> U; + crate fn simd_reduce_mul_ordered(x: T, acc: U) -> U; + crate fn simd_reduce_min(x: T) -> U; + crate fn simd_reduce_max(x: T) -> U; + crate fn simd_reduce_min_nanless(x: T) -> U; + crate fn simd_reduce_max_nanless(x: T) -> U; + crate fn simd_reduce_and(x: T) -> U; + crate fn simd_reduce_or(x: T) -> U; + crate fn simd_reduce_xor(x: T) -> U; + crate fn simd_reduce_all(x: T) -> bool; + crate fn simd_reduce_any(x: T) -> bool; + + crate fn simd_select(m: M, a: T, b: T) -> T; + + crate fn simd_fmin(a: T, b: T) -> T; + crate fn simd_fmax(a: T, b: T) -> T; + + crate fn simd_fsqrt(a: T) -> T; + crate fn simd_fma(a: T, b: T, c: T) -> T; + + crate fn simd_gather(value: T, pointers: P, mask: M) -> T; + crate fn simd_scatter(value: T, pointers: P, mask: M); +} diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs new file mode 100644 index 000000000000..f3997c7f1135 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math.rs @@ -0,0 +1,3 @@ +//! Vertical math operations + +crate mod float; diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs new file mode 100644 index 000000000000..5e89bf6ae6b0 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float.rs @@ -0,0 +1,18 @@ +//! Vertical floating-point math operations. 
+#![allow(clippy::useless_transmute)] + +#[macro_use] +crate mod macros; +crate mod abs; +crate mod cos; +crate mod cos_pi; +crate mod exp; +crate mod ln; +crate mod mul_add; +crate mod mul_adde; +crate mod powf; +crate mod sin; +crate mod sin_cos_pi; +crate mod sin_pi; +crate mod sqrt; +crate mod sqrte; diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs new file mode 100644 index 000000000000..bc4421f61de2 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs @@ -0,0 +1,103 @@ +//! Vertical floating-point `fabs` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors fabs + +use crate::*; + +crate trait Abs { + fn abs(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.fabs.v2f32"] + fn fabs_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.fabs.v4f32"] + fn fabs_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.fabs.v8f32"] + fn fabs_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.fabs.v16f32"] + fn fabs_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit fabsgle elem vectors + #[link_name = "llvm.fabs.v1f64"] + fn fabs_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.fabs.v2f64"] + fn fabs_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.fabs.v4f64"] + fn fabs_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.fabs.v8f64"] + fn fabs_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.fabs.f32"] + fn fabs_f32(x: f32) -> f32; + #[link_name = "llvm.fabs.f64"] + fn fabs_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Abs, abs); + +cfg_if! 
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: fabs_f32); + impl_unary!(f32x4[f32; 4]: fabs_f32); + impl_unary!(f32x8[f32; 8]: fabs_f32); + impl_unary!(f32x16[f32; 16]: fabs_f32); + + impl_unary!(f64x2[f64; 2]: fabs_f64); + impl_unary!(f64x4[f64; 4]: fabs_f64); + impl_unary!(f64x8[f64; 8]: fabs_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2); + + impl_unary!(f32x4: Sleef_fabsf4_avx2128); + impl_unary!(f32x8: Sleef_fabsf8_avx2); + impl_unary!(f64x2: Sleef_fabsd2_avx2128); + impl_unary!(f64x4: Sleef_fabsd4_avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx); + impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx); + + impl_unary!(f32x4: Sleef_fabsf4_sse4); + impl_unary!(f32x8: Sleef_fabsf8_avx); + impl_unary!(f64x2: Sleef_fabsd2_sse4); + impl_unary!(f64x4: Sleef_fabsd4_avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4); + + impl_unary!(f32x4: Sleef_fabsf4_sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4); + impl_unary!(f64x2: Sleef_fabsd2_sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4); + } else { + impl_unary!(f32x2[f32; 2]: fabs_f32); + impl_unary!(f32x16: fabs_v16f32); + impl_unary!(f64x8: fabs_v8f64); + + impl_unary!(f32x4: fabs_v4f32); + impl_unary!(f32x8: fabs_v8f32); + impl_unary!(f64x2: fabs_v2f64); + impl_unary!(f64x4: fabs_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: fabs_f32); + impl_unary!(f32x4: fabs_v4f32); + 
impl_unary!(f32x8: fabs_v8f32); + impl_unary!(f32x16: fabs_v16f32); + + impl_unary!(f64x2: fabs_v2f64); + impl_unary!(f64x4: fabs_v4f64); + impl_unary!(f64x8: fabs_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs new file mode 100644 index 000000000000..50f6c16da255 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs @@ -0,0 +1,103 @@ +//! Vertical floating-point `cos` +#![allow(unused)] + +// FIXME 64-bit 1 elem vector cos + +use crate::*; + +crate trait Cos { + fn cos(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.cos.v2f32"] + fn cos_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.cos.v4f32"] + fn cos_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.cos.v8f32"] + fn cos_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.cos.v16f32"] + fn cos_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit cosgle elem vectors + #[link_name = "llvm.cos.v1f64"] + fn cos_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.cos.v2f64"] + fn cos_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.cos.v4f64"] + fn cos_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.cos.v8f64"] + fn cos_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.cos.f32"] + fn cos_f32(x: f32) -> f32; + #[link_name = "llvm.cos.f64"] + fn cos_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Cos, cos); + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: cos_f32); + impl_unary!(f32x4[f32; 4]: cos_f32); + impl_unary!(f32x8[f32; 8]: cos_f32); + impl_unary!(f32x16[f32; 16]: cos_f32); + + impl_unary!(f64x2[f64; 2]: cos_f64); + impl_unary!(f64x4[f64; 4]: cos_f64); + impl_unary!(f64x8[f64; 8]: cos_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2); + + impl_unary!(f32x4: Sleef_cosf4_u10avx2128); + impl_unary!(f32x8: Sleef_cosf8_u10avx2); + impl_unary!(f64x2: Sleef_cosd2_u10avx2128); + impl_unary!(f64x4: Sleef_cosd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx); + + impl_unary!(f32x4: Sleef_cosf4_u10sse4); + impl_unary!(f32x8: Sleef_cosf8_u10avx); + impl_unary!(f64x2: Sleef_cosd2_u10sse4); + impl_unary!(f64x4: Sleef_cosd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4); + + impl_unary!(f32x4: Sleef_cosf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4); + impl_unary!(f64x2: Sleef_cosd2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4); + } else { + impl_unary!(f32x2[f32; 2]: cos_f32); + impl_unary!(f32x16: cos_v16f32); + impl_unary!(f64x8: cos_v8f64); + + impl_unary!(f32x4: cos_v4f32); + impl_unary!(f32x8: cos_v8f32); + impl_unary!(f64x2: cos_v2f64); + impl_unary!(f64x4: cos_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: cos_f32); + impl_unary!(f32x4: cos_v4f32); + impl_unary!(f32x8: cos_v8f32); + impl_unary!(f32x16: cos_v16f32); + + impl_unary!(f64x2: cos_v2f64); + impl_unary!(f64x4: cos_v4f64); + impl_unary!(f64x8: cos_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs new file mode 100644 index 000000000000..ebff5fd1c751 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs @@ -0,0 +1,87 @@ +//! 
Vertical floating-point `cos` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors cos_pi + +use crate::*; + +crate trait CosPi { + fn cos_pi(self) -> Self; +} + +gen_unary_impl_table!(CosPi, cos_pi); + +macro_rules! impl_def { + ($vid:ident, $PI:path) => { + impl CosPi for $vid { + #[inline] + fn cos_pi(self) -> Self { + (self * Self::splat($PI)).cos() + } + } + }; +} +macro_rules! impl_def32 { + ($vid:ident) => { + impl_def!($vid, crate::f32::consts::PI); + }; +} +macro_rules! impl_def64 { + ($vid:ident) => { + impl_def!($vid, crate::f64::consts::PI); + }; +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2); + + impl_unary!(f32x4: Sleef_cospif4_u05avx2128); + impl_unary!(f32x8: Sleef_cospif8_u05avx2); + impl_unary!(f64x2: Sleef_cospid2_u05avx2128); + impl_unary!(f64x4: Sleef_cospid4_u05avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx); + impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx); + + impl_unary!(f32x4: Sleef_cospif4_u05sse4); + impl_unary!(f32x8: Sleef_cospif8_u05avx); + impl_unary!(f64x2: Sleef_cospid2_u05sse4); + impl_unary!(f64x4: Sleef_cospid4_u05avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4); + + impl_unary!(f32x4: Sleef_cospif4_u05sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4); + impl_unary!(f64x2: Sleef_cospid2_u05sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4); + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + 
impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } + } + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs new file mode 100644 index 000000000000..00d10e9fa644 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs @@ -0,0 +1,112 @@ +//! Vertical floating-point `exp` +#![allow(unused)] + +// FIXME 64-bit expgle elem vectors misexpg + +use crate::*; + +crate trait Exp { + fn exp(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.exp.v2f32"] + fn exp_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.exp.v4f32"] + fn exp_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.exp.v8f32"] + fn exp_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.exp.v16f32"] + fn exp_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit expgle elem vectors + #[link_name = "llvm.exp.v1f64"] + fn exp_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.exp.v2f64"] + fn exp_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.exp.v4f64"] + fn exp_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.exp.v8f64"] + fn exp_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.exp.f32"] + fn exp_f32(x: f32) -> f32; + #[link_name = "llvm.exp.f64"] + fn exp_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Exp, exp); + +cfg_if! 
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x4[f32; 4]: exp_f32); + impl_unary!(f32x8[f32; 8]: exp_f32); + impl_unary!(f32x16[f32; 16]: exp_f32); + + impl_unary!(f64x2[f64; 2]: exp_f64); + impl_unary!(f64x4[f64; 4]: exp_f64); + impl_unary!(f64x8[f64; 8]: exp_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); + + impl_unary!(f32x4: Sleef_expf4_u10avx2128); + impl_unary!(f32x8: Sleef_expf8_u10avx2); + impl_unary!(f64x2: Sleef_expd2_u10avx2128); + impl_unary!(f64x4: Sleef_expd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); + + impl_unary!(f32x4: Sleef_expf4_u10sse4); + impl_unary!(f32x8: Sleef_expf8_u10avx); + impl_unary!(f64x2: Sleef_expd2_u10sse4); + impl_unary!(f64x4: Sleef_expd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); + + impl_unary!(f32x4: Sleef_expf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); + impl_unary!(f64x2: Sleef_expd2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); + } else if #[cfg(target_feature = "sse2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); + impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); + impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); + + impl_unary!(f32x4: Sleef_expf4_u10sse2); + impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); + 
impl_unary!(f64x2: Sleef_expd2_u10sse2); + impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); + } else { + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x16: exp_v16f32); + impl_unary!(f64x8: exp_v8f64); + + impl_unary!(f32x4: exp_v4f32); + impl_unary!(f32x8: exp_v8f32); + impl_unary!(f64x2: exp_v2f64); + impl_unary!(f64x4: exp_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: exp_f32); + impl_unary!(f32x4: exp_v4f32); + impl_unary!(f32x8: exp_v8f32); + impl_unary!(f32x16: exp_v16f32); + + impl_unary!(f64x2: exp_v2f64); + impl_unary!(f64x4: exp_v4f64); + impl_unary!(f64x8: exp_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs new file mode 100644 index 000000000000..88a5a6c6c158 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs @@ -0,0 +1,112 @@ +//! Vertical floating-point `ln` +#![allow(unused)] + +// FIXME 64-bit lngle elem vectors mislng + +use crate::*; + +crate trait Ln { + fn ln(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.log.v2f32"] + fn ln_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.log.v4f32"] + fn ln_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.log.v8f32"] + fn ln_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.log.v16f32"] + fn ln_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit lngle elem vectors + #[link_name = "llvm.log.v1f64"] + fn ln_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.log.v2f64"] + fn ln_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.log.v4f64"] + fn ln_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.log.v8f64"] + fn ln_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.log.f32"] + fn ln_f32(x: f32) -> f32; + #[link_name = "llvm.log.f64"] + fn ln_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Ln, ln); + +cfg_if! 
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: ln_f32); + impl_unary!(f32x4[f32; 4]: ln_f32); + impl_unary!(f32x8[f32; 8]: ln_f32); + impl_unary!(f32x16[f32; 16]: ln_f32); + + impl_unary!(f64x2[f64; 2]: ln_f64); + impl_unary!(f64x4[f64; 4]: ln_f64); + impl_unary!(f64x8[f64; 8]: ln_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2); + + impl_unary!(f32x4: Sleef_logf4_u10avx2128); + impl_unary!(f32x8: Sleef_logf8_u10avx2); + impl_unary!(f64x2: Sleef_logd2_u10avx2128); + impl_unary!(f64x4: Sleef_logd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx); + + impl_unary!(f32x4: Sleef_logf4_u10sse4); + impl_unary!(f32x8: Sleef_logf8_u10avx); + impl_unary!(f64x2: Sleef_logd2_u10sse4); + impl_unary!(f64x4: Sleef_logd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4); + + impl_unary!(f32x4: Sleef_logf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4); + impl_unary!(f64x2: Sleef_logd2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4); + } else if #[cfg(target_feature = "sse2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2); + impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2); + impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2); + + impl_unary!(f32x4: Sleef_logf4_u10sse2); + impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2); + impl_unary!(f64x2: 
Sleef_logd2_u10sse2); + impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2); + } else { + impl_unary!(f32x2[f32; 2]: ln_f32); + impl_unary!(f32x16: ln_v16f32); + impl_unary!(f64x8: ln_v8f64); + + impl_unary!(f32x4: ln_v4f32); + impl_unary!(f32x8: ln_v8f32); + impl_unary!(f64x2: ln_v2f64); + impl_unary!(f64x4: ln_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: ln_f32); + impl_unary!(f32x4: ln_v4f32); + impl_unary!(f32x8: ln_v8f32); + impl_unary!(f32x16: ln_v16f32); + + impl_unary!(f64x2: ln_v2f64); + impl_unary!(f64x4: ln_v4f64); + impl_unary!(f64x8: ln_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs new file mode 100644 index 000000000000..02d0ca3f5c7a --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs @@ -0,0 +1,559 @@ +//! Utility macros +#![allow(unused)] + + +macro_rules! impl_unary_ { + // implementation mapping 1:1 + (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self))) + } + } + } + }; + // implementation mapping 1:1 for when `$fun` is a generic function + // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). 
+ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(self.0)) + } + } + } + }; + (scalar | $trait_id:ident, $trait_method:ident, + $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + union U { + vec: $vec_id, + scalars: [$sid; $scount], + } + let mut scalars = U { vec: self }.scalars; + for i in &mut scalars { + *i = $fun(*i); + } + U { scalars }.vec + } + } + } + }; + // implementation calling fun twice on each of the vector halves: + (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vech_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + halves: [$vech_id; 2], + } + + let mut halves = U { vec: self }.halves; + + *halves.get_unchecked_mut(0) = + transmute($fun(transmute(*halves.get_unchecked(0)))); + *halves.get_unchecked_mut(1) = + transmute($fun(transmute(*halves.get_unchecked(1)))); + + U { halves }.vec + } + } + } + }; + // implementation calling fun four times on each of the vector quarters: + (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vecq_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + quarters: [$vecq_id; 4], + } + + let mut quarters = U { vec: self }.quarters; + + *quarters.get_unchecked_mut(0) = + transmute($fun(transmute(*quarters.get_unchecked(0)))); + *quarters.get_unchecked_mut(1) = + transmute($fun(transmute(*quarters.get_unchecked(1)))); + *quarters.get_unchecked_mut(2) = + transmute($fun(transmute(*quarters.get_unchecked(2)))); + *quarters.get_unchecked_mut(3) = + 
transmute($fun(transmute(*quarters.get_unchecked(3)))); + + U { quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let twice = U { vec: [self, uninitialized()] }.twice; + let twice = transmute($fun(transmute(twice))); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_unary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! impl_unary { + ($vid:ident: $fun:ident) => { + impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[g]: $fun:ident) => { + impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { + impl_unary_!( + scalar | $trait_id, + $trait_method, + $vid, + [$sid; $sc], + $fun + ); + }; + ($vid:ident[s]: $fun:ident) => { + impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl_unary_!( + halves | $trait_id, + $trait_method, + $vid, + $vid_h, + $fun + ); + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl_unary_!( + quarter | $trait_id, + $trait_method, + $vid, + $vid_q, + $fun + ); + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl_unary_!( + twice | $trait_id, + $trait_method, + $vid, + $vid_t, + $fun + ); + }; + } + }; +} + +macro_rules! 
impl_tertiary_ { + // implementation mapping 1:1 + (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self, z: Self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun( + transmute(self), + transmute(y), + transmute(z), + )) + } + } + } + }; + (scalar | $trait_id:ident, $trait_method:ident, + $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self, z: Self) -> Self { + unsafe { + union U { + vec: $vec_id, + scalars: [$sid; $scount], + } + let mut x = U { vec: self }.scalars; + let y = U { vec: y }.scalars; + let z = U { vec: z }.scalars; + for (x, (y, z)) in (&mut scalars).zip(&y).zip(&z) { + *i = $fun(*i, *y, *z); + } + U { vec: x }.vec + } + } + } + }; + // implementation calling fun twice on each of the vector halves: + (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vech_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self, z: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + halves: [$vech_id; 2], + } + + let mut x_halves = U { vec: self }.halves; + let y_halves = U { vec: y }.halves; + let z_halves = U { vec: z }.halves; + + *x_halves.get_unchecked_mut(0) = transmute($fun( + transmute(*x_halves.get_unchecked(0)), + transmute(*y_halves.get_unchecked(0)), + transmute(*z_halves.get_unchecked(0)), + )); + *x_halves.get_unchecked_mut(1) = transmute($fun( + transmute(*x_halves.get_unchecked(1)), + transmute(*y_halves.get_unchecked(1)), + transmute(*z_halves.get_unchecked(1)), + )); + + U { halves: x_halves }.vec + } + } + } + }; + // implementation calling fun four times on each of the vector quarters: + (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vecq_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: 
Self, z: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + quarters: [$vecq_id; 4], + } + + let mut x_quarters = U { vec: self }.quarters; + let y_quarters = U { vec: y }.quarters; + let z_quarters = U { vec: z }.quarters; + + *x_quarters.get_unchecked_mut(0) = transmute($fun( + transmute(*x_quarters.get_unchecked(0)), + transmute(*y_quarters.get_unchecked(0)), + transmute(*z_quarters.get_unchecked(0)), + )); + + *x_quarters.get_unchecked_mut(1) = transmute($fun( + transmute(*x_quarters.get_unchecked(1)), + transmute(*y_quarters.get_unchecked(1)), + transmute(*z_quarters.get_unchecked(1)), + )); + + *x_quarters.get_unchecked_mut(2) = transmute($fun( + transmute(*x_quarters.get_unchecked(2)), + transmute(*y_quarters.get_unchecked(2)), + transmute(*z_quarters.get_unchecked(2)), + )); + + *x_quarters.get_unchecked_mut(3) = transmute($fun( + transmute(*x_quarters.get_unchecked(3)), + transmute(*y_quarters.get_unchecked(3)), + transmute(*z_quarters.get_unchecked(3)), + )); + + U { quarters: x_quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self, z: Self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let x_twice = U { vec: [self, uninitialized()] }.twice; + let y_twice = U { vec: [y, uninitialized()] }.twice; + let z_twice = U { vec: [z, uninitialized()] }.twice; + let twice: $vect_id = transmute($fun( + transmute(x_twice), + transmute(y_twice), + transmute(z_twice), + )); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_tertiary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! 
impl_tertiary { + ($vid:ident: $fun:ident) => { + impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { + impl_tertiary_!( + scalar | $trait_id, + $trait_method, + $vid, + [$sid; $sc], + $fun + ); + }; + ($vid:ident[s]: $fun:ident) => { + impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl_tertiary_!( + halves | $trait_id, + $trait_method, + $vid, + $vid_h, + $fun + ); + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl_tertiary_!( + quarter | $trait_id, + $trait_method, + $vid, + $vid_q, + $fun + ); + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl_tertiary_!( + twice | $trait_id, + $trait_method, + $vid, + $vid_t, + $fun + ); + }; + } + }; +} + +macro_rules! impl_binary_ { + // implementation mapping 1:1 + (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self), transmute(y))) + } + } + } + }; + (scalar | $trait_id:ident, $trait_method:ident, + $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + union U { + vec: $vec_id, + scalars: [$sid; $scount], + } + let mut x = U { vec: self }.scalars; + let y = U { vec: y }.scalars; + for (x, y) in x.iter_mut().zip(&y) { + *x = $fun(*x, *y); + } + U { scalars: x }.vec + } + } + } + }; + // implementation calling fun twice on each of the vector halves: + (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vech_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + halves: [$vech_id; 2], + } + + let mut x_halves = U { vec: self }.halves; 
+ let y_halves = U { vec: y }.halves; + + *x_halves.get_unchecked_mut(0) = transmute($fun( + transmute(*x_halves.get_unchecked(0)), + transmute(*y_halves.get_unchecked(0)), + )); + *x_halves.get_unchecked_mut(1) = transmute($fun( + transmute(*x_halves.get_unchecked(1)), + transmute(*y_halves.get_unchecked(1)), + )); + + U { halves: x_halves }.vec + } + } + } + }; + // implementation calling fun four times on each of the vector quarters: + (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vecq_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::transmute; + union U { + vec: $vec_id, + quarters: [$vecq_id; 4], + } + + let mut x_quarters = U { vec: self }.quarters; + let y_quarters = U { vec: y }.quarters; + + *x_quarters.get_unchecked_mut(0) = transmute($fun( + transmute(*x_quarters.get_unchecked(0)), + transmute(*y_quarters.get_unchecked(0)), + )); + + *x_quarters.get_unchecked_mut(1) = transmute($fun( + transmute(*x_quarters.get_unchecked(1)), + transmute(*y_quarters.get_unchecked(1)), + )); + + *x_quarters.get_unchecked_mut(2) = transmute($fun( + transmute(*x_quarters.get_unchecked(2)), + transmute(*y_quarters.get_unchecked(2)), + )); + + *x_quarters.get_unchecked_mut(3) = transmute($fun( + transmute(*x_quarters.get_unchecked(3)), + transmute(*y_quarters.get_unchecked(3)), + )); + + U { quarters: x_quarters }.vec + } + } + } + }; + // implementation calling fun once on a vector twice as large: + (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, + $vect_id:ident, $fun:ident) => { + impl $trait_id for $vec_id { + #[inline] + fn $trait_method(self, y: Self) -> Self { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vec_id; 2], + twice: $vect_id, + } + + let x_twice = U { vec: [self, uninitialized()] }.twice; + let y_twice = U { vec: [y, uninitialized()] }.twice; + let twice: $vect_id = transmute($fun( + 
transmute(x_twice), + transmute(y_twice), + )); + + *(U { twice }.vec.get_unchecked(0)) + } + } + } + }; +} + +macro_rules! gen_binary_impl_table { + ($trait_id:ident, $trait_method:ident) => { + macro_rules! impl_binary { + ($vid:ident: $fun:ident) => { + impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { + impl_binary_!( + scalar | $trait_id, + $trait_method, + $vid, + [$sid; $sc], + $fun + ); + }; + ($vid:ident[s]: $fun:ident) => { + impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun); + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl_binary_!( + halves | $trait_id, + $trait_method, + $vid, + $vid_h, + $fun + ); + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl_binary_!( + quarter | $trait_id, + $trait_method, + $vid, + $vid_q, + $fun + ); + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl_binary_!( + twice | $trait_id, + $trait_method, + $vid, + $vid_t, + $fun + ); + }; + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs new file mode 100644 index 000000000000..f48a57dc46c6 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs @@ -0,0 +1,109 @@ +//! 
Vertical floating-point `mul_add` +#![allow(unused)] +use crate::*; + +// FIXME: 64-bit 1 element mul_add + +crate trait MulAdd { + fn mul_add(self, y: Self, z: Self) -> Self; +} + +#[cfg(not(target_arch = "s390x"))] +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.fma.v2f32"] + fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; + #[link_name = "llvm.fma.v4f32"] + fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; + #[link_name = "llvm.fma.v8f32"] + fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; + #[link_name = "llvm.fma.v16f32"] + fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.fma.v1f64"] + fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; + */ + #[link_name = "llvm.fma.v2f64"] + fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; + #[link_name = "llvm.fma.v4f64"] + fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; + #[link_name = "llvm.fma.v8f64"] + fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; +} + +gen_tertiary_impl_table!(MulAdd, mul_add); + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + macro_rules! impl_broken { + ($id:ident) => { + impl MulAdd for $id { + #[inline] + fn mul_add(self, y: Self, z: Self) -> Self { + self * y + z + } + } + }; + } + + impl_broken!(f32x2); + impl_broken!(f32x4); + impl_broken!(f32x8); + impl_broken!(f32x16); + + impl_broken!(f64x2); + impl_broken!(f64x4); + impl_broken!(f64x8); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128); + impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2); + impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2); + + impl_tertiary!(f32x4: Sleef_fmaf4_avx2128); + impl_tertiary!(f32x8: Sleef_fmaf8_avx2); + impl_tertiary!(f64x2: Sleef_fmad2_avx2128); + impl_tertiary!(f64x4: Sleef_fmad4_avx2); + } else if #[cfg(target_feature = "avx")] { + impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); + impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx); + impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx); + + impl_tertiary!(f32x4: Sleef_fmaf4_sse4); + impl_tertiary!(f32x8: Sleef_fmaf8_avx); + impl_tertiary!(f64x2: Sleef_fmad2_sse4); + impl_tertiary!(f64x4: Sleef_fmad4_avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); + impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4); + impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4); + + impl_tertiary!(f32x4: Sleef_fmaf4_sse4); + impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4); + impl_tertiary!(f64x2: Sleef_fmad2_sse4); + impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4); + } else { + impl_tertiary!(f32x2: fma_v2f32); + impl_tertiary!(f32x16: fma_v16f32); + impl_tertiary!(f64x8: fma_v8f64); + + impl_tertiary!(f32x4: fma_v4f32); + impl_tertiary!(f32x8: fma_v8f32); + impl_tertiary!(f64x2: fma_v2f64); + impl_tertiary!(f64x4: fma_v4f64); + } + } + } else { + impl_tertiary!(f32x2: fma_v2f32); + impl_tertiary!(f32x4: fma_v4f32); + impl_tertiary!(f32x8: fma_v8f32); + impl_tertiary!(f32x16: fma_v16f32); + // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors + impl_tertiary!(f64x2: fma_v2f64); + impl_tertiary!(f64x4: fma_v4f64); + impl_tertiary!(f64x8: fma_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs new file mode 100644 index 
000000000000..8c41fb131d94 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs @@ -0,0 +1,66 @@ +//! Approximation for floating-point `mul_add` +use crate::*; + +// FIXME: 64-bit 1 element mul_adde + +crate trait MulAddE { + fn mul_adde(self, y: Self, z: Self) -> Self; +} + +#[cfg(not(target_arch = "s390x"))] +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.fmuladd.v2f32"] + fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; + #[link_name = "llvm.fmuladd.v4f32"] + fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; + #[link_name = "llvm.fmuladd.v8f32"] + fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; + #[link_name = "llvm.fmuladd.v16f32"] + fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.fmuladd.v1f64"] + fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; + */ + #[link_name = "llvm.fmuladd.v2f64"] + fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; + #[link_name = "llvm.fmuladd.v4f64"] + fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; + #[link_name = "llvm.fmuladd.v8f64"] + fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; +} + +macro_rules! 
impl_mul_adde { + ($id:ident : $fn:ident) => { + impl MulAddE for $id { + #[inline] + fn mul_adde(self, y: Self, z: Self) -> Self { + #[cfg(not(target_arch = "s390x"))] + { + use crate::mem::transmute; + unsafe { + transmute($fn( + transmute(self), + transmute(y), + transmute(z), + )) + } + } + #[cfg(target_arch = "s390x")] + { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + self * y + z + } + } + } + }; +} + +impl_mul_adde!(f32x2: fmuladd_v2f32); +impl_mul_adde!(f32x4: fmuladd_v4f32); +impl_mul_adde!(f32x8: fmuladd_v8f32); +impl_mul_adde!(f32x16: fmuladd_v16f32); +// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors +impl_mul_adde!(f64x2: fmuladd_v2f64); +impl_mul_adde!(f64x4: fmuladd_v4f64); +impl_mul_adde!(f64x8: fmuladd_v8f64); diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs new file mode 100644 index 000000000000..bc15067d73a3 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs @@ -0,0 +1,112 @@ +//! 
Vertical floating-point `powf` +#![allow(unused)] + +// FIXME 64-bit powfgle elem vectors mispowfg + +use crate::*; + +crate trait Powf { + fn powf(self, x: Self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.pow.v2f32"] + fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2; + #[link_name = "llvm.pow.v4f32"] + fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4; + #[link_name = "llvm.pow.v8f32"] + fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8; + #[link_name = "llvm.pow.v16f32"] + fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16; + /* FIXME 64-bit powfgle elem vectors + #[link_name = "llvm.pow.v1f64"] + fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1; + */ + #[link_name = "llvm.pow.v2f64"] + fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2; + #[link_name = "llvm.pow.v4f64"] + fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4; + #[link_name = "llvm.pow.v8f64"] + fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8; + + #[link_name = "llvm.pow.f32"] + fn powf_f32(x: f32, y: f32) -> f32; + #[link_name = "llvm.pow.f64"] + fn powf_f64(x: f64, y: f64) -> f64; +} + +gen_binary_impl_table!(Powf, powf); + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_binary!(f32x2[f32; 2]: powf_f32); + impl_binary!(f32x4[f32; 4]: powf_f32); + impl_binary!(f32x8[f32; 8]: powf_f32); + impl_binary!(f32x16[f32; 16]: powf_f32); + + impl_binary!(f64x2[f64; 2]: powf_f64); + impl_binary!(f64x4[f64; 4]: powf_f64); + impl_binary!(f64x8[f64; 8]: powf_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128); + impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2); + impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2); + + impl_binary!(f32x4: Sleef_powf4_u10avx2128); + impl_binary!(f32x8: Sleef_powf8_u10avx2); + impl_binary!(f64x2: Sleef_powd2_u10avx2128); + impl_binary!(f64x4: Sleef_powd4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx); + impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx); + + impl_binary!(f32x4: Sleef_powf4_u10sse4); + impl_binary!(f32x8: Sleef_powf8_u10avx); + impl_binary!(f64x2: Sleef_powd2_u10sse4); + impl_binary!(f64x4: Sleef_powd4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4); + + impl_binary!(f32x4: Sleef_powf4_u10sse4); + impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4); + impl_binary!(f64x2: Sleef_powd2_u10sse4); + impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4); + } else if #[cfg(target_feature = "sse2")] { + impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2); + + impl_binary!(f32x4: Sleef_powf4_u10sse2); + impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2); + impl_binary!(f64x2: Sleef_powd2_u10sse2); + impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2); + } else { + impl_binary!(f32x2[f32; 2]: powf_f32); + impl_binary!(f32x4: powf_v4f32); + impl_binary!(f32x8: powf_v8f32); + impl_binary!(f32x16: powf_v16f32); + + impl_binary!(f64x2: powf_v2f64); + impl_binary!(f64x4: powf_v4f64); + impl_binary!(f64x8: powf_v8f64); + } + } + } else { + impl_binary!(f32x2[f32; 2]: powf_f32); + impl_binary!(f32x4: powf_v4f32); + 
impl_binary!(f32x8: powf_v8f32); + impl_binary!(f32x16: powf_v16f32); + + impl_binary!(f64x2: powf_v2f64); + impl_binary!(f64x4: powf_v4f64); + impl_binary!(f64x8: powf_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs new file mode 100644 index 000000000000..7b014d07da8d --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs @@ -0,0 +1,103 @@ +//! Vertical floating-point `sin` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sin + +use crate::*; + +crate trait Sin { + fn sin(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.sin.v2f32"] + fn sin_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.sin.v4f32"] + fn sin_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.sin.v8f32"] + fn sin_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.sin.v16f32"] + fn sin_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.sin.v1f64"] + fn sin_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.sin.v2f64"] + fn sin_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.sin.v4f64"] + fn sin_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.sin.v8f64"] + fn sin_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.sin.f32"] + fn sin_f32(x: f32) -> f32; + #[link_name = "llvm.sin.f64"] + fn sin_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Sin, sin); + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: sin_f32); + impl_unary!(f32x4[f32; 4]: sin_f32); + impl_unary!(f32x8[f32; 8]: sin_f32); + impl_unary!(f32x16[f32; 16]: sin_f32); + + impl_unary!(f64x2[f64; 2]: sin_f64); + impl_unary!(f64x4[f64; 4]: sin_f64); + impl_unary!(f64x8[f64; 8]: sin_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2); + + impl_unary!(f32x4: Sleef_sinf4_u10avx2128); + impl_unary!(f32x8: Sleef_sinf8_u10avx2); + impl_unary!(f64x2: Sleef_sind2_u10avx2128); + impl_unary!(f64x4: Sleef_sind4_u10avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx); + impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx); + + impl_unary!(f32x4: Sleef_sinf4_u10sse4); + impl_unary!(f32x8: Sleef_sinf8_u10avx); + impl_unary!(f64x2: Sleef_sind2_u10sse4); + impl_unary!(f64x4: Sleef_sind4_u10avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4); + + impl_unary!(f32x4: Sleef_sinf4_u10sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4); + impl_unary!(f64x2: Sleef_sind2_u10sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4); + } else { + impl_unary!(f32x2[f32; 2]: sin_f32); + impl_unary!(f32x16: sin_v16f32); + impl_unary!(f64x8: sin_v8f64); + + impl_unary!(f32x4: sin_v4f32); + impl_unary!(f32x8: sin_v8f32); + impl_unary!(f64x2: sin_v2f64); + impl_unary!(f64x4: sin_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: sin_f32); + impl_unary!(f32x4: sin_v4f32); + impl_unary!(f32x8: sin_v8f32); + impl_unary!(f32x16: sin_v16f32); + + impl_unary!(f64x2: sin_v2f64); + impl_unary!(f64x4: sin_v4f64); + impl_unary!(f64x8: sin_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs new file mode 100644 index 000000000000..0f1249ec88f0 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs @@ -0,0 
+1,195 @@ +//! Vertical floating-point `sin_cos` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sin_cos + +use crate::*; + +crate trait SinCosPi: Sized { + type Output; + fn sin_cos_pi(self) -> Self::Output; +} + +macro_rules! impl_def { + ($vid:ident, $PI:path) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + #[inline] + fn sin_cos_pi(self) -> Self::Output { + let v = self * Self::splat($PI); + (v.sin(), v.cos()) + } + } + }; +} + +macro_rules! impl_def32 { + ($vid:ident) => { + impl_def!($vid, crate::f32::consts::PI); + }; +} +macro_rules! impl_def64 { + ($vid:ident) => { + impl_def!($vid, crate::f64::consts::PI); + }; +} + +macro_rules! impl_unary_t { + ($vid:ident: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + transmute($fun(transmute(self))) + } + } + } + }; + ($vid:ident[t => $vid_t:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::{transmute, uninitialized}; + + union U { + vec: [$vid; 2], + twice: $vid_t, + } + + let twice = U { vec: [self, uninitialized()] }.twice; + let twice = transmute($fun(transmute(twice))); + + union R { + twice: ($vid_t, $vid_t), + vecs: ([$vid; 2], [$vid; 2]), + } + let r = R { twice }.vecs; + (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) + } + } + } + }; + ($vid:ident[h => $vid_h:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + + union U { + vec: $vid, + halves: [$vid_h; 2], + } + + let halves = U { vec: self }.halves; + + let res_0: ($vid_h, $vid_h) = + transmute($fun(transmute(*halves.get_unchecked(0)))); + let res_1: ($vid_h, $vid_h) = + transmute($fun(transmute(*halves.get_unchecked(1)))); + + union R { + result: ($vid, $vid), + halves: ([$vid_h; 2], [$vid_h; 2]), + } + R { halves: 
([res_0.0, res_1.0], [res_0.1, res_1.1]) } + .result + } + } + } + }; + ($vid:ident[q => $vid_q:ident]: $fun:ident) => { + impl SinCosPi for $vid { + type Output = (Self, Self); + fn sin_cos_pi(self) -> Self::Output { + unsafe { + use crate::mem::transmute; + + union U { + vec: $vid, + quarters: [$vid_q; 4], + } + + let quarters = U { vec: self }.quarters; + + let res_0: ($vid_q, $vid_q) = + transmute($fun(transmute(*quarters.get_unchecked(0)))); + let res_1: ($vid_q, $vid_q) = + transmute($fun(transmute(*quarters.get_unchecked(1)))); + let res_2: ($vid_q, $vid_q) = + transmute($fun(transmute(*quarters.get_unchecked(2)))); + let res_3: ($vid_q, $vid_q) = + transmute($fun(transmute(*quarters.get_unchecked(3)))); + + union R { + result: ($vid, $vid), + quarters: ([$vid_q; 4], [$vid_q; 4]), + } + R { + quarters: ( + [res_0.0, res_1.0, res_2.0, res_3.0], + [res_0.1, res_1.1, res_2.1, res_3.1], + ), + } + .result + } + } + } + }; +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); + impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); + impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); + impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); + impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); + impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); + impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); + impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); + impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); + + impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); + impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); + impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); + impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } + } + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs new file mode 100644 index 000000000000..72df98c93c91 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs @@ -0,0 +1,87 @@ +//! 
Vertical floating-point `sin_pi` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sin_pi + +use crate::*; + +crate trait SinPi { + fn sin_pi(self) -> Self; +} + +gen_unary_impl_table!(SinPi, sin_pi); + +macro_rules! impl_def { + ($vid:ident, $PI:path) => { + impl SinPi for $vid { + #[inline] + fn sin_pi(self) -> Self { + (self * Self::splat($PI)).sin() + } + } + }; +} +macro_rules! impl_def32 { + ($vid:ident) => { + impl_def!($vid, crate::f32::consts::PI); + }; +} +macro_rules! impl_def64 { + ($vid:ident) => { + impl_def!($vid, crate::f64::consts::PI); + }; +} + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2); + + impl_unary!(f32x4: Sleef_sinpif4_u05avx2128); + impl_unary!(f32x8: Sleef_sinpif8_u05avx2); + impl_unary!(f64x2: Sleef_sinpid2_u05avx2128); + impl_unary!(f64x4: Sleef_sinpid4_u05avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx); + impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx); + + impl_unary!(f32x4: Sleef_sinpif4_u05sse4); + impl_unary!(f32x8: Sleef_sinpif8_u05avx); + impl_unary!(f64x2: Sleef_sinpid2_u05sse4); + impl_unary!(f64x4: Sleef_sinpid4_u05avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4); + + impl_unary!(f32x4: Sleef_sinpif4_u05sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4); + impl_unary!(f64x2: Sleef_sinpid2_u05sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4); + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + 
impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } + } + } else { + impl_def32!(f32x2); + impl_def32!(f32x4); + impl_def32!(f32x8); + impl_def32!(f32x16); + + impl_def64!(f64x2); + impl_def64!(f64x4); + impl_def64!(f64x8); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs new file mode 100644 index 000000000000..7ce31df62662 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs @@ -0,0 +1,103 @@ +//! Vertical floating-point `sqrt` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sqrt + +use crate::*; + +crate trait Sqrt { + fn sqrt(self) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.sqrt.v2f32"] + fn sqrt_v2f32(x: f32x2) -> f32x2; + #[link_name = "llvm.sqrt.v4f32"] + fn sqrt_v4f32(x: f32x4) -> f32x4; + #[link_name = "llvm.sqrt.v8f32"] + fn sqrt_v8f32(x: f32x8) -> f32x8; + #[link_name = "llvm.sqrt.v16f32"] + fn sqrt_v16f32(x: f32x16) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.sqrt.v1f64"] + fn sqrt_v1f64(x: f64x1) -> f64x1; + */ + #[link_name = "llvm.sqrt.v2f64"] + fn sqrt_v2f64(x: f64x2) -> f64x2; + #[link_name = "llvm.sqrt.v4f64"] + fn sqrt_v4f64(x: f64x4) -> f64x4; + #[link_name = "llvm.sqrt.v8f64"] + fn sqrt_v8f64(x: f64x8) -> f64x8; + + #[link_name = "llvm.sqrt.f32"] + fn sqrt_f32(x: f32) -> f32; + #[link_name = "llvm.sqrt.f64"] + fn sqrt_f64(x: f64) -> f64; +} + +gen_unary_impl_table!(Sqrt, sqrt); + +cfg_if!
{ + if #[cfg(target_arch = "s390x")] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 + impl_unary!(f32x2[f32; 2]: sqrt_f32); + impl_unary!(f32x4[f32; 4]: sqrt_f32); + impl_unary!(f32x8[f32; 8]: sqrt_f32); + impl_unary!(f32x16[f32; 16]: sqrt_f32); + + impl_unary!(f64x2[f64; 2]: sqrt_f64); + impl_unary!(f64x4[f64; 4]: sqrt_f64); + impl_unary!(f64x8[f64; 8]: sqrt_f64); + } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2); + + impl_unary!(f32x4: Sleef_sqrtf4_avx2128); + impl_unary!(f32x8: Sleef_sqrtf8_avx2); + impl_unary!(f64x2: Sleef_sqrtd2_avx2128); + impl_unary!(f64x4: Sleef_sqrtd4_avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx); + + impl_unary!(f32x4: Sleef_sqrtf4_sse4); + impl_unary!(f32x8: Sleef_sqrtf8_avx); + impl_unary!(f64x2: Sleef_sqrtd2_sse4); + impl_unary!(f64x4: Sleef_sqrtd4_avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4); + + impl_unary!(f32x4: Sleef_sqrtf4_sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4); + impl_unary!(f64x2: Sleef_sqrtd2_sse4); + impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4); + } else { + impl_unary!(f32x2[f32; 2]: sqrt_f32); + impl_unary!(f32x16: sqrt_v16f32); + impl_unary!(f64x8: sqrt_v8f64); + + impl_unary!(f32x4: sqrt_v4f32); + impl_unary!(f32x8: sqrt_v8f32); + impl_unary!(f64x2: sqrt_v2f64); + impl_unary!(f64x4: sqrt_v4f64); + } + } + } else { + impl_unary!(f32x2[f32; 2]: sqrt_f32); + impl_unary!(f32x4: sqrt_v4f32); + 
impl_unary!(f32x8: sqrt_v8f32); + impl_unary!(f32x16: sqrt_v16f32); + + impl_unary!(f64x2: sqrt_v2f64); + impl_unary!(f64x4: sqrt_v4f64); + impl_unary!(f64x8: sqrt_v8f64); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs new file mode 100644 index 000000000000..c1e379c34241 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs @@ -0,0 +1,67 @@ +//! Vertical floating-point `sqrte` +#![allow(unused)] + +// FIXME 64-bit 1 elem vectors sqrte + +use crate::llvm::simd_fsqrt; +use crate::*; + +crate trait Sqrte { + fn sqrte(self) -> Self; +} + +gen_unary_impl_table!(Sqrte, sqrte); + +cfg_if! { + if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { + use sleef_sys::*; + cfg_if! { + if #[cfg(target_feature = "avx2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2); + + impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128); + impl_unary!(f32x8: Sleef_sqrtf8_u35avx2); + impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128); + impl_unary!(f64x4: Sleef_sqrtd4_u35avx2); + } else if #[cfg(target_feature = "avx")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx); + impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx); + + impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x8: Sleef_sqrtf8_u35avx); + impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); + impl_unary!(f64x4: Sleef_sqrtd4_u35avx); + } else if #[cfg(target_feature = "sse4.2")] { + impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4); + + impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); + impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4); + impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); + impl_unary!(f64x4[h => f64x2]:
Sleef_sqrtd2_u35sse4); + } else { + impl_unary!(f32x2[g]: simd_fsqrt); + impl_unary!(f32x16[g]: simd_fsqrt); + impl_unary!(f64x8[g]: simd_fsqrt); + + impl_unary!(f32x4[g]: simd_fsqrt); + impl_unary!(f32x8[g]: simd_fsqrt); + impl_unary!(f64x2[g]: simd_fsqrt); + impl_unary!(f64x4[g]: simd_fsqrt); + } + } + } else { + impl_unary!(f32x2[g]: simd_fsqrt); + impl_unary!(f32x4[g]: simd_fsqrt); + impl_unary!(f32x8[g]: simd_fsqrt); + impl_unary!(f32x16[g]: simd_fsqrt); + + impl_unary!(f64x2[g]: simd_fsqrt); + impl_unary!(f64x4[g]: simd_fsqrt); + impl_unary!(f64x8[g]: simd_fsqrt); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs new file mode 100644 index 000000000000..39f493d3b17f --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs @@ -0,0 +1,28 @@ +//! Provides `isize` and `usize` + +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(target_pointer_width = "8")] { + crate type isize_ = i8; + crate type usize_ = u8; + } else if #[cfg(target_pointer_width = "16")] { + crate type isize_ = i16; + crate type usize_ = u16; + } else if #[cfg(target_pointer_width = "32")] { + crate type isize_ = i32; + crate type usize_ = u32; + + } else if #[cfg(target_pointer_width = "64")] { + crate type isize_ = i64; + crate type usize_ = u64; + } else if #[cfg(target_pointer_width = "64")] { + crate type isize_ = i64; + crate type usize_ = u64; + } else if #[cfg(target_pointer_width = "128")] { + crate type isize_ = i128; + crate type usize_ = u128; + } else { + compile_error!("unsupported target_pointer_width"); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs new file mode 100644 index 000000000000..7be4f5fabbea --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions.rs @@ -0,0 +1 @@ +crate mod mask; diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs 
b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs new file mode 100644 index 000000000000..97260c6d4e03 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs @@ -0,0 +1,69 @@ +//! Code generation workaround for `all()` mask horizontal reduction. +//! +//! Works around [LLVM bug 36702]. +//! +//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 +#![allow(unused_macros)] + +use crate::*; + +crate trait All: crate::marker::Sized { + unsafe fn all(self) -> bool; +} + +crate trait Any: crate::marker::Sized { + unsafe fn any(self) -> bool; +} + +#[macro_use] +mod fallback_impl; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[macro_use] + mod x86; + } else if #[cfg(all(target_arch = "arm", target_feature = "v7", + target_feature = "neon", + any(feature = "core_arch", libcore_neon)))] { + #[macro_use] + mod arm; + } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { + #[macro_use] + mod aarch64; + } else { + #[macro_use] + mod fallback; + } +} + +impl_mask_reductions!(m8x2); +impl_mask_reductions!(m8x4); +impl_mask_reductions!(m8x8); +impl_mask_reductions!(m8x16); +impl_mask_reductions!(m8x32); +impl_mask_reductions!(m8x64); + +impl_mask_reductions!(m16x2); +impl_mask_reductions!(m16x4); +impl_mask_reductions!(m16x8); +impl_mask_reductions!(m16x16); +impl_mask_reductions!(m16x32); + +impl_mask_reductions!(m32x2); +impl_mask_reductions!(m32x4); +impl_mask_reductions!(m32x8); +impl_mask_reductions!(m32x16); + +// FIXME: 64-bit single element vector +// impl_mask_reductions!(m64x1); +impl_mask_reductions!(m64x2); +impl_mask_reductions!(m64x4); +impl_mask_reductions!(m64x8); + +impl_mask_reductions!(m128x1); +impl_mask_reductions!(m128x2); +impl_mask_reductions!(m128x4); + +impl_mask_reductions!(msizex2); +impl_mask_reductions!(msizex4); +impl_mask_reductions!(msizex8); diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs new file mode 100644 index 000000000000..e9586eace1ff --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs @@ -0,0 +1,71 @@ +//! Mask reductions implementation for `aarch64` targets + +/// 128-bit wide vectors +macro_rules! aarch64_128_neon_impl { + ($id:ident, $vmin:ident, $vmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + use crate::arch::aarch64::$vmin; + $vmin(crate::mem::transmute(self)) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + use crate::arch::aarch64::$vmax; + $vmax(crate::mem::transmute(self)) != 0 + } + } + } +} + +/// 64-bit wide vectors +macro_rules! aarch64_64_neon_impl { + ($id:ident, $vec128:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + // Duplicates the 64-bit vector into a 128-bit one and + // calls all on that. + union U { + halves: ($id, $id), + vec: $vec128, + } + U { + halves: (self, self), + }.vec.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + union U { + halves: ($id, $id), + vec: $vec128, + } + U { + halves: (self, self), + }.vec.any() + } + } + }; +} + +/// Mask reduction implementation for `aarch64` targets +macro_rules! 
impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; + (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; + (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; + // 128-bit wide masks + (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; + (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; + (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs new file mode 100644 index 000000000000..1987af7a9676 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs @@ -0,0 +1,54 @@ +//! Mask reductions implementation for `arm` targets + +/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with +/// more than two elements. +macro_rules! arm_128_v7_neon_impl { + ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn all(self) -> bool { + use crate::arch::arm::$vpmin; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmin( + transmute(halves.0), + transmute(halves.1), + )); + h.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn any(self) -> bool { + use crate::arch::arm::$vpmax; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmax( + transmute(halves.0), + transmute(halves.1), + )); + h.any() + } + } + }; +} + +/// Mask reduction implementation for `arm` targets +macro_rules! 
impl_mask_reductions { + // 128-bit wide masks + (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; + (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; + (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs new file mode 100644 index 000000000000..25e5c813abca --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs @@ -0,0 +1,6 @@ +//! Default mask reduction implementations. + +/// Default mask reduction implementation +macro_rules! impl_mask_reductions { + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs new file mode 100644 index 000000000000..0d246e2fdab6 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs @@ -0,0 +1,237 @@ +//! Default implementation of a mask reduction for any target. + +macro_rules! fallback_to_other_impl { + ($id:ident, $other:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.all() + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.any() + } + } + }; +} + +/// Fallback implementation. +macro_rules! 
fallback_impl { + // 16-bit wide masks: + (m8x2) => { + impl All for m8x2 { + #[inline] + unsafe fn all(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i == u16::max_value() + } + } + impl Any for m8x2 { + #[inline] + unsafe fn any(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i != 0 + } + } + }; + // 32-bit wide masks + (m8x4) => { + impl All for m8x4 { + #[inline] + unsafe fn all(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i == u32::max_value() + } + } + impl Any for m8x4 { + #[inline] + unsafe fn any(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x2) => { + fallback_to_other_impl!(m16x2, m8x4); + }; + // 64-bit wide masks: + (m8x8) => { + impl All for m8x8 { + #[inline] + unsafe fn all(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i == u64::max_value() + } + } + impl Any for m8x8 { + #[inline] + unsafe fn any(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x4) => { + fallback_to_other_impl!(m16x4, m8x8); + }; + (m32x2) => { + fallback_to_other_impl!(m32x2, m16x4); + }; + // FIXME: 64x1 mask + // 128-bit wide masks: + (m8x16) => { + impl All for m8x16 { + #[inline] + unsafe fn all(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i == u128::max_value() + } + } + impl Any for m8x16 { + #[inline] + unsafe fn any(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x8) => { + fallback_to_other_impl!(m16x8, m8x16); + }; + (m32x4) => { + fallback_to_other_impl!(m32x4, m16x8); + }; + (m64x2) => { + fallback_to_other_impl!(m64x2, m32x4); + }; + (m128x1) => { + fallback_to_other_impl!(m128x1, m64x2); + }; + // 256-bit wide masks + (m8x32) => { + impl All for m8x32 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [u128::max_value(); 2]; + i == o + } + } + impl Any for m8x32 { + #[inline] + unsafe fn any(self) ->
bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [0; 2]; + i != o + } + } + }; + (m16x16) => { + fallback_to_other_impl!(m16x16, m8x32); + }; + (m32x8) => { + fallback_to_other_impl!(m32x8, m16x16); + }; + (m64x4) => { + fallback_to_other_impl!(m64x4, m32x8); + }; + (m128x2) => { + fallback_to_other_impl!(m128x2, m64x4); + }; + // 512-bit wide masks + (m8x64) => { + impl All for m8x64 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [u128::max_value(); 4]; + i == o + } + } + impl Any for m8x64 { + #[inline] + unsafe fn any(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [0; 4]; + i != o + } + } + }; + (m16x32) => { + fallback_to_other_impl!(m16x32, m8x64); + }; + (m32x16) => { + fallback_to_other_impl!(m32x16, m16x32); + }; + (m64x8) => { + fallback_to_other_impl!(m64x8, m32x16); + }; + (m128x4) => { + fallback_to_other_impl!(m128x4, m64x8); + }; + // Masks with pointer-sized elements + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; +} + +macro_rules!
recurse_half { + ($vid:ident, $vid_h:ident) => { + impl All for $vid { + #[inline] + unsafe fn all(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.all() && halves.1.all() + } + } + impl Any for $vid { + #[inline] + unsafe fn any(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.any() || halves.1.any() + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs new file mode 100644 index 000000000000..2ae4ed81c416 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs @@ -0,0 +1,194 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets + +#[cfg(target_feature = "sse")] +#[macro_use] +mod sse; + +#[cfg(target_feature = "sse2")] +#[macro_use] +mod sse2; + +#[cfg(target_feature = "avx")] +#[macro_use] +mod avx; + +#[cfg(target_feature = "avx2")] +#[macro_use] +mod avx2; + +/// x86 64-bit m8x8 implementation +macro_rules! x86_m8x8_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { + x86_m8x8_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m8x16 implementation +macro_rules! x86_m8x16_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse2")] { + x86_m8x16_sse2_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m32x4 implementation +macro_rules! x86_m32x4_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m64x2 implementation +macro_rules! x86_m64x2_impl { + ($id:ident) => { + cfg_if! 
{ + if #[cfg(target_feature = "sse2")] { + x86_m64x2_sse2_impl!($id); + } else if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m8x32 implementation +macro_rules! x86_m8x32_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx2")] { + x86_m8x32_avx2_impl!($id); + } else if #[cfg(target_feature = "avx")] { + x86_m8x32_avx_impl!($id); + } else if #[cfg(target_feature = "sse2")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m32x8 implementation +macro_rules! x86_m32x8_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m32x8_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m64x4 implementation +macro_rules! x86_m64x4_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m64x4_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// Fallback implementation. +macro_rules! x86_intr_impl { + ($id:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + use crate::llvm::simd_reduce_all; + simd_reduce_all(self.0) + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + use crate::llvm::simd_reduce_any; + simd_reduce_any(self.0) + } + } + }; +} + +/// Mask reduction implementation for `x86` and `x86_64` targets +macro_rules! 
impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { x86_m8x8_impl!(m8x8); }; + (m16x4) => { x86_m8x8_impl!(m16x4); }; + (m32x2) => { x86_m8x8_impl!(m32x2); }; + // 128-bit wide masks + (m8x16) => { x86_m8x16_impl!(m8x16); }; + (m16x8) => { x86_m8x16_impl!(m16x8); }; + (m32x4) => { x86_m32x4_impl!(m32x4); }; + (m64x2) => { x86_m64x2_impl!(m64x2); }; + (m128x1) => { x86_intr_impl!(m128x1); }; + // 256-bit wide masks: + (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; + (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; + (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; + (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; + (m128x2) => { x86_intr_impl!(m128x2); }; + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs new file mode 100644 index 000000000000..d18736fb0399 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs @@ -0,0 +1,101 @@ +//! 
Mask reductions implementation for `x86` and `x86_64` targets with `AVX` + +/// `x86`/`x86_64` 256-bit `AVX` implementation +/// FIXME: it might be faster here to do two `_mm_movmask_epi8` +#[cfg(target_feature = "avx")] +macro_rules! x86_m8x32_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testc_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testc_si256; + _mm256_testc_si256( + crate::mem::transmute(self), + crate::mem::transmute($id::splat(true)), + ) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testz_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testz_si256; + _mm256_testz_si256( + crate::mem::transmute(self), + crate::mem::transmute(self), + ) == 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation +macro_rules! x86_m32x8_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + // _mm256_movemask_ps(a) creates a 8bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 8 lanes of the mask are true. 
+ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + + _mm256_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation +macro_rules! x86_m64x4_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + // _mm256_movemask_pd(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 4 lanes of the mask are true. + _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + + _mm256_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs new file mode 100644 index 000000000000..d37d02342092 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs @@ -0,0 +1,35 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. +#![allow(unused)] + +/// x86/x86_64 256-bit m8x32 AVX2 implementation +macro_rules! 
x86_m8x32_avx2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + // _mm256_movemask_epi8(a) creates a 32bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 32 lanes of the mask are + // true. + _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + + _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs new file mode 100644 index 000000000000..7482f9430a14 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs @@ -0,0 +1,68 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. +#![allow(unused)] + +/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation +macro_rules! x86_m32x4_sse_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + // _mm_movemask_ps(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 4 lanes of the mask are + // true. 
+ _mm_movemask_ps(crate::mem::transmute(self)) + == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + + _mm_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} + +macro_rules! x86_m8x8_sse_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pi8; + // _mm_movemask_pi8(a) creates an 8bit mask containing the most + // significant bit of each byte of `a`. If all bits are set, + // then all 8 lanes of the mask are true. + _mm_movemask_pi8(crate::mem::transmute(self)) + == u8::max_value() as i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pi8; + + _mm_movemask_pi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs new file mode 100644 index 000000000000..a99c606f5268 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs @@ -0,0 +1,70 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. +#![allow(unused)] + +/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation +macro_rules! 
x86_m64x2_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + // _mm_movemask_pd(a) creates a 2bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 2 lanes of the mask are + // true. + _mm_movemask_pd(crate::mem::transmute(self)) + == 0b_11_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + + _mm_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation +macro_rules! x86_m8x16_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + // _mm_movemask_epi8(a) creates a 16bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 16 lanes of the mask are + // true. 
+ _mm_movemask_epi8(crate::mem::transmute(self)) + == i32::from(u16::max_value()) + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + + _mm_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs new file mode 100644 index 000000000000..35a9db905339 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs @@ -0,0 +1,302 @@ +//! Implementations of the `ShuffleResult` trait for the different numbers of +//! lanes and vector element types. + +use crate::masks::*; +use crate::sealed::Shuffle; + +impl Shuffle<[u32; 2]> for i8 { + type Output = crate::codegen::i8x2; +} +impl Shuffle<[u32; 4]> for i8 { + type Output = crate::codegen::i8x4; +} +impl Shuffle<[u32; 8]> for i8 { + type Output = crate::codegen::i8x8; +} +impl Shuffle<[u32; 16]> for i8 { + type Output = crate::codegen::i8x16; +} +impl Shuffle<[u32; 32]> for i8 { + type Output = crate::codegen::i8x32; +} +impl Shuffle<[u32; 64]> for i8 { + type Output = crate::codegen::i8x64; +} + +impl Shuffle<[u32; 2]> for u8 { + type Output = crate::codegen::u8x2; +} +impl Shuffle<[u32; 4]> for u8 { + type Output = crate::codegen::u8x4; +} +impl Shuffle<[u32; 8]> for u8 { + type Output = crate::codegen::u8x8; +} +impl Shuffle<[u32; 16]> for u8 { + type Output = crate::codegen::u8x16; +} +impl Shuffle<[u32; 32]> for u8 { + type Output = crate::codegen::u8x32; +} +impl Shuffle<[u32; 64]> for u8 { + type Output = crate::codegen::u8x64; +} + +impl Shuffle<[u32; 2]> for m8 { + type Output = crate::codegen::m8x2; +} +impl Shuffle<[u32; 4]> for m8 { + type Output = crate::codegen::m8x4; +} +impl Shuffle<[u32; 8]> for m8 { + type Output = crate::codegen::m8x8; +} +impl Shuffle<[u32; 
16]> for m8 { + type Output = crate::codegen::m8x16; +} +impl Shuffle<[u32; 32]> for m8 { + type Output = crate::codegen::m8x32; +} +impl Shuffle<[u32; 64]> for m8 { + type Output = crate::codegen::m8x64; +} + +impl Shuffle<[u32; 2]> for i16 { + type Output = crate::codegen::i16x2; +} +impl Shuffle<[u32; 4]> for i16 { + type Output = crate::codegen::i16x4; +} +impl Shuffle<[u32; 8]> for i16 { + type Output = crate::codegen::i16x8; +} +impl Shuffle<[u32; 16]> for i16 { + type Output = crate::codegen::i16x16; +} +impl Shuffle<[u32; 32]> for i16 { + type Output = crate::codegen::i16x32; +} + +impl Shuffle<[u32; 2]> for u16 { + type Output = crate::codegen::u16x2; +} +impl Shuffle<[u32; 4]> for u16 { + type Output = crate::codegen::u16x4; +} +impl Shuffle<[u32; 8]> for u16 { + type Output = crate::codegen::u16x8; +} +impl Shuffle<[u32; 16]> for u16 { + type Output = crate::codegen::u16x16; +} +impl Shuffle<[u32; 32]> for u16 { + type Output = crate::codegen::u16x32; +} + +impl Shuffle<[u32; 2]> for m16 { + type Output = crate::codegen::m16x2; +} +impl Shuffle<[u32; 4]> for m16 { + type Output = crate::codegen::m16x4; +} +impl Shuffle<[u32; 8]> for m16 { + type Output = crate::codegen::m16x8; +} +impl Shuffle<[u32; 16]> for m16 { + type Output = crate::codegen::m16x16; +} +impl Shuffle<[u32; 32]> for m16 { + type Output = crate::codegen::m16x32; +} + +impl Shuffle<[u32; 2]> for i32 { + type Output = crate::codegen::i32x2; +} +impl Shuffle<[u32; 4]> for i32 { + type Output = crate::codegen::i32x4; +} +impl Shuffle<[u32; 8]> for i32 { + type Output = crate::codegen::i32x8; +} +impl Shuffle<[u32; 16]> for i32 { + type Output = crate::codegen::i32x16; +} + +impl Shuffle<[u32; 2]> for u32 { + type Output = crate::codegen::u32x2; +} +impl Shuffle<[u32; 4]> for u32 { + type Output = crate::codegen::u32x4; +} +impl Shuffle<[u32; 8]> for u32 { + type Output = crate::codegen::u32x8; +} +impl Shuffle<[u32; 16]> for u32 { + type Output = crate::codegen::u32x16; +} + +impl 
Shuffle<[u32; 2]> for f32 { + type Output = crate::codegen::f32x2; +} +impl Shuffle<[u32; 4]> for f32 { + type Output = crate::codegen::f32x4; +} +impl Shuffle<[u32; 8]> for f32 { + type Output = crate::codegen::f32x8; +} +impl Shuffle<[u32; 16]> for f32 { + type Output = crate::codegen::f32x16; +} + +impl Shuffle<[u32; 2]> for m32 { + type Output = crate::codegen::m32x2; +} +impl Shuffle<[u32; 4]> for m32 { + type Output = crate::codegen::m32x4; +} +impl Shuffle<[u32; 8]> for m32 { + type Output = crate::codegen::m32x8; +} +impl Shuffle<[u32; 16]> for m32 { + type Output = crate::codegen::m32x16; +} + +/* FIXME: 64-bit single element vector +impl Shuffle<[u32; 1]> for i64 { + type Output = crate::codegen::i64x1; +} +*/ +impl Shuffle<[u32; 2]> for i64 { + type Output = crate::codegen::i64x2; +} +impl Shuffle<[u32; 4]> for i64 { + type Output = crate::codegen::i64x4; +} +impl Shuffle<[u32; 8]> for i64 { + type Output = crate::codegen::i64x8; +} + +/* FIXME: 64-bit single element vector +impl Shuffle<[u32; 1]> for u64 { + type Output = crate::codegen::u64x1; +} +*/ +impl Shuffle<[u32; 2]> for u64 { + type Output = crate::codegen::u64x2; +} +impl Shuffle<[u32; 4]> for u64 { + type Output = crate::codegen::u64x4; +} +impl Shuffle<[u32; 8]> for u64 { + type Output = crate::codegen::u64x8; +} + +/* FIXME: 64-bit single element vector +impl Shuffle<[u32; 1]> for f64 { + type Output = crate::codegen::f64x1; +} +*/ +impl Shuffle<[u32; 2]> for f64 { + type Output = crate::codegen::f64x2; +} +impl Shuffle<[u32; 4]> for f64 { + type Output = crate::codegen::f64x4; +} +impl Shuffle<[u32; 8]> for f64 { + type Output = crate::codegen::f64x8; +} + +/* FIXME: 64-bit single element vector +impl Shuffle<[u32; 1]> for m64 { + type Output = crate::codegen::m64x1; +} +*/ +impl Shuffle<[u32; 2]> for m64 { + type Output = crate::codegen::m64x2; +} +impl Shuffle<[u32; 4]> for m64 { + type Output = crate::codegen::m64x4; +} +impl Shuffle<[u32; 8]> for m64 { + type Output = 
crate::codegen::m64x8; +} + +impl Shuffle<[u32; 2]> for isize { + type Output = crate::codegen::isizex2; +} +impl Shuffle<[u32; 4]> for isize { + type Output = crate::codegen::isizex4; +} +impl Shuffle<[u32; 8]> for isize { + type Output = crate::codegen::isizex8; +} + +impl Shuffle<[u32; 2]> for usize { + type Output = crate::codegen::usizex2; +} +impl Shuffle<[u32; 4]> for usize { + type Output = crate::codegen::usizex4; +} +impl Shuffle<[u32; 8]> for usize { + type Output = crate::codegen::usizex8; +} + +impl Shuffle<[u32; 2]> for *const T { + type Output = crate::codegen::cptrx2; +} +impl Shuffle<[u32; 4]> for *const T { + type Output = crate::codegen::cptrx4; +} +impl Shuffle<[u32; 8]> for *const T { + type Output = crate::codegen::cptrx8; +} + +impl Shuffle<[u32; 2]> for *mut T { + type Output = crate::codegen::mptrx2; +} +impl Shuffle<[u32; 4]> for *mut T { + type Output = crate::codegen::mptrx4; +} +impl Shuffle<[u32; 8]> for *mut T { + type Output = crate::codegen::mptrx8; +} + +impl Shuffle<[u32; 2]> for msize { + type Output = crate::codegen::msizex2; +} +impl Shuffle<[u32; 4]> for msize { + type Output = crate::codegen::msizex4; +} +impl Shuffle<[u32; 8]> for msize { + type Output = crate::codegen::msizex8; +} + +impl Shuffle<[u32; 1]> for i128 { + type Output = crate::codegen::i128x1; +} +impl Shuffle<[u32; 2]> for i128 { + type Output = crate::codegen::i128x2; +} +impl Shuffle<[u32; 4]> for i128 { + type Output = crate::codegen::i128x4; +} + +impl Shuffle<[u32; 1]> for u128 { + type Output = crate::codegen::u128x1; +} +impl Shuffle<[u32; 2]> for u128 { + type Output = crate::codegen::u128x2; +} +impl Shuffle<[u32; 4]> for u128 { + type Output = crate::codegen::u128x4; +} + +impl Shuffle<[u32; 1]> for m128 { + type Output = crate::codegen::m128x1; +} +impl Shuffle<[u32; 2]> for m128 { + type Output = crate::codegen::m128x2; +} +impl Shuffle<[u32; 4]> for m128 { + type Output = crate::codegen::m128x4; +} diff --git 
a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
new file mode 100644
index 000000000000..1e9f5816371a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
@@ -0,0 +1,450 @@
+//! Shuffle vector lanes with run-time indices.
+
+use crate::*;
+
+/// Lane shuffle whose source-lane indices are supplied at run time.
+///
+/// In the portable fallback below, lane `i` of the result is lane
+/// `indices[i]` of `self`.
+pub trait Shuffle1Dyn {
+    /// Vector type holding one source-lane index per result lane.
+    type Indices;
+    fn shuffle1_dyn(self, _: Self::Indices) -> Self;
+}
+
+// Fallback implementation: assembles the result lane by lane with
+// `extract`/`replace`, so it needs no target-specific intrinsics.
+macro_rules! impl_fallback {
+    ($id:ident) => {
+        impl Shuffle1Dyn for $id {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                let mut result = Self::splat(0);
+                for i in 0..$id::lanes() {
+                    result = result
+                        .replace(i, self.extract(indices.extract(i) as usize));
+                }
+                result
+            }
+        }
+    };
+}
+
+// Per-type implementations. The literal arms (`u8x8`, `u8x16`, ...) use a
+// target intrinsic when the matching `cfg` holds at compile time; any type
+// without a literal arm is forwarded to `impl_fallback!`.
+macro_rules! impl_shuffle1_dyn {
+    (u8x8) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "ssse3"))] {
+                impl Shuffle1Dyn for u8x8 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::_mm_shuffle_pi8;
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::_mm_shuffle_pi8;
+
+                        unsafe {
+                            crate::mem::transmute(
+                                _mm_shuffle_pi8(
+                                    crate::mem::transmute(self.0),
+                                    crate::mem::transmute(indices.0)
+                                )
+                            )
+                        }
+                    }
+                }
+            } else if #[cfg(all(
+                any(
+                    all(target_arch = "aarch64", target_feature = "neon"),
+                    // NOTE(review): `target_aarch` is not a cfg key that
+                    // rustc sets, so this 32-bit `arm` alternative never
+                    // matches; left untouched, only aarch64 is enabled here.
+                    all(target_aarch = "arm", target_feature = "v7",
+                        target_feature = "neon")
+                ),
+                any(feature = "core_arch", libcore_neon)
+            )
+            )] {
+                impl Shuffle1Dyn for u8x8 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "aarch64")]
+                        use crate::arch::aarch64::vtbl1_u8;
+                        #[cfg(target_arch = "arm")]
+                        use crate::arch::arm::vtbl1_u8;
+
+                        // This is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                vtbl1_u8(mem::transmute(self.0),
+                                        crate::mem::transmute(indices.0))
+                            ))
+                        }
+                    }
+                }
+            } else {
+                impl_fallback!(u8x8);
+            }
+        }
+    };
+    (u8x16) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "ssse3"))] {
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::_mm_shuffle_epi8;
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::_mm_shuffle_epi8;
+                        // This is safe because the binary is compiled with
+                        // ssse3 enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                _mm_shuffle_epi8(mem::transmute(self.0),
+                                                crate::mem::transmute(indices))
+                            ))
+                        }
+                    }
+                }
+            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
+                                any(feature = "core_arch", libcore_neon)))] {
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        use crate::arch::aarch64::vqtbl1q_u8;
+
+                        // This is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            Simd(mem::transmute(
+                                vqtbl1q_u8(mem::transmute(self.0),
+                                          crate::mem::transmute(indices.0))
+                            ))
+                        }
+                    }
+                }
+            } else if #[cfg(all(target_aarch = "arm", target_feature = "v7",
+                                target_feature = "neon",
+                                any(feature = "core_arch", libcore_neon)))] {
+                // NOTE(review): `target_aarch` is not a cfg key that rustc
+                // sets, so this branch is never selected. It is left that
+                // way on purpose: the body refers to `x` and `y`, which are
+                // not bound in this function, so it would not compile.
+                impl Shuffle1Dyn for u8x16 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        use crate::arch::arm::vtbl2_u8;
+
+                        // This is safe because the binary is compiled with
+                        // neon enabled at compile-time and can therefore only
+                        // run on CPUs that have it enabled.
+                        unsafe {
+                            union U {
+                                j: u8x16,
+                                s: (u8x8, u8x8),
+                            }
+
+                            let (i0, i1) = U { j: y }.s;
+
+                            let r0 = vtbl2_u8(
+                                mem::transmute(x),
+                                crate::mem::transmute(i0)
+                            );
+                            let r1 = vtbl2_u8(
+                                mem::transmute(x),
+                                crate::mem::transmute(i1)
+                            );
+
+                            let r = U { s: (r0, r1) }.j;
+
+                            Simd(mem::transmute(r))
+                        }
+                    }
+                }
+            } else {
+                impl_fallback!(u8x16);
+            }
+        }
+    };
+    (u16x8) => {
+        impl Shuffle1Dyn for u16x8 {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                // Turn every 16-bit lane index `k` into the byte indices
+                // `2k` and `2k + 1`, then reuse the `u8x16` byte shuffle.
+                let indices: u8x8 = (indices * 2).cast();
+                let indices: u8x16 = shuffle!(
+                    indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]
+                );
+                let v = u8x16::new(
+                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+                );
+                let indices = indices + v;
+                unsafe {
+                    let s: u8x16 = crate::mem::transmute(self);
+                    crate::mem::transmute(s.shuffle1_dyn(indices))
+                }
+            }
+        }
+    };
+    (u32x4) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "avx"))] {
+                impl Shuffle1Dyn for u32x4 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::{_mm_permutevar_ps};
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::{_mm_permutevar_ps};
+
+                        unsafe {
+                            crate::mem::transmute(
+                                _mm_permutevar_ps(
+                                    crate::mem::transmute(self.0),
+                                    crate::mem::transmute(indices.0)
+                                )
+                            )
+                        }
+                    }
+                }
+            } else {
+                impl Shuffle1Dyn for u32x4 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        let indices: u8x4 = (indices * 4).cast();
+                        let indices: u8x16 = shuffle!(
+                            indices,
+                            [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
+                        );
+                        let v = u8x16::new(
+                            0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+                        );
+                        let indices = indices + v;
+                        unsafe {
+                            let s: u8x16 = crate::mem::transmute(self);
+                            crate::mem::transmute(s.shuffle1_dyn(indices))
+                        }
+                    }
+                }
+            }
+        }
+    };
+    (u64x2) => {
+        cfg_if! {
+            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+                         target_feature = "avx"))] {
+                impl Shuffle1Dyn for u64x2 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        #[cfg(target_arch = "x86")]
+                        use crate::arch::x86::{_mm_permutevar_pd};
+                        #[cfg(target_arch = "x86_64")]
+                        use crate::arch::x86_64::{_mm_permutevar_pd};
+                        // _mm_permutevar_pd uses the _second_ bit of each
+                        // element to perform the selection, that is: 0b00 => 0,
+                        // 0b10 => 1:
+                        let indices = indices << 1;
+                        unsafe {
+                            crate::mem::transmute(
+                                _mm_permutevar_pd(
+                                    crate::mem::transmute(self),
+                                    crate::mem::transmute(indices)
+                                )
+                            )
+                        }
+                    }
+                }
+            } else {
+                impl Shuffle1Dyn for u64x2 {
+                    type Indices = Self;
+                    #[inline]
+                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                        let indices: u8x2 = (indices * 8).cast();
+                        let indices: u8x16 = shuffle!(
+                            indices,
+                            [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+                        );
+                        let v = u8x16::new(
+                            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+                        );
+                        let indices = indices + v;
+                        unsafe {
+                            let s: u8x16 = crate::mem::transmute(self);
+                            crate::mem::transmute(s.shuffle1_dyn(indices))
+                        }
+                    }
+                }
+            }
+        }
+    };
+    (u128x1) => {
+        impl Shuffle1Dyn for u128x1 {
+            type Indices = Self;
+            #[inline]
+            fn shuffle1_dyn(self, _indices: Self::Indices) -> Self {
+                self
+            }
+        }
+    };
+    ($id:ident) => { impl_fallback!($id); }
+}
+
+impl_shuffle1_dyn!(u8x2);
+impl_shuffle1_dyn!(u8x4);
+impl_shuffle1_dyn!(u8x8);
+impl_shuffle1_dyn!(u8x16);
+impl_shuffle1_dyn!(u8x32);
+impl_shuffle1_dyn!(u8x64);
+
+impl_shuffle1_dyn!(u16x2);
+impl_shuffle1_dyn!(u16x4);
+impl_shuffle1_dyn!(u16x8);
+impl_shuffle1_dyn!(u16x16);
+impl_shuffle1_dyn!(u16x32);
+
+impl_shuffle1_dyn!(u32x2);
+impl_shuffle1_dyn!(u32x4);
+impl_shuffle1_dyn!(u32x8);
+impl_shuffle1_dyn!(u32x16);
+
+impl_shuffle1_dyn!(u64x2);
+impl_shuffle1_dyn!(u64x4);
+impl_shuffle1_dyn!(u64x8);
+
+impl_shuffle1_dyn!(usizex2);
+impl_shuffle1_dyn!(usizex4);
+impl_shuffle1_dyn!(usizex8);
+
+impl_shuffle1_dyn!(u128x1);
+impl_shuffle1_dyn!(u128x2);
+impl_shuffle1_dyn!(u128x4);
+
+// Other lane types: reinterpret as the unsigned vector `$bits`, shuffle, cast back.
+macro_rules! impl_shuffle1_dyn_non_u {
+    ($vec:ident, $bits:ident) => {
+        impl Shuffle1Dyn for $vec {
+            type Indices = $bits;
+            #[inline]
+            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+                unsafe {
+                    let raw = crate::mem::transmute::<Self, $bits>(self);
+                    crate::mem::transmute::<$bits, Self>(raw.shuffle1_dyn(indices))
+                }
+            }
+        }
+    };
+}
+
+impl_shuffle1_dyn_non_u!(i8x2, u8x2);
+impl_shuffle1_dyn_non_u!(i8x4, u8x4);
+impl_shuffle1_dyn_non_u!(i8x8, u8x8);
+impl_shuffle1_dyn_non_u!(i8x16, u8x16);
+impl_shuffle1_dyn_non_u!(i8x32, u8x32);
+impl_shuffle1_dyn_non_u!(i8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(i16x2, u16x2);
+impl_shuffle1_dyn_non_u!(i16x4, u16x4);
+impl_shuffle1_dyn_non_u!(i16x8, u16x8);
+impl_shuffle1_dyn_non_u!(i16x16, u16x16);
+impl_shuffle1_dyn_non_u!(i16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(i32x2, u32x2);
+impl_shuffle1_dyn_non_u!(i32x4, u32x4);
+impl_shuffle1_dyn_non_u!(i32x8, u32x8);
+impl_shuffle1_dyn_non_u!(i32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(i64x2, u64x2);
+impl_shuffle1_dyn_non_u!(i64x4, u64x4);
+impl_shuffle1_dyn_non_u!(i64x8, u64x8);
+
+impl_shuffle1_dyn_non_u!(isizex2, usizex2);
+impl_shuffle1_dyn_non_u!(isizex4, usizex4);
+impl_shuffle1_dyn_non_u!(isizex8, usizex8);
+
+impl_shuffle1_dyn_non_u!(i128x1, u128x1);
+impl_shuffle1_dyn_non_u!(i128x2, u128x2);
+impl_shuffle1_dyn_non_u!(i128x4, u128x4);
+
+impl_shuffle1_dyn_non_u!(m8x2, u8x2);
+impl_shuffle1_dyn_non_u!(m8x4, u8x4);
+impl_shuffle1_dyn_non_u!(m8x8, u8x8);
+impl_shuffle1_dyn_non_u!(m8x16, u8x16);
+impl_shuffle1_dyn_non_u!(m8x32, u8x32);
+impl_shuffle1_dyn_non_u!(m8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(m16x2, u16x2);
+impl_shuffle1_dyn_non_u!(m16x4, u16x4);
+impl_shuffle1_dyn_non_u!(m16x8, u16x8);
+impl_shuffle1_dyn_non_u!(m16x16, u16x16);
+impl_shuffle1_dyn_non_u!(m16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(m32x2, u32x2);
+impl_shuffle1_dyn_non_u!(m32x4, u32x4); +impl_shuffle1_dyn_non_u!(m32x8, u32x8); +impl_shuffle1_dyn_non_u!(m32x16, u32x16); + +impl_shuffle1_dyn_non_u!(m64x2, u64x2); +impl_shuffle1_dyn_non_u!(m64x4, u64x4); +impl_shuffle1_dyn_non_u!(m64x8, u64x8); + +impl_shuffle1_dyn_non_u!(msizex2, usizex2); +impl_shuffle1_dyn_non_u!(msizex4, usizex4); +impl_shuffle1_dyn_non_u!(msizex8, usizex8); + +impl_shuffle1_dyn_non_u!(m128x1, u128x1); +impl_shuffle1_dyn_non_u!(m128x2, u128x2); +impl_shuffle1_dyn_non_u!(m128x4, u128x4); + +impl_shuffle1_dyn_non_u!(f32x2, u32x2); +impl_shuffle1_dyn_non_u!(f32x4, u32x4); +impl_shuffle1_dyn_non_u!(f32x8, u32x8); +impl_shuffle1_dyn_non_u!(f32x16, u32x16); + +impl_shuffle1_dyn_non_u!(f64x2, u64x2); +impl_shuffle1_dyn_non_u!(f64x4, u64x4); +impl_shuffle1_dyn_non_u!(f64x8, u64x8); + +// Implementation for non-unsigned vector types +macro_rules! impl_shuffle1_dyn_ptr { + ($id:ident, $uid:ident) => { + impl Shuffle1Dyn for $id { + type Indices = $uid; + #[inline] + fn shuffle1_dyn(self, indices: Self::Indices) -> Self { + unsafe { + let u: $uid = crate::mem::transmute(self); + crate::mem::transmute(u.shuffle1_dyn(indices)) + } + } + } + }; +} + +impl_shuffle1_dyn_ptr!(cptrx2, usizex2); +impl_shuffle1_dyn_ptr!(cptrx4, usizex4); +impl_shuffle1_dyn_ptr!(cptrx8, usizex8); + +impl_shuffle1_dyn_ptr!(mptrx2, usizex2); +impl_shuffle1_dyn_ptr!(mptrx4, usizex4); +impl_shuffle1_dyn_ptr!(mptrx8, usizex8); diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs new file mode 100644 index 000000000000..b435fb5da120 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs @@ -0,0 +1,189 @@ +//! Horizontal swap bytes reductions. + +// FIXME: investigate using `llvm.bswap` +// https://github.com/rust-lang-nursery/packed_simd/issues/19 + +use crate::*; + +crate trait SwapBytes { + fn swap_bytes(self) -> Self; +} + +macro_rules! 
impl_swap_bytes { + (v16: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + fn swap_bytes(self) -> Self { + unsafe { shuffle!(self, [1, 0]) } + } + } + )+ + }; + (v32: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + #[allow(clippy::useless_transmute)] + fn swap_bytes(self) -> Self { + unsafe { + let bytes: u8x4 = crate::mem::transmute(self); + let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); + crate::mem::transmute(result) + } + } + } + )+ + }; + (v64: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + #[allow(clippy::useless_transmute)] + fn swap_bytes(self) -> Self { + unsafe { + let bytes: u8x8 = crate::mem::transmute(self); + let result: u8x8 = shuffle!( + bytes, [7, 6, 5, 4, 3, 2, 1, 0] + ); + crate::mem::transmute(result) + } + } + } + )+ + }; + (v128: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + #[allow(clippy::useless_transmute)] + fn swap_bytes(self) -> Self { + unsafe { + let bytes: u8x16 = crate::mem::transmute(self); + let result: u8x16 = shuffle!(bytes, [ + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ]); + crate::mem::transmute(result) + } + } + } + )+ + }; + (v256: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + #[allow(clippy::useless_transmute)] + fn swap_bytes(self) -> Self { + unsafe { + let bytes: u8x32 = crate::mem::transmute(self); + let result: u8x32 = shuffle!(bytes, [ + 31, 30, 29, 28, 27, 26, 25, 24, + 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ]); + crate::mem::transmute(result) + } + } + } + )+ + }; + (v512: $($id:ident,)+) => { + $( + impl SwapBytes for $id { + #[inline] + #[allow(clippy::useless_transmute)] + fn swap_bytes(self) -> Self { + unsafe { + let bytes: u8x64 = crate::mem::transmute(self); + let result: u8x64 = shuffle!(bytes, [ + 63, 62, 61, 60, 59, 58, 57, 56, + 55, 54, 53, 52, 51, 50, 49, 48, + 47, 46, 45, 44, 43, 42, 41, 40, + 39, 38, 37, 36, 35, 34, 33, 32, + 31, 30, 29, 
28, 27, 26, 25, 24, + 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ]); + crate::mem::transmute(result) + } + } + } + )+ + }; +} + +impl_swap_bytes!(v16: u8x2, i8x2,); +impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); +// FIXME: 64-bit single element vector +impl_swap_bytes!( + v64: u8x8, + i8x8, + u16x4, + i16x4, + u32x2, + i32x2, /* u64x1, i64x1, */ +); + +impl_swap_bytes!( + v128: u8x16, + i8x16, + u16x8, + i16x8, + u32x4, + i32x4, + u64x2, + i64x2, + u128x1, + i128x1, +); +impl_swap_bytes!( + v256: u8x32, + i8x32, + u16x16, + i16x16, + u32x8, + i32x8, + u64x4, + i64x4, + u128x2, + i128x2, +); + +impl_swap_bytes!( + v512: u8x64, + i8x64, + u16x32, + i16x32, + u32x16, + i32x16, + u64x8, + i64x8, + u128x4, + i128x4, +); + +cfg_if! { + if #[cfg(target_pointer_width = "8")] { + impl_swap_bytes!(v16: isizex2, usizex2,); + impl_swap_bytes!(v32: isizex4, usizex4,); + impl_swap_bytes!(v64: isizex8, usizex8,); + } else if #[cfg(target_pointer_width = "16")] { + impl_swap_bytes!(v32: isizex2, usizex2,); + impl_swap_bytes!(v64: isizex4, usizex4,); + impl_swap_bytes!(v128: isizex8, usizex8,); + } else if #[cfg(target_pointer_width = "32")] { + impl_swap_bytes!(v64: isizex2, usizex2,); + impl_swap_bytes!(v128: isizex4, usizex4,); + impl_swap_bytes!(v256: isizex8, usizex8,); + } else if #[cfg(target_pointer_width = "64")] { + impl_swap_bytes!(v128: isizex2, usizex2,); + impl_swap_bytes!(v256: isizex4, usizex4,); + impl_swap_bytes!(v512: isizex8, usizex8,); + } else { + compile_error!("unsupported target_pointer_width"); + } +} diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs new file mode 100644 index 000000000000..9506424fadad --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/v128.rs @@ -0,0 +1,46 @@ +//! 
Internal 128-bit wide vector types + +use crate::masks::*; + +#[rustfmt::skip] +impl_simd_array!( + [i8; 16]: i8x16 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); +#[rustfmt::skip] +impl_simd_array!( + [u8; 16]: u8x16 | + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8 +); +#[rustfmt::skip] +impl_simd_array!( + [m8; 16]: m8x16 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); + +impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16); +impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16); +impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16); + +impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32); +impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32); +impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32); +impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32); + +impl_simd_array!([i64; 2]: i64x2 | i64, i64); +impl_simd_array!([u64; 2]: u64x2 | u64, u64); +impl_simd_array!([f64; 2]: f64x2 | f64, f64); +impl_simd_array!([m64; 2]: m64x2 | i64, i64); + +impl_simd_array!([i128; 1]: i128x1 | i128); +impl_simd_array!([u128; 1]: u128x1 | u128); +impl_simd_array!([m128; 1]: m128x1 | i128); diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs new file mode 100644 index 000000000000..4d55a6d8998e --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/v16.rs @@ -0,0 +1,7 @@ +//! Internal 16-bit wide vector types + +use crate::masks::*; + +impl_simd_array!([i8; 2]: i8x2 | i8, i8); +impl_simd_array!([u8; 2]: u8x2 | u8, u8); +impl_simd_array!([m8; 2]: m8x2 | i8, i8); diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs new file mode 100644 index 000000000000..5ca4759f0c0a --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/v256.rs @@ -0,0 +1,78 @@ +//! 
Internal 256-bit wide vector types + +use crate::masks::*; + +#[rustfmt::skip] +impl_simd_array!( + [i8; 32]: i8x32 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); +#[rustfmt::skip] +impl_simd_array!( + [u8; 32]: u8x32 | + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8 +); +#[rustfmt::skip] +impl_simd_array!( + [m8; 32]: m8x32 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); +#[rustfmt::skip] +impl_simd_array!( + [i16; 16]: i16x16 | + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16 +); +#[rustfmt::skip] +impl_simd_array!( + [u16; 16]: u16x16 | + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16 +); +#[rustfmt::skip] +impl_simd_array!( + [m16; 16]: m16x16 | + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16 +); + +impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32); +impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32); +impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32); +impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32); + +impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64); +impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64); +impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64); +impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64); + +impl_simd_array!([i128; 2]: i128x2 | i128, i128); +impl_simd_array!([u128; 2]: u128x2 | u128, u128); +impl_simd_array!([m128; 2]: m128x2 | i128, i128); diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs new file mode 100644 index 000000000000..ae1dabd00c22 --- /dev/null 
+++ b/third_party/rust/packed_simd/src/codegen/v32.rs @@ -0,0 +1,11 @@ +//! Internal 32-bit wide vector types + +use crate::masks::*; + +impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8); +impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8); +impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8); + +impl_simd_array!([i16; 2]: i16x2 | i16, i16); +impl_simd_array!([u16; 2]: u16x2 | u16, u16); +impl_simd_array!([m16; 2]: m16x2 | i16, i16); diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs new file mode 100644 index 000000000000..bf95110340d6 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/v512.rs @@ -0,0 +1,145 @@ +//! Internal 512-bit wide vector types + +use crate::masks::*; + +#[rustfmt::skip] +impl_simd_array!( + [i8; 64]: i8x64 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); +#[rustfmt::skip] +impl_simd_array!( + [u8; 64]: u8x64 | + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8, + u8, u8, u8, u8 +); +#[rustfmt::skip] +impl_simd_array!( + [m8; 64]: m8x64 | + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8, + i8, i8, i8, i8 +); +#[rustfmt::skip] +impl_simd_array!( + [i16; 32]: i16x32 | + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, 
i16, i16, i16, + i16, i16, i16, i16 +); +#[rustfmt::skip] +impl_simd_array!( + [u16; 32]: u16x32 | + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16, + u16, u16, u16, u16 +); +#[rustfmt::skip] +impl_simd_array!( + [m16; 32]: m16x32 | + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16, + i16, i16, i16, i16 +); + +#[rustfmt::skip] +impl_simd_array!( + [i32; 16]: i32x16 | + i32, i32, i32, i32, + i32, i32, i32, i32, + i32, i32, i32, i32, + i32, i32, i32, i32 +); +#[rustfmt::skip] +impl_simd_array!( + [u32; 16]: u32x16 | + u32, u32, u32, u32, + u32, u32, u32, u32, + u32, u32, u32, u32, + u32, u32, u32, u32 +); +#[rustfmt::skip] +impl_simd_array!( + [f32; 16]: f32x16 | + f32, f32, f32, f32, + f32, f32, f32, f32, + f32, f32, f32, f32, + f32, f32, f32, f32 +); +#[rustfmt::skip] +impl_simd_array!( + [m32; 16]: m32x16 | + i32, i32, i32, i32, + i32, i32, i32, i32, + i32, i32, i32, i32, + i32, i32, i32, i32 +); + +impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64); +impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64); +impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64); +impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64); + +impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128); +impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128); +impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128); diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs new file mode 100644 index 000000000000..3cfb67c1a013 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/v64.rs @@ -0,0 +1,21 @@ +//! 
Internal 64-bit wide vector types + +use crate::masks::*; + +impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8); +impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8); +impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8); + +impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16); +impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16); +impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16); + +impl_simd_array!([i32; 2]: i32x2 | i32, i32); +impl_simd_array!([u32; 2]: u32x2 | u32, u32); +impl_simd_array!([f32; 2]: f32x2 | f32, f32); +impl_simd_array!([m32; 2]: m32x2 | i32, i32); + +impl_simd_array!([i64; 1]: i64x1 | i64); +impl_simd_array!([u64; 1]: u64x1 | u64); +impl_simd_array!([f64; 1]: f64x1 | f64); +impl_simd_array!([m64; 1]: m64x1 | i64); diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs new file mode 100644 index 000000000000..1f2bc7714dd9 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs @@ -0,0 +1,33 @@ +//! Pointer vector types + +macro_rules! 
impl_simd_ptr { + ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident + | $($tys:ty),*) => { + #[derive(Copy, Clone)] + #[repr(simd)] + pub struct $tuple_id<$ty>($(crate $tys),*); + //^^^^^^^ leaked through SimdArray + + impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] { + type Tuple = $tuple_id<$ptr_ty>; + type T = $ptr_ty; + const N: usize = $elem_count; + type NT = [u32; $elem_count]; + } + + impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> { + type Element = $ptr_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + } +} + +impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T); +impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T); +impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T); + +impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T); +impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T); +impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T); diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs new file mode 100644 index 000000000000..3911b21340c8 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/vSize.rs @@ -0,0 +1,43 @@ +//! 
Vector types with pointer-sized elements + +use crate::codegen::pointer_sized_int::{isize_, usize_}; +use crate::masks::*; + +impl_simd_array!([isize; 2]: isizex2 | isize_, isize_); +impl_simd_array!([usize; 2]: usizex2 | usize_, usize_); +impl_simd_array!([msize; 2]: msizex2 | isize_, isize_); + +impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_); +impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_); +impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_); + +impl_simd_array!( + [isize; 8]: isizex8 | isize_, + isize_, + isize_, + isize_, + isize_, + isize_, + isize_, + isize_ +); +impl_simd_array!( + [usize; 8]: usizex8 | usize_, + usize_, + usize_, + usize_, + usize_, + usize_, + usize_, + usize_ +); +impl_simd_array!( + [msize; 8]: msizex8 | isize_, + isize_, + isize_, + isize_, + isize_, + isize_, + isize_, + isize_ +); diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs new file mode 100644 index 000000000000..d73645e72fbe --- /dev/null +++ b/third_party/rust/packed_simd/src/lib.rs @@ -0,0 +1,327 @@ +//! # Portable packed SIMD vectors +//! +//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366: +//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) . +//! +//! The examples available in the +//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples) +//! sub-directory of the crate showcase how to use the library in practice. +//! +//! ## Table of contents +//! +//! - [Introduction](#introduction) +//! - [Vector types](#vector-types) +//! - [Conditional operations](#conditional-operations) +//! - [Conversions](#conversions) +//! - [Performance +//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/) +//! +//! ## Introduction +//! +//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N` +//! elements of type `T` as well as many type aliases for this type: for +//! 
example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`. +//! +//! The operations on packed vectors are, by default, "vertical", that is, they +//! are applied to each vector lane in isolation of the others: +//! +//! ``` +//! # use packed_simd::*; +//! let a = i32x4::new(1, 2, 3, 4); +//! let b = i32x4::new(5, 6, 7, 8); +//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12)); +//! ``` +//! +//! Many "horizontal" operations are also provided: +//! +//! ``` +//! # use packed_simd::*; +//! # let a = i32x4::new(1, 2, 3, 4); +//! assert_eq!(a.wrapping_sum(), 10); +//! ``` +//! +//! In virtually all architectures vertical operations are fast, while +//! horizontal operations are, by comparison, much slower. That is, the +//! most portably-efficient way of performing a reduction over a slice +//! is to collect the results into a vector using vertical operations, +//! and performing a single horizontal operation at the end: +//! +//! ``` +//! # use packed_simd::*; +//! fn reduce(x: &[i32]) -> i32 { +//! assert!(x.len() % 4 == 0); +//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0] +//! for i in (0..x.len()).step_by(4) { +//! sum += i32x4::from_slice_unaligned(&x[i..]); +//! } +//! sum.wrapping_sum() +//! } +//! +//! let x = [0, 1, 2, 3, 4, 5, 6, 7]; +//! assert_eq!(reduce(&x), 28); +//! ``` +//! +//! ## Vector types +//! +//! The vector type aliases are named according to the following scheme: +//! +//! > `{element_type}x{number_of_lanes} == Simd<[element_type; +//! number_of_lanes]>` +//! +//! where the following element types are supported: +//! +//! * `i{element_width}`: signed integer +//! * `u{element_width}`: unsigned integer +//! * `f{element_width}`: float +//! * `m{element_width}`: mask (see below) +//! * `*{const,mut} T`: `const` and `mut` pointers +//! +//! ## Basic operations +//! +//! ``` +//! # use packed_simd::*; +//! // Sets all elements to `0`: +//! let a = i32x4::splat(0); +//! +//! // Reads a vector from a slice: +//! 
let mut arr = [0, 0, 0, 1, 2, 3, 4, 5]; +//! let b = i32x4::from_slice_unaligned(&arr); +//! +//! // Reads the 4-th element of a vector: +//! assert_eq!(b.extract(3), 1); +//! +//! // Returns a new vector where the 4-th element is replaced with `1`: +//! let a = a.replace(3, 1); +//! assert_eq!(a, b); +//! +//! // Writes a vector to a slice: +//! let a = a.replace(2, 1); +//! a.write_to_slice_unaligned(&mut arr[4..]); +//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]); +//! ``` +//! +//! ## Conditional operations +//! +//! One often needs to perform an operation on some lanes of the vector. Vector +//! masks, like `m32x4`, allow selecting on which vector lanes an operation is +//! to be performed: +//! +//! ``` +//! # use packed_simd::*; +//! let a = i32x4::new(1, 1, 2, 2); +//! +//! // Add `1` to the first two lanes of the vector. +//! let m = m16x4::new(true, true, false, false); +//! let a = m.select(a + 1, a); +//! assert_eq!(a, i32x4::splat(2)); +//! ``` +//! +//! The elements of a vector mask are either `true` or `false`. Here `true` +//! means that a lane is "selected", while `false` means that a lane is not +//! selected. +//! +//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that +//! works on all vectors that have the same number of lanes as the mask. The +//! resulting vector contains the elements of `a` for those lanes for which the +//! mask is `true`, and the elements of `b` otherwise. +//! +//! The example constructs a mask with the first two lanes set to `true` and +//! the last two lanes set to `false`. This selects the first two lanes of `a + +//! 1` and the last two lanes of `a`, producing a vector where the first two +//! lanes have been incremented by `1`. +//! +//! > note: mask `select` can be used on vector types that have the same number +//! > of lanes as the mask. The example shows this by using [`m16x4`] instead +//! > of [`m32x4`]. It is _typically_ more performant to use a mask element +//! 
> width equal to the element width of the vectors being operated upon. +//! > This is, however, not true for 512-bit wide vectors when targeting +//! > AVX-512, where the most efficient masks use only 1-bit per element. +//! +//! All vertical comparison operations return masks: +//! +//! ``` +//! # use packed_simd::*; +//! let a = i32x4::new(1, 1, 3, 3); +//! let b = i32x4::new(2, 2, 0, 0); +//! +//! // ge: >= (Greater or Equal; see also lt, le, gt, eq, ne). +//! let m = a.ge(i32x4::splat(2)); +//! +//! if m.any() { +//! // all / any / none allow coherent control flow +//! let d = m.select(a, b); +//! assert_eq!(d, i32x4::new(2, 2, 3, 3)); +//! } +//! ``` +//! +//! ## Conversions +//! +//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for +//! vectors with the same number of lanes when the conversion is value +//! preserving (same as in `std`). +//! +//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the +//! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise +//! `transmute`s when all bit patterns of the source type are valid bit +//! patterns of the target type and are also implemented for the +//! architecture-specific vector types of `std::arch`. For example, `let x: +//! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit +//! patterns are valid `u8x8` bit patterns. However, the opposite is not +//! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this +//! operation cannot be performed safely using `x.into_bits()`; one needs to +//! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the +//! value in the `u8x8` is a valid bit-pattern of `m8x8`. +//! +//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`] +//! (`x.cast()`), just like `as`: +//! +//! * casting integer vectors whose lane types have the same size (e.g. +//! `i32xN` -> `u32xN`) is a **no-op**, +//! +//! 
* casting from a larger integer to a smaller integer (e.g. `u32xN` -> +//! `u8xN`) will **truncate**, +//! +//! * casting from a smaller integer to a larger integer (e.g. `u8xN` -> +//! `u32xN`) will: +//! * **zero-extend** if the source is unsigned, or +//! * **sign-extend** if the source is signed, +//! +//! * casting from a float to an integer will **round the float towards +//! zero**, +//! +//! * casting from an integer to float will produce the floating point +//! representation of the integer, **rounding to nearest, ties to even**, +//! +//! * casting from an `f32` to an `f64` is perfect and lossless, +//! +//! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. +//! +//! Numeric casts are not very "precise": sometimes lossy, sometimes value +//! preserving, etc. + +#![feature( + repr_simd, + const_fn, + platform_intrinsics, + stdsimd, + aarch64_target_feature, + arm_target_feature, + link_llvm_intrinsics, + core_intrinsics, + stmt_expr_attributes, + align_offset, + mmx_target_feature, + crate_visibility_modifier, + custom_inner_attributes +)] +#![allow(non_camel_case_types, non_snake_case, + clippy::cast_possible_truncation, + clippy::cast_lossless, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + // This lint is currently broken for generic code + // See https://github.com/rust-lang/rust-clippy/issues/3410 + clippy::use_self +)] +#![cfg_attr(test, feature(hashmap_internals))] +#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)] +#![no_std] + +use cfg_if::cfg_if; + +cfg_if! 
{ + if #[cfg(feature = "core_arch")] { + #[allow(unused_imports)] + use core_arch as arch; + } else { + #[allow(unused_imports)] + use core::arch; + } +} + +#[cfg(all(target_arch = "wasm32", test))] +use wasm_bindgen_test::*; + +#[allow(unused_imports)] +use core::{ + /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128, + i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice, + u128, u16, u32, u64, u8, usize, +}; + +#[macro_use] +mod testing; +#[macro_use] +mod api; +mod codegen; +mod sealed; + +/// Packed SIMD vector type. +/// +/// # Examples +/// +/// ``` +/// # use packed_simd::Simd; +/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3); +/// assert_eq!(v.extract(2), 2); +/// ``` +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct Simd( + // FIXME: this type should be private, + // but it currently must be public for the + // `shuffle!` macro to work: it needs to + // access the internal `repr(simd)` type + // to call the shuffle intrinsics. + #[doc(hidden)] pub ::Tuple, +); + +/// Wrapper over `T` implementing a lexicographical order via the `PartialOrd` +/// and/or `Ord` traits. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +#[allow(clippy::missing_inline_in_public_items)] +pub struct LexicographicallyOrdered(T); + +mod masks; +pub use self::masks::*; + +mod v16; +pub use self::v16::*; + +mod v32; +pub use self::v32::*; + +mod v64; +pub use self::v64::*; + +mod v128; +pub use self::v128::*; + +mod v256; +pub use self::v256::*; + +mod v512; +pub use self::v512::*; + +mod vSize; +pub use self::vSize::*; + +mod vPtr; +pub use self::vPtr::*; + +pub use self::api::cast::*; + +#[cfg(feature = "into_bits")] +pub use self::api::into_bits::*; + +// Re-export the shuffle intrinsics required by the `shuffle!` macro. 
+#[doc(hidden)] +pub use self::codegen::llvm::{ + __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, + __shuffle_vector4, __shuffle_vector64, __shuffle_vector8, +}; + +crate mod llvm { + crate use crate::codegen::llvm::*; +} diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs new file mode 100644 index 000000000000..f83c4da95750 --- /dev/null +++ b/third_party/rust/packed_simd/src/masks.rs @@ -0,0 +1,128 @@ +//! Mask types + +macro_rules! impl_mask_ty { + ($id:ident : $elem_ty:ident | #[$doc:meta]) => { + #[$doc] + #[derive(Copy, Clone)] + pub struct $id($elem_ty); + + impl crate::sealed::Mask for $id { + fn test(&self) -> bool { + $id::test(self) + } + } + + impl $id { + /// Instantiate a mask with `value` + #[inline] + pub fn new(x: bool) -> Self { + if x { + $id(!0) + } else { + $id(0) + } + } + /// Test if the mask is set + #[inline] + pub fn test(&self) -> bool { + self.0 != 0 + } + } + + impl Default for $id { + #[inline] + fn default() -> Self { + $id(0) + } + } + + #[allow(clippy::partialeq_ne_impl)] + impl PartialEq<$id> for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + #[inline] + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } + } + + impl Eq for $id {} + + impl PartialOrd<$id> for $id { + #[inline] + fn partial_cmp( + &self, other: &Self, + ) -> Option { + use crate::cmp::Ordering; + if self == other { + Some(Ordering::Equal) + } else if self.0 > other.0 { + // Note: + // * false = 0_i + // * true == !0_i == -1_i + Some(Ordering::Less) + } else { + Some(Ordering::Greater) + } + } + + #[inline] + fn lt(&self, other: &Self) -> bool { + self.0 > other.0 + } + #[inline] + fn gt(&self, other: &Self) -> bool { + self.0 < other.0 + } + #[inline] + fn le(&self, other: &Self) -> bool { + self.0 >= other.0 + } + #[inline] + fn ge(&self, other: &Self) -> bool { + self.0 <= other.0 + } + } + + impl Ord for $id { + #[inline] + fn cmp(&self, other: &Self) -> 
crate::cmp::Ordering { + match self.partial_cmp(other) { + Some(x) => x, + None => unsafe { crate::hint::unreachable_unchecked() }, + } + } + } + + impl crate::hash::Hash for $id { + #[inline] + fn hash(&self, state: &mut H) { + (self.0 != 0).hash(state); + } + } + + impl crate::fmt::Debug for $id { + #[inline] + fn fmt( + &self, fmtter: &mut crate::fmt::Formatter<'_>, + ) -> Result<(), crate::fmt::Error> { + write!(fmtter, "{}({})", stringify!($id), self.0 != 0) + } + } + }; +} + +impl_mask_ty!(m8: i8 | /// 8-bit wide mask. +); +impl_mask_ty!(m16: i16 | /// 16-bit wide mask. +); +impl_mask_ty!(m32: i32 | /// 32-bit wide mask. +); +impl_mask_ty!(m64: i64 | /// 64-bit wide mask. +); +impl_mask_ty!(m128: i128 | /// 128-bit wide mask. +); +impl_mask_ty!(msize: isize | /// isize-wide mask. +); diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs new file mode 100644 index 000000000000..832acd3f1d54 --- /dev/null +++ b/third_party/rust/packed_simd/src/sealed.rs @@ -0,0 +1,41 @@ +//! Sealed traits + +/// Trait implemented by arrays that can be SIMD types. +#[doc(hidden)] +pub trait SimdArray { + /// The type of the #[repr(simd)] type. + type Tuple: Copy + Clone; + /// The element type of the vector. + type T; + /// The number of elements in the array. + const N: usize; + /// The type: `[u32; Self::N]`. + type NT; +} + +/// This trait is used to constrain the arguments +/// and result type of the portable shuffles. +#[doc(hidden)] +pub trait Shuffle { + // Lanes is a `[u32; N]` where `N` is the number of vector lanes + + /// The result type of the shuffle. + type Output; +} + +/// This trait is implemented by all SIMD vector types. +#[doc(hidden)] +pub trait Simd { + /// Element type of the SIMD vector + type Element; + /// The number of elements in the SIMD vector. + const LANES: usize; + /// The type: `[u32; Self::LANES]`. 
+ type LanesType; +} + +/// This trait is implemented by all mask types +#[doc(hidden)] +pub trait Mask { + fn test(&self) -> bool; +} diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs new file mode 100644 index 000000000000..fcbcf9e2ac8e --- /dev/null +++ b/third_party/rust/packed_simd/src/testing.rs @@ -0,0 +1,8 @@ +//! Testing macros and other utilities. + +#[macro_use] +mod macros; + +#[cfg(test)] +#[macro_use] +crate mod utils; diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs new file mode 100644 index 000000000000..6008634c76ce --- /dev/null +++ b/third_party/rust/packed_simd/src/testing/macros.rs @@ -0,0 +1,44 @@ +//! Testing macros + +macro_rules! test_if { + ($cfg_tt:tt: $it:item) => { + #[cfg(any( + // Test everything if: + // + // * tests are enabled, + // * no features about exclusively testing + // specific vector classes are enabled + all(test, not(any( + test_v16, + test_v32, + test_v64, + test_v128, + test_v256, + test_v512, + test_none, // disables all tests + ))), + // Test if: + // + // * tests are enabled + // * a particular cfg token tree returns true + all(test, $cfg_tt), + ))] + $it + }; +} + +#[cfg(test)] +#[allow(unused)] +macro_rules! ref_ { + ($anything:tt) => { + &$anything + }; +} + +#[cfg(test)] +#[allow(unused)] +macro_rules! ref_mut_ { + ($anything:tt) => { + &mut $anything + }; +} diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs new file mode 100644 index 000000000000..7b8f21ac1c55 --- /dev/null +++ b/third_party/rust/packed_simd/src/testing/utils.rs @@ -0,0 +1,135 @@ +//! Testing utilities + +#![allow(dead_code)] + +use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered}; + +/// Tests PartialOrd for `a` and `b` where `a < b` is true. 
+pub fn test_lt( + a: LexicographicallyOrdered, b: LexicographicallyOrdered, +) where + LexicographicallyOrdered: Debug + PartialOrd, +{ + assert!(a < b, "{:?}, {:?}", a, b); + assert!(b > a, "{:?}, {:?}", a, b); + + assert!(!(a == b), "{:?}, {:?}", a, b); + assert!(a != b, "{:?}, {:?}", a, b); + + assert!(a <= b, "{:?}, {:?}", a, b); + assert!(b >= a, "{:?}, {:?}", a, b); + + // Irreflexivity + assert!(!(a < a), "{:?}, {:?}", a, b); + assert!(!(b < b), "{:?}, {:?}", a, b); + assert!(!(a > a), "{:?}, {:?}", a, b); + assert!(!(b > b), "{:?}, {:?}", a, b); + + assert!(a <= a, "{:?}, {:?}", a, b); + assert!(b <= b, "{:?}, {:?}", a, b); +} + +/// Tests PartialOrd for `a` and `b` where `a <= b` is true. +pub fn test_le( + a: LexicographicallyOrdered, b: LexicographicallyOrdered, +) where + LexicographicallyOrdered: Debug + PartialOrd, +{ + assert!(a <= b, "{:?}, {:?}", a, b); + assert!(b >= a, "{:?}, {:?}", a, b); + + assert!(a == b || a < b, "{:?}, {:?}", a, b); + assert!(a == b || b > a, "{:?}, {:?}", a, b); + + if a == b { + assert!(!(a < b), "{:?}, {:?}", a, b); + assert!(!(b > a), "{:?}, {:?}", a, b); + + assert!(!(a != b), "{:?}, {:?}", a, b); + } else { + assert!(a != b, "{:?}, {:?}", a, b); + test_lt(a, b); + } +} + +/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering` +pub fn test_cmp( + a: LexicographicallyOrdered, b: LexicographicallyOrdered, + o: Option, +) where + LexicographicallyOrdered: PartialOrd + Debug, + T: Debug + crate::sealed::Simd + Copy + Clone, + ::Element: Default + Copy + Clone + PartialOrd, +{ + assert!(T::LANES <= 64, "array length in these two arrays needs updating"); + let mut arr_a: [T::Element; 64] = [Default::default(); 64]; + let mut arr_b: [T::Element; 64] = [Default::default(); 64]; + + unsafe { + crate::ptr::write_unaligned( + arr_a.as_mut_ptr() as *mut LexicographicallyOrdered, + a, + ) + } + unsafe { + crate::ptr::write_unaligned( + arr_b.as_mut_ptr() as *mut LexicographicallyOrdered, + b, + ) + } + let expected 
= arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]); + let result = a.partial_cmp(&b); + assert_eq!(expected, result, "{:?}, {:?}", a, b); + assert_eq!(o, result, "{:?}, {:?}", a, b); + match o { + Some(crate::cmp::Ordering::Less) => { + test_lt(a, b); + test_le(a, b); + } + Some(crate::cmp::Ordering::Greater) => { + test_lt(b, a); + test_le(b, a); + } + Some(crate::cmp::Ordering::Equal) => { + assert!(a == b, "{:?}, {:?}", a, b); + assert!(!(a != b), "{:?}, {:?}", a, b); + assert!(!(a < b), "{:?}, {:?}", a, b); + assert!(!(b < a), "{:?}, {:?}", a, b); + assert!(!(a > b), "{:?}, {:?}", a, b); + assert!(!(b > a), "{:?}, {:?}", a, b); + + test_le(a, b); + test_le(b, a); + } + None => { + assert!(!(a == b), "{:?}, {:?}", a, b); + assert!(!(a != b), "{:?}, {:?}", a, b); + assert!(!(a < b), "{:?}, {:?}", a, b); + assert!(!(a > b), "{:?}, {:?}", a, b); + assert!(!(b < a), "{:?}, {:?}", a, b); + assert!(!(b > a), "{:?}, {:?}", a, b); + assert!(!(a <= b), "{:?}, {:?}", a, b); + assert!(!(b <= a), "{:?}, {:?}", a, b); + assert!(!(a >= b), "{:?}, {:?}", a, b); + assert!(!(b >= a), "{:?}, {:?}", a, b); + } + } +} + +// Returns a tuple containing two distinct pointer values of the same type as +// the element type of the Simd vector `$id`. +#[allow(unused)] +macro_rules! ptr_vals { + ($id:ty) => { + // expands to an expression + #[allow(unused_unsafe)] + unsafe { + // all bits cleared + let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed(); + // all bits set + let set: <$id as sealed::Simd>::Element = + crate::mem::transmute(-1_isize); + (clear, set) + } + }; +} diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs new file mode 100644 index 000000000000..1d0282dc4278 --- /dev/null +++ b/third_party/rust/packed_simd/src/v128.rs @@ -0,0 +1,80 @@ +//! 
128-bit wide vector types +#![rustfmt::skip] + +use crate::*; + +impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: | + /// A 128-bit vector with 16 `i8` lanes. +); +impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: | + /// A 128-bit vector with 16 `u8` lanes. +); +impl_m!([m8; 16]: m8x16 | i8 | test_v128 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: m16x16 | + /// A 128-bit vector mask with 16 `m8` lanes. +); + +impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: i8x8, u8x8 | + /// A 128-bit vector with 8 `i16` lanes. +); +impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: u8x8 | + /// A 128-bit vector with 8 `u16` lanes. +); +impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: m8x8, m32x8 | + /// A 128-bit vector mask with 8 `m16` lanes. +); + +impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 | + From: i8x4, u8x4, i16x4, u16x4 | + /// A 128-bit vector with 4 `i32` lanes. +); +impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 | + From: u8x4, u16x4 | + /// A 128-bit vector with 4 `u32` lanes. +); +impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 | + From: i8x4, u8x4, i16x4, u16x4 | + /// A 128-bit vector with 4 `f32` lanes. +); +impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 | + From: m8x4, m16x4, m64x4 | + /// A 128-bit vector mask with 4 `m32` lanes. +); + +impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 | + From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 | + /// A 128-bit vector with 2 `i64` lanes. +); +impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 | + From: u8x2, u16x2, u32x2 | + /// A 128-bit vector with 2 `u64` lanes. 
+); +impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 | + From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 | + /// A 128-bit vector with 2 `f64` lanes. +); +impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 | + From: m8x2, m16x2, m32x2, m128x2 | + /// A 128-bit vector mask with 2 `m64` lanes. +); + +impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 | + From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types + /// A 128-bit vector with 1 `i128` lane. +); +impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 | + From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types + /// A 128-bit vector with 1 `u128` lane. +); +impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 | + From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types + /// A 128-bit vector mask with 1 `m128` lane. +); diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs new file mode 100644 index 000000000000..67a3832d2530 --- /dev/null +++ b/third_party/rust/packed_simd/src/v16.rs @@ -0,0 +1,16 @@ +//! 16-bit wide vector types + +use crate::*; + +impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 | + From: | + /// A 16-bit vector with 2 `i8` lanes. +); +impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 | + From: | + /// A 16-bit vector with 2 `u8` lanes. +); +impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 | + From: m16x2, m32x2, m64x2, m128x2 | + /// A 16-bit vector mask with 2 `m8` lanes. +); diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs new file mode 100644 index 000000000000..6b59336f68b6 --- /dev/null +++ b/third_party/rust/packed_simd/src/v256.rs @@ -0,0 +1,86 @@ +//! 
256-bit wide vector types +#![rustfmt::skip] + +use crate::*; + +impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: | + /// A 256-bit vector with 32 `i8` lanes. +); +impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: | + /// A 256-bit vector with 32 `u8` lanes. +); +impl_m!([m8; 32]: m8x32 | i8 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: | + /// A 256-bit vector mask with 32 `m8` lanes. +); + +impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: i8x16, u8x16 | + /// A 256-bit vector with 16 `i16` lanes. +); +impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: u8x16 | + /// A 256-bit vector with 16 `u16` lanes. +); +impl_m!([m16; 16]: m16x16 | i16 | test_v256 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: m8x16 | + /// A 256-bit vector mask with 16 `m16` lanes. +); + +impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: i8x8, u8x8, i16x8, u16x8 | + /// A 256-bit vector with 8 `i32` lanes. +); +impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: u8x8, u16x8 | + /// A 256-bit vector with 8 `u32` lanes. +); +impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: i8x8, u8x8, i16x8, u16x8 | + /// A 256-bit vector with 8 `f32` lanes. 
+); +impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: m8x8, m16x8 | + /// A 256-bit vector mask with 8 `m32` lanes. +); + +impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 | + From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 | + /// A 256-bit vector with 4 `i64` lanes. +); +impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 | + From: u8x4, u16x4, u32x4 | + /// A 256-bit vector with 4 `u64` lanes. +); +impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 | + From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 | + /// A 256-bit vector with 4 `f64` lanes. +); +impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 | + From: m8x4, m16x4, m32x4 | + /// A 256-bit vector mask with 4 `m64` lanes. +); + +impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 | + From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 | + /// A 256-bit vector with 2 `i128` lanes. +); +impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 | + From: u8x2, u16x2, u32x2, u64x2 | + /// A 256-bit vector with 2 `u128` lanes. +); +impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 | + From: m8x2, m16x2, m32x2, m64x2 | + /// A 256-bit vector mask with 2 `m128` lanes. +); diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs new file mode 100644 index 000000000000..09cef9bdd472 --- /dev/null +++ b/third_party/rust/packed_simd/src/v32.rs @@ -0,0 +1,29 @@ +//! 32-bit wide vector types + +use crate::*; + +impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 | + From: | + /// A 32-bit vector with 4 `i8` lanes. +); +impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 | + From: | + /// A 32-bit vector with 4 `u8` lanes. +); +impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 | + From: m16x4, m32x4, m64x4 | + /// A 32-bit vector mask with 4 `m8` lanes. 
+); + +impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 | + From: i8x2, u8x2 | + /// A 32-bit vector with 2 `i16` lanes. +); +impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 | + From: u8x2 | + /// A 32-bit vector with 2 `u16` lanes. +); +impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 | + From: m8x2, m32x2, m64x2, m128x2 | + /// A 32-bit vector mask with 2 `m16` lanes. +); diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs new file mode 100644 index 000000000000..b1714aded369 --- /dev/null +++ b/third_party/rust/packed_simd/src/v512.rs @@ -0,0 +1,99 @@ +//! 512-bit wide vector types +#![rustfmt::skip] + +use crate::*; + +impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, + x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, + x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | + From: | + /// A 512-bit vector with 64 `i8` lanes. +); +impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, + x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, + x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | + From: | + /// A 512-bit vector with 64 `u8` lanes. +); +impl_m!([m8; 64]: m8x64 | i8 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, + x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, + x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | + From: | + /// A 512-bit vector mask with 64 `m8` lanes. 
+); + +impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: i8x32, u8x32 | + /// A 512-bit vector with 32 `i16` lanes. +); +impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: u8x32 | + /// A 512-bit vector with 32 `u16` lanes. +); +impl_m!([m16; 32]: m16x32 | i16 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | + From: m8x32 | + /// A 512-bit vector mask with 32 `m16` lanes. +); + +impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: i8x16, u8x16, i16x16, u16x16 | + /// A 512-bit vector with 16 `i32` lanes. +); +impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: u8x16, u16x16 | + /// A 512-bit vector with 16 `u32` lanes. +); +impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: i8x16, u8x16, i16x16, u16x16 | + /// A 512-bit vector with 16 `f32` lanes. +); +impl_m!([m32; 16]: m32x16 | i32 | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | + From: m8x16, m16x16 | + /// A 512-bit vector mask with 16 `m32` lanes. +); + +impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 | + /// A 512-bit vector with 8 `i64` lanes. 
+); +impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: u8x8, u16x8, u32x8 | + /// A 512-bit vector with 8 `u64` lanes. +); +impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 | + /// A 512-bit vector with 8 `f64` lanes. +); +impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: m8x8, m16x8, m32x8 | + /// A 512-bit vector mask with 8 `m64` lanes. +); + +impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 | + From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 | + /// A 512-bit vector with 4 `i128` lanes. +); +impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 | + From: u8x4, u16x4, u32x4, u64x4 | + /// A 512-bit vector with 4 `u128` lanes. +); +impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 | + From: m8x4, m16x4, m32x4, m64x4 | + /// A 512-bit vector mask with 4 `m128` lanes. +); diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs new file mode 100644 index 000000000000..1ee6219c040b --- /dev/null +++ b/third_party/rust/packed_simd/src/v64.rs @@ -0,0 +1,66 @@ +//! 64-bit wide vector types +#![rustfmt::skip] + +use super::*; + +impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: | + /// A 64-bit vector with 8 `i8` lanes. +); +impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: | + /// A 64-bit vector with 8 `u8` lanes. +); +impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | + From: m16x8, m32x8 | + /// A 64-bit vector mask with 8 `m8` lanes. +); + +impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 | + From: i8x4, u8x4 | + /// A 64-bit vector with 4 `i16` lanes. +); +impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 | + From: u8x4 | + /// A 64-bit vector with 4 `u16` lanes. 
+); +impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 | + From: m8x4, m32x4, m64x4 | + /// A 64-bit vector mask with 4 `m16` lanes. +); + +impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 | + From: i8x2, u8x2, i16x2, u16x2 | + /// A 64-bit vector with 2 `i32` lanes. +); +impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 | + From: u8x2, u16x2 | + /// A 64-bit vector with 2 `u32` lanes. +); +impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 | + From: m8x2, m16x2, m64x2, m128x2 | + /// A 64-bit vector mask with 2 `m32` lanes. +); +impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 | + From: i8x2, u8x2, i16x2, u16x2 | + /// A 64-bit vector with 2 `f32` lanes. +); + +/* +impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 | + From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion + /// A 64-bit vector with 1 `i64` lanes. +); +impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 | + From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion + /// A 64-bit vector with 1 `u64` lanes. +); +impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 | + From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types + /// A 64-bit vector mask with 1 `m64` lanes. +); +impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 | + From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types + /// A 64-bit vector with 1 `f64` lanes. +); +*/ diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs new file mode 100644 index 000000000000..fe9fb28ffa89 --- /dev/null +++ b/third_party/rust/packed_simd/src/vPtr.rs @@ -0,0 +1,34 @@ +//! 
Vectors of pointers +#![rustfmt::skip] + +use crate::*; + +impl_const_p!( + [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | + /// A vector with 2 `*const T` lanes +); + +impl_mut_p!( + [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | + /// A vector with 2 `*mut T` lanes +); + +impl_const_p!( + [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | + /// A vector with 4 `*const T` lanes +); + +impl_mut_p!( + [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | + /// A vector with 4 `*mut T` lanes +); + +impl_const_p!( + [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | + /// A vector with 8 `*const T` lanes +); + +impl_mut_p!( + [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | + /// A vector with 8 `*mut T` lanes +); diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs new file mode 100644 index 000000000000..5594323372b4 --- /dev/null +++ b/third_party/rust/packed_simd/src/vSize.rs @@ -0,0 +1,53 @@ +//! Vectors with pointer-sized elements + +use crate::codegen::pointer_sized_int::{isize_, usize_}; +use crate::*; + +impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 | + x0, x1| + From: | + /// A vector with 2 `isize` lanes. +); + +impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 | + x0, x1| + From: | + /// A vector with 2 `usize` lanes. +); +impl_m!([msize; 2]: msizex2 | isize_ | test_v128 | + x0, x1 | + From: | + /// A vector mask with 2 `msize` lanes. +); + +impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 | + x0, x1, x2, x3 | + From: | + /// A vector with 4 `isize` lanes. +); +impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 | + x0, x1, x2, x3| + From: | + /// A vector with 4 `usize` lanes. 
+); +impl_m!([msize; 4]: msizex4 | isize_ | test_v256 | + x0, x1, x2, x3 | + From: | + /// A vector mask with 4 `msize` lanes. +); + +impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7 | + From: | + /// A vector with 4 `isize` lanes. +); +impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7 | + From: | + /// A vector with 8 `usize` lanes. +); +impl_m!([msize; 8]: msizex8 | isize_ | test_v512 | + x0, x1, x2, x3, x4, x5, x6, x7 | + From: | + /// A vector mask with 8 `msize` lanes. +); diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs new file mode 100644 index 000000000000..1e6b4f354301 --- /dev/null +++ b/third_party/rust/packed_simd/tests/endianness.rs @@ -0,0 +1,262 @@ +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +use packed_simd::*; +use std::{mem, slice}; + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_indexing() { + let v = i32x4::new(0, 1, 2, 3); + assert_eq!(v.extract(0), 0); + assert_eq!(v.extract(1), 1); + assert_eq!(v.extract(2), 2); + assert_eq!(v.extract(3), 3); +} + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_bitcasts() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let t: i16x8 = unsafe { mem::transmute(x) }; + let e: i16x8 = if cfg!(target_endian = "little") { + i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) + } else { + i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) + }; + assert_eq!(t, e); +} + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_casts() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let t: i16x16 = 
x.into(); // simd_cast + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = i16x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + assert_eq!(t, e); +} + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_load_and_stores() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let mut y: [i16; 8] = [0; 8]; + x.write_to_slice_unaligned(unsafe { + slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) + }); + + let e: [i16; 8] = if cfg!(target_endian = "little") { + [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] + } else { + [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] + }; + assert_eq!(y, e); + + let z = i8x16::from_slice_unaligned(unsafe { + slice::from_raw_parts(&y as *const _ as *const i8, 16) + }); + assert_eq!(z, x); +} + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_array_union() { + union A { + data: [f32; 4], + vec: f32x4, + } + let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; + assert_eq!(x[0], 0_f32); + assert_eq!(x[1], 1_f32); + assert_eq!(x[2], 2_f32); + assert_eq!(x[3], 3_f32); + let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] 
}.vec }; + assert_eq!(y, f32x4::new(3., 2., 1., 0.)); + + union B { + data: [i8; 16], + vec: i8x16, + } + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let x: [i8; 16] = unsafe { B { vec: x }.data }; + + for i in 0..16 { + assert_eq!(x[i], i as i8); + } + + #[cfg_attr(rustfmt, rustfmt_skip)] + let y = [ + 15, 14, 13, 12, 11, 19, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ]; + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = i8x16::new( + 15, 14, 13, 12, 11, 19, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ); + let z = unsafe { B { data: y }.vec }; + assert_eq!(z, e); + + union C { + data: [i16; 8], + vec: i8x16, + } + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let x: [i16; 8] = unsafe { C { vec: x }.data }; + + let e: [i16; 8] = if cfg!(target_endian = "little") { + [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] + } else { + [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] + }; + assert_eq!(x, e); +} + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn endian_tuple_access() { + type F32x4T = (f32, f32, f32, f32); + union A { + data: F32x4T, + vec: f32x4, + } + let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; + assert_eq!(x.0, 0_f32); + assert_eq!(x.1, 1_f32); + assert_eq!(x.2, 2_f32); + assert_eq!(x.3, 3_f32); + let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) 
}.vec }; + assert_eq!(y, f32x4::new(3., 2., 1., 0.)); + + #[cfg_attr(rustfmt, rustfmt_skip)] + type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); + union B { + data: I8x16T, + vec: i8x16, + } + + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let x: I8x16T = unsafe { B { vec: x }.data }; + + assert_eq!(x.0, 0); + assert_eq!(x.1, 1); + assert_eq!(x.2, 2); + assert_eq!(x.3, 3); + assert_eq!(x.4, 4); + assert_eq!(x.5, 5); + assert_eq!(x.6, 6); + assert_eq!(x.7, 7); + assert_eq!(x.8, 8); + assert_eq!(x.9, 9); + assert_eq!(x.10, 10); + assert_eq!(x.11, 11); + assert_eq!(x.12, 12); + assert_eq!(x.13, 13); + assert_eq!(x.14, 14); + assert_eq!(x.15, 15); + + #[cfg_attr(rustfmt, rustfmt_skip)] + let y = ( + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ); + let z: i8x16 = unsafe { B { data: y }.vec }; + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = i8x16::new( + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 + ); + assert_eq!(e, z); + + #[cfg_attr(rustfmt, rustfmt_skip)] + type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); + union C { + data: I16x8T, + vec: i8x16, + } + + #[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let x: I16x8T = unsafe { C { vec: x }.data }; + + let e: [i16; 8] = if cfg!(target_endian = "little") { + [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] + } else { + [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] + }; + assert_eq!(x.0, e[0]); + assert_eq!(x.1, e[1]); + assert_eq!(x.2, e[2]); + assert_eq!(x.3, e[3]); + assert_eq!(x.4, e[4]); + assert_eq!(x.5, e[5]); + assert_eq!(x.6, e[6]); + assert_eq!(x.7, e[7]); + + #[cfg_attr(rustfmt, rustfmt_skip)] + #[repr(C)] + #[derive(Copy ,Clone)] + pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, + pub i8, pub i8, pub i16, pub i8, pub i8, pub i16); + + union D { + data: Tup, + vec: i8x16, + } + + 
#[cfg_attr(rustfmt, rustfmt_skip)] + let x = i8x16::new( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + let x: Tup = unsafe { D { vec: x }.data }; + + let e: [i16; 12] = if cfg!(target_endian = "little") { + [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] + } else { + [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] + }; + assert_eq!(x.0 as i16, e[0]); + assert_eq!(x.1 as i16, e[1]); + assert_eq!(x.2 as i16, e[2]); + assert_eq!(x.3 as i16, e[3]); + assert_eq!(x.4 as i16, e[4]); + assert_eq!(x.5 as i16, e[5]); + assert_eq!(x.6 as i16, e[6]); + assert_eq!(x.7 as i16, e[7]); + assert_eq!(x.8 as i16, e[8]); + assert_eq!(x.9 as i16, e[9]); + assert_eq!(x.10 as i16, e[10]); + assert_eq!(x.11 as i16, e[11]); +} diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json deleted file mode 100644 index 5e8c154cda72..000000000000 --- a/third_party/rust/simd/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"Cargo.toml":"0c7a480c62d7b42604098fa1dd6453be79629112569c494efa75d7fd0998fd69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"f426ca32bb44fee39d83d51e481fe6b2640d4b78cb097c952cd75800b886f2fd","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","build.rs":"69c9c9029ca969a4bb3f11429bc1424fa75af46143eb0d853b4db3a512859b32","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f
24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"71d0bb6dac5f58599bb825449701a05cf32f6eca1918e80d060b746e69751c37","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"a24a207e65468de2189297380747e2f2f33ec2317f4b83f0665d34b1c09feb08","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e04914956
45ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"} \ No newline at end of file diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml deleted file mode 100644 index 30279b93556c..000000000000 --- a/third_party/rust/simd/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies -# -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) - -[package] -name = "simd" -version = "0.2.3" -authors = ["Huon Wilson "] -description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n(To be obsoleted by the `std::simd` implementation RFC 2366.)\n" -documentation = "https://docs.rs/simd/" -readme = "README.md" -keywords = ["simd", "data-parallel"] -license = "MIT/Apache-2.0" -repository = "https://github.com/hsivonen/simd" -[package.metadata.docs.rs] -features = ["doc"] -[dependencies.serde] -version = "1.0" -optional = true - -[dependencies.serde_derive] -version = "1.0" -optional = true -[dev-dependencies.cfg-if] -version = "0.1" - -[features] -doc = [] -with-serde = ["serde", "serde_derive"] diff --git a/third_party/rust/simd/README.md b/third_party/rust/simd/README.md deleted file mode 100644 index 1c34f49bcd91..000000000000 --- a/third_party/rust/simd/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# `simd` - -[![Build Status](https://travis-ci.org/hsivonen/simd.svg?branch=master)](https://travis-ci.org/hsivonen/simd) 
-[![crates.io](https://meritbadge.herokuapp.com/simd)](https://crates.io/crates/simd) -[![docs.rs](https://docs.rs/simd/badge.svg)](https://docs.rs/simd/) - -`simd` offers a basic interface to the SIMD functionality of CPUs. (Note: This crate fails to build unless the target is aarch64, x86_64, i686 (i.e. SSE2 enabled; not i586) or an ARMv7 target (thumb or not) with NEON enabled.) - -This crate is expected to become _obsolete_ once the implementation of [RFC 2366](https://github.com/rust-lang/rfcs/pull/2366) lands in the standard library. - -[Documentation](https://docs.rs/simd) diff --git a/third_party/rust/simd/benches/mandelbrot.rs b/third_party/rust/simd/benches/mandelbrot.rs deleted file mode 100755 index 61061a4a301f..000000000000 --- a/third_party/rust/simd/benches/mandelbrot.rs +++ /dev/null @@ -1,117 +0,0 @@ -#![feature(test)] -#![feature(cfg_target_feature)] - -extern crate simd; -extern crate test; - -use test::black_box as bb; -use test::Bencher as B; -use simd::{f32x4, u32x4}; -#[cfg(any(target_feature = "avx", target_feature = "avx2"))] -use simd::x86::avx::{f32x8, u32x8}; - -fn naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { - let mut x = c_x; - let mut y = c_y; - let mut count = 0; - while count < max_iter { - let xy = x * y; - let xx = x * x; - let yy = y * y; - let sum = xx + yy; - if sum > 4.0 { - break - } - count += 1; - x = xx - yy + c_x; - y = xy * 2.0 + c_y; - } - count -} - -fn simd4(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { - let mut x = c_x; - let mut y = c_y; - - let mut count = u32x4::splat(0); - for _ in 0..max_iter as usize { - let xy = x * y; - let xx = x * x; - let yy = y * y; - let sum = xx + yy; - let mask = sum.lt(f32x4::splat(4.0)); - - if !mask.any() { break } - count = count + mask.to_i().select(u32x4::splat(1), u32x4::splat(0)); - - x = xx - yy + c_x; - y = xy + xy + c_y; - } - count -} - -#[cfg(target_feature = "avx")] -fn simd8(c_x: f32x8, c_y: f32x8, max_iter: u32) -> u32x8 { - let mut x = c_x; - let mut y = 
c_y; - - let mut count = u32x8::splat(0); - for _ in 0..max_iter as usize { - let xy = x * y; - let xx = x * x; - let yy = y * y; - let sum = xx + yy; - let mask = sum.lt(f32x8::splat(4.0)); - - if !mask.any() { break } - count = count + mask.to_i().select(u32x8::splat(1), u32x8::splat(0)); - - x = xx - yy + c_x; - y = xy + xy + c_y; - } - count -} - -const SCALE: f32 = 3.0 / 100.0; -const N: u32 = 100; -#[bench] -fn mandel_naive(b: &mut B) { - b.iter(|| { - for j in 0..100 { - let y = -1.5 + (j as f32) * SCALE; - for i in 0..100 { - let x = -2.2 + (i as f32) * SCALE; - bb(naive(x, y, N)); - } - } - }) -} -#[bench] -fn mandel_simd4(b: &mut B) { - let tweak = u32x4::new(0, 1, 2, 3); - b.iter(|| { - for j in 0..100 { - let y = f32x4::splat(-1.5) + f32x4::splat(SCALE) * u32x4::splat(j).to_f32(); - for i in 0..25 { - let i = u32x4::splat(i * 4) + tweak; - let x = f32x4::splat(-2.2) + f32x4::splat(SCALE) * i.to_f32(); - bb(simd4(x, y, N)); - } - } - }) -} -#[cfg(any(target_feature = "avx", target_feature = "avx2"))] -#[bench] -fn mandel_simd8(b: &mut B) { - let tweak = u32x8::new(0, 1, 2, 3, 4, 5, 6, 7); - b.iter(|| { - for j in 0..100 { - let y = f32x8::splat(-1.5) + f32x8::splat(SCALE) * u32x8::splat(j).to_f32(); - for i in 0..13 { // 100 not divisible by 8 :( - let i = u32x8::splat(i * 8) + tweak; - let x = f32x8::splat(-2.2) + f32x8::splat(SCALE) * i.to_f32(); - bb(simd8(x, y, N)); - } - } - }) -} diff --git a/third_party/rust/simd/benches/matrix.rs b/third_party/rust/simd/benches/matrix.rs deleted file mode 100755 index 36aa88237492..000000000000 --- a/third_party/rust/simd/benches/matrix.rs +++ /dev/null @@ -1,485 +0,0 @@ -#![feature(test)] -#![feature(cfg_target_feature)] -extern crate test; -extern crate simd; - -use test::black_box as bb; -use test::Bencher as B; -use simd::f32x4; -#[cfg(target_feature = "avx")] -use simd::x86::avx::{f32x8, f64x4}; -// #[cfg(target_feature = "avx2")] -// use simd::x86::avx2::Avx2F32x8; - - -#[bench] -fn multiply_naive(b: &mut 
B) { - let x = [[1.0_f32; 4]; 4]; - let y = [[2.0; 4]; 4]; - b.iter(|| { - for _ in 0..100 { - let (x, y) = bb((&x, &y)); - - bb(&[[x[0][0] * y[0][0] + x[1][0] * y[0][1] + x[2][0] * y[0][2] + x[3][0] * y[0][3], - x[0][1] * y[0][0] + x[1][1] * y[0][1] + x[2][1] * y[0][2] + x[3][1] * y[0][3], - x[0][2] * y[0][0] + x[1][2] * y[0][1] + x[2][2] * y[0][2] + x[3][2] * y[0][3], - x[0][3] * y[0][0] + x[1][3] * y[0][1] + x[2][3] * y[0][2] + x[3][3] * y[0][3]], - [x[0][0] * y[1][0] + x[1][0] * y[1][1] + x[2][0] * y[1][2] + x[3][0] * y[1][3], - x[0][1] * y[1][0] + x[1][1] * y[1][1] + x[2][1] * y[1][2] + x[3][1] * y[1][3], - x[0][2] * y[1][0] + x[1][2] * y[1][1] + x[2][2] * y[1][2] + x[3][2] * y[1][3], - x[0][3] * y[1][0] + x[1][3] * y[1][1] + x[2][3] * y[1][2] + x[3][3] * y[1][3]], - [x[0][0] * y[2][0] + x[1][0] * y[2][1] + x[2][0] * y[2][2] + x[3][0] * y[2][3], - x[0][1] * y[2][0] + x[1][1] * y[2][1] + x[2][1] * y[2][2] + x[3][1] * y[2][3], - x[0][2] * y[2][0] + x[1][2] * y[2][1] + x[2][2] * y[2][2] + x[3][2] * y[2][3], - x[0][3] * y[2][0] + x[1][3] * y[2][1] + x[2][3] * y[2][2] + x[3][3] * y[2][3]], - [x[0][0] * y[3][0] + x[1][0] * y[3][1] + x[2][0] * y[3][2] + x[3][0] * y[3][3], - x[0][1] * y[3][0] + x[1][1] * y[3][1] + x[2][1] * y[3][2] + x[3][1] * y[3][3], - x[0][2] * y[3][0] + x[1][2] * y[3][1] + x[2][2] * y[3][2] + x[3][2] * y[3][3], - x[0][3] * y[3][0] + x[1][3] * y[3][1] + x[2][3] * y[3][2] + x[3][3] * y[3][3]], - ]); - } - }) -} - -#[bench] -fn multiply_simd4_32(b: &mut B) { - let x = [f32x4::splat(1.0_f32); 4]; - let y = [f32x4::splat(2.0); 4]; - b.iter(|| { - for _ in 0..100 { - let (x, y) = bb((&x, &y)); - - let y0 = y[0]; - let y1 = y[1]; - let y2 = y[2]; - let y3 = y[3]; - bb(&[f32x4::splat(y0.extract(0)) * x[0] + - f32x4::splat(y0.extract(1)) * x[1] + - f32x4::splat(y0.extract(2)) * x[2] + - f32x4::splat(y0.extract(3)) * x[3], - f32x4::splat(y1.extract(0)) * x[0] + - f32x4::splat(y1.extract(1)) * x[1] + - f32x4::splat(y1.extract(2)) * x[2] + - 
f32x4::splat(y1.extract(3)) * x[3], - f32x4::splat(y2.extract(0)) * x[0] + - f32x4::splat(y2.extract(1)) * x[1] + - f32x4::splat(y2.extract(2)) * x[2] + - f32x4::splat(y2.extract(3)) * x[3], - f32x4::splat(y3.extract(0)) * x[0] + - f32x4::splat(y3.extract(1)) * x[1] + - f32x4::splat(y3.extract(2)) * x[2] + - f32x4::splat(y3.extract(3)) * x[3], - ]); - } - }) -} - -#[cfg(target_feature = "avx")] -#[bench] -fn multiply_simd4_64(b: &mut B) { - let x = [f64x4::splat(1.0_f64); 4]; - let y = [f64x4::splat(2.0); 4]; - b.iter(|| { - for _ in 0..100 { - let (x, y) = bb((&x, &y)); - - let y0 = y[0]; - let y1 = y[1]; - let y2 = y[2]; - let y3 = y[3]; - bb(&[f64x4::splat(y0.extract(0)) * x[0] + - f64x4::splat(y0.extract(1)) * x[1] + - f64x4::splat(y0.extract(2)) * x[2] + - f64x4::splat(y0.extract(3)) * x[3], - f64x4::splat(y1.extract(0)) * x[0] + - f64x4::splat(y1.extract(1)) * x[1] + - f64x4::splat(y1.extract(2)) * x[2] + - f64x4::splat(y1.extract(3)) * x[3], - f64x4::splat(y2.extract(0)) * x[0] + - f64x4::splat(y2.extract(1)) * x[1] + - f64x4::splat(y2.extract(2)) * x[2] + - f64x4::splat(y2.extract(3)) * x[3], - f64x4::splat(y3.extract(0)) * x[0] + - f64x4::splat(y3.extract(1)) * x[1] + - f64x4::splat(y3.extract(2)) * x[2] + - f64x4::splat(y3.extract(3)) * x[3], - ]); - } - }) -} - -#[bench] -fn inverse_naive(b: &mut B) { - let mut x = [[0_f32; 4]; 4]; - for i in 0..4 { x[i][i] = 1.0 } - - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - - let mut t = [[0_f32; 4]; 4]; - for i in 0..4 { - t[0][i] = x[i][0]; - t[1][i] = x[i][1]; - t[2][i] = x[i][2]; - t[3][i] = x[i][3]; - } - - let _0 = t[2][2] * t[3][3]; - let _1 = t[2][3] * t[3][2]; - let _2 = t[2][1] * t[3][3]; - let _3 = t[2][3] * t[3][1]; - let _4 = t[2][1] * t[3][2]; - let _5 = t[2][2] * t[3][1]; - let _6 = t[2][0] * t[3][3]; - let _7 = t[2][3] * t[3][0]; - let _8 = t[2][0] * t[3][2]; - let _9 = t[2][2] * t[3][0]; - let _10 = t[2][0] * t[3][1]; - let _11 = t[2][1] * t[3][0]; - - let d00 = _0 * t[1][1] + _3 * t[1][2] + 
_4 * t[1][3] - - (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); - let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - - (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); - let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - - (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); - let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - - (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); - let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - - (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); - let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - - (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); - let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - - (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); - let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - - (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); - - let _0 = t[0][2] * t[1][3]; - let _1 = t[0][3] * t[1][2]; - let _2 = t[0][1] * t[1][3]; - let _3 = t[0][3] * t[1][1]; - let _4 = t[0][1] * t[1][2]; - let _5 = t[0][2] * t[1][1]; - let _6 = t[0][0] * t[1][3]; - let _7 = t[0][3] * t[1][0]; - let _8 = t[0][0] * t[1][2]; - let _9 = t[0][2] * t[1][0]; - let _10 = t[0][0] * t[1][1]; - let _11 = t[0][1] * t[1][0]; - - let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- - (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); - let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- - (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); - let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- - (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); - let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- - (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); - let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- - (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); - let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- - (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); - let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- - (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); - let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- - (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); - - let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; - - 
let det = 1.0 / det; - let mut ret = [[d00, d01, d02, d03], - [d10, d11, d12, d13], - [d20, d21, d22, d23], - [d30, d31, d32, d33]]; - for i in 0..4 { - for j in 0..4 { - ret[i][j] *= det; - } - } - bb(&ret); - } - }) -} - -#[bench] -fn inverse_simd4(b: &mut B) { - let mut x = [f32x4::splat(0_f32); 4]; - for i in 0..4 { x[i] = x[i].replace(i as u32, 1.0); } - - fn shuf0145(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(0), v.extract(1), - w.extract(4 - 4), w.extract(5 - 4)) - } - fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(0), v.extract(2), - w.extract(4 - 4), w.extract(6 - 4)) - } - fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(1), v.extract(3), - w.extract(5 - 4), w.extract(7 - 4)) - } - fn shuf2367(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(2), v.extract(3), - w.extract(6 - 4), w.extract(7 - 4)) - } - - fn swiz1032(v: f32x4) -> f32x4 { - f32x4::new(v.extract(1), v.extract(0), - v.extract(3), v.extract(2)) - } - fn swiz2301(v: f32x4) -> f32x4 { - f32x4::new(v.extract(2), v.extract(3), - v.extract(0), v.extract(1)) - } - - b.iter(|| { - for _ in 0..100 { - let src0; - let src1; - let src2; - let src3; - let mut tmp1; - let row0; - let mut row1; - let mut row2; - let mut row3; - let mut minor0; - let mut minor1; - let mut minor2; - let mut minor3; - let mut det; - - let x = bb(&x); - src0 = x[0]; - src1 = x[1]; - src2 = x[2]; - src3 = x[3]; - - tmp1 = shuf0145(src0, src1); - row1 = shuf0145(src2, src3); - row0 = shuf0246(tmp1, row1); - row1 = shuf1357(row1, tmp1); - - tmp1 = shuf2367(src0, src1); - row3 = shuf2367(src2, src3); - row2 = shuf0246(tmp1, row3); - row3 = shuf0246(row3, tmp1); - - - tmp1 = row2 * row3; - tmp1 = swiz1032(tmp1); - minor0 = row1 * tmp1; - minor1 = row0 * tmp1; - tmp1 = swiz2301(tmp1); - minor0 = (row1 * tmp1) - minor0; - minor1 = (row0 * tmp1) - minor1; - minor1 = swiz2301(minor1); - - - tmp1 = row1 * row2; - tmp1 = swiz1032(tmp1); - minor0 = (row3 * tmp1) + minor0; - minor3 = row0 
* tmp1; - tmp1 = swiz2301(tmp1); - - minor0 = minor0 - row3 * tmp1; - minor3 = row0 * tmp1 - minor3; - minor3 = swiz2301(minor3); - - - tmp1 = row3 * swiz2301(row1); - tmp1 = swiz1032(tmp1); - row2 = swiz2301(row2); - minor0 = row2 * tmp1 + minor0; - minor2 = row0 * tmp1; - tmp1 = swiz2301(tmp1); - minor0 = minor0 - row2 * tmp1; - minor2 = row0 * tmp1 - minor2; - minor2 = swiz2301(minor2); - - - tmp1 = row0 * row1; - tmp1 = swiz1032(tmp1); - minor2 = minor2 + row3 * tmp1; - minor3 = row2 * tmp1 - minor3; - tmp1 = swiz2301(tmp1); - minor2 = row3 * tmp1 - minor2; - minor3 = minor3 - row2 * tmp1; - - - - tmp1 = row0 * row3; - tmp1 = swiz1032(tmp1); - minor1 = minor1 - row2 * tmp1; - minor2 = row1 * tmp1 + minor2; - tmp1 = swiz2301(tmp1); - minor1 = row2 * tmp1 + minor1; - minor2 = minor2 - row1 * tmp1; - - tmp1 = row0 * row2; - tmp1 = swiz1032(tmp1); - minor1 = row3 * tmp1 + minor1; - minor3 = minor3 - row1 * tmp1; - tmp1 = swiz2301(tmp1); - minor1 = minor1 - row3 * tmp1; - minor3 = row1 * tmp1 + minor3; - - det = row0 * minor0; - det = swiz2301(det) + det; - det = swiz1032(det) + det; - //tmp1 = det.approx_reciprocal(); det = tmp1 * (f32x4::splat(2.0) - det * tmp1); - det = f32x4::splat(1.0) / det; - - bb(&[minor0 * det, minor1 * det, minor2 * det, minor3 * det]); - } - }) - -} - -#[bench] -fn transpose_naive(b: &mut B) { - let x = [[0_f32; 4]; 4]; - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - bb(&[[x[0][0], x[1][0], x[2][0], x[3][0]], - [x[0][1], x[1][1], x[2][1], x[3][1]], - [x[0][2], x[1][2], x[2][2], x[3][2]], - [x[0][3], x[1][3], x[2][3], x[3][3]]]); - } - }) -} - -#[bench] -fn transpose_simd4(b: &mut B) { - let x = [f32x4::splat(0_f32); 4]; - - fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(0), v.extract(2), - w.extract(4 - 4), w.extract(6 - 4)) - } - fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { - f32x4::new(v.extract(1), v.extract(3), - w.extract(5 - 4), w.extract(7 - 4)) - } - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - let x0 
= x[0]; - let x1 = x[1]; - let x2 = x[2]; - let x3 = x[3]; - - let a0 = shuf0246(x0, x1); - let a1 = shuf0246(x2, x3); - let a2 = shuf1357(x0, x1); - let a3 = shuf1357(x2, x3); - - let b0 = shuf0246(a0, a1); - let b1 = shuf0246(a2, a3); - let b2 = shuf1357(a0, a1); - let b3 = shuf1357(a2, a3); - bb(&[b0, b1, b2, b3]); - } - }) -} - -#[cfg(target_feature = "avx")] -#[bench] -fn transpose_simd8_naive(b: &mut B) { - let x = [f32x8::splat(0_f32); 2]; - - fn shuf0246(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(0), v.extract(2), v.extract(4), v.extract(6), - w.extract(0), w.extract(2), w.extract(4), w.extract(6)) - } - fn shuf1357(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(1), v.extract(3), v.extract(5), v.extract(7), - w.extract(1), w.extract(3), w.extract(5), w.extract(7),) - } - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - let x01 = x[0]; - let x23 = x[1]; - - let a01 = shuf0246(x01, x23); - let a23 = shuf1357(x01, x23); - - let b01 = shuf0246(a01, a23); - let b23 = shuf1357(a01, a23); - bb(&[b01, b23]); - } - }) -} - -#[cfg(target_feature = "avx")] -#[bench] -fn transpose_simd8_avx2_vpermps(b: &mut B) { - let x = [f32x8::splat(0_f32); 2]; - - // efficient on AVX2 using vpermps - fn perm04152637(v: f32x8) -> f32x8 { - // broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) - // v.permutevar(i32x8::new(0, 4, 1, 5, 2, 6, 3, 7)) - f32x8::new(v.extract(0), v.extract(4), v.extract(1), v.extract(5), - v.extract(2), v.extract(6), v.extract(3), v.extract(7)) - } - fn shuf_lo(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(0), v.extract(1), w.extract(0), w.extract(1), - v.extract(4), v.extract(5), w.extract(4), w.extract(5),) - } - fn shuf_hi(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(2), v.extract(3), w.extract(2), w.extract(3), - v.extract(6), v.extract(7), w.extract(6), w.extract(7),) - } - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - let x01 = x[0]; - let x23 = x[1]; - - let a01 = perm04152637(x01); - let a23 = 
perm04152637(x23); - - let b01 = shuf_lo(a01, a23); - let b23 = shuf_hi(a01, a23); - bb(&[b01, b23]); - } - }) -} - -#[cfg(target_feature = "avx")] -#[bench] -fn transpose_simd8_avx2_vpermpd(b: &mut B) { - let x = [f32x8::splat(0_f32); 2]; - - // efficient on AVX2 using vpermpd - fn perm01452367(v: f32x8) -> f32x8 { - f32x8::new(v.extract(0), v.extract(1), v.extract(4), v.extract(5), - v.extract(2), v.extract(3), v.extract(6), v.extract(7)) - } - fn shuf_lo_ps(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(0), w.extract(0), v.extract(1), w.extract(1), - v.extract(4), w.extract(4), v.extract(5), w.extract(5),) - } - fn shuf_hi_ps(v: f32x8, w: f32x8) -> f32x8 { - f32x8::new(v.extract(2), w.extract(2), v.extract(3), w.extract(3), - v.extract(6), w.extract(6), v.extract(7), w.extract(7),) - } - b.iter(|| { - for _ in 0..100 { - let x = bb(&x); - let x01 = x[0]; - let x23 = x[1]; - - let a01 = perm01452367(x01); - let a23 = perm01452367(x23); - - let b01 = shuf_lo_ps(a01, a23); - let b23 = shuf_hi_ps(a01, a23); - bb(&[b01, b23]); - } - }) -} diff --git a/third_party/rust/simd/build.rs b/third_party/rust/simd/build.rs deleted file mode 100644 index 61b5330a1846..000000000000 --- a/third_party/rust/simd/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); -} diff --git a/third_party/rust/simd/examples/axpy.rs b/third_party/rust/simd/examples/axpy.rs deleted file mode 100755 index 7862721b254d..000000000000 --- a/third_party/rust/simd/examples/axpy.rs +++ /dev/null @@ -1,65 +0,0 @@ -#![feature(cfg_target_feature)] -extern crate simd; -use simd::f32x4; -#[cfg(target_feature = "avx")] -use simd::x86::avx::f32x8; - -#[inline(never)] -pub fn axpy(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { - assert_eq!(x.len(), y.len()); - assert_eq!(x.len(), z.len()); - - let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); - - let mut i = 0; - while i < len & !3 { - let x = f32x4::load(x, i); - let y = f32x4::load(y, i); 
- (f32x4::splat(a) * x + y).store(z, i); - i += 4 - } -} - -#[cfg(target_feature = "avx")] -#[inline(never)] -pub fn axpy8(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { - assert_eq!(x.len(), y.len()); - assert_eq!(x.len(), z.len()); - - let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); - - let mut i = 0; - while i < len & !7 { - let x = f32x8::load(x, i); - let y = f32x8::load(y, i); - (f32x8::splat(a) * x + y).store(z, i); - i += 8 - } -} - - -#[cfg(not(target_feature = "avx"))] -pub fn axpy8(_: &mut [f32], _: f32, _: &[f32], _: &[f32]) { - unimplemented!() -} - - -fn main() { - let mut z = vec![0.; 4]; - axpy(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); - println!("{:?}", z); - let mut z = vec![0.; 8]; - axpy(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], - &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); - println!("{:?}", z); - - if cfg!(target_feature = "avx") { - let mut z = vec![0.; 4]; - axpy8(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); - println!("{:?}", z); - let mut z = vec![0.; 8]; - axpy8(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], - &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); - println!("{:?}", z); - } -} diff --git a/third_party/rust/simd/examples/convert.rs b/third_party/rust/simd/examples/convert.rs deleted file mode 100644 index 11823a4b50d2..000000000000 --- a/third_party/rust/simd/examples/convert.rs +++ /dev/null @@ -1,38 +0,0 @@ -extern crate simd; -use simd::f32x4; - -#[inline(never)] -pub fn convert_scalar(x: &mut [i32], y: &[f32]) { - assert_eq!(x.len(), y.len()); - - let mut i = 0; - while i < x.len() & !3 { - x[i] = y[i] as i32; - i += 1; - } -} - -#[inline(never)] -pub fn convert(x: &mut [i32], y: &[f32]) { - assert_eq!(x.len(), y.len()); - - let mut i = 0; - while i < x.len() & !3 { - let v = f32x4::load(y, i); - v.to_i32().store(x, i); - i += 4 - } -} - -fn main() { - let x = &mut [0; 12]; - let y = [1.0; 12]; - convert(x, &y); - convert_scalar(x, &y); - println!("{:?}", 
x); - let x = &mut [0; 16]; - let y = [1.0; 16]; - convert(x, &y); - convert_scalar(x, &y); - println!("{:?}", x); -} diff --git a/third_party/rust/simd/examples/dot-product.rs b/third_party/rust/simd/examples/dot-product.rs deleted file mode 100755 index 9f0e1d35c799..000000000000 --- a/third_party/rust/simd/examples/dot-product.rs +++ /dev/null @@ -1,60 +0,0 @@ -#![feature(cfg_target_feature)] -extern crate simd; -use simd::f32x4; -#[cfg(target_feature = "avx")] -use simd::x86::avx::{f32x8, LowHigh128}; - -#[inline(never)] -pub fn dot(x: &[f32], y: &[f32]) -> f32 { - assert_eq!(x.len(), y.len()); - - let len = std::cmp::min(x.len(), y.len()); - - let mut sum = f32x4::splat(0.0); - let mut i = 0; - while i < len & !3 { - let x = f32x4::load(x, i); - let y = f32x4::load(y, i); - sum = sum + x * y; - i += 4 - } - sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) -} - -#[cfg(target_feature = "avx")] -#[inline(never)] -pub fn dot8(x: &[f32], y: &[f32]) -> f32 { - assert_eq!(x.len(), y.len()); - - let len = std::cmp::min(x.len(), y.len()); - - let mut sum = f32x8::splat(0.0); - let mut i = 0; - while i < len & !7 { - let x = f32x8::load(x, i); - let y = f32x8::load(y, i); - sum = sum + x * y; - i += 8 - } - let sum = sum.low() + sum.high(); - sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) -} - - -#[cfg(not(target_feature = "avx"))] -pub fn dot8(_: &[f32], _: &[f32]) -> f32 { - unimplemented!() -} - - -fn main() { - println!("{}", dot(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); - println!("{}", dot(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], - &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); - - if cfg!(target_feature = "avx") { - println!("{}", dot8(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); - println!("{}", dot8(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], - &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); - } -} diff --git a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs 
b/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs deleted file mode 100644 index fa30b2283f93..000000000000 --- a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs +++ /dev/null @@ -1,156 +0,0 @@ -// The Computer Language Benchmarks Game -// http://benchmarksgame.alioth.debian.org/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi - -use std::{cmp, mem}; -use std::thread; - -fn rotate(x: &mut [i32]) { - let mut prev = x[0]; - for place in x.iter_mut().rev() { - prev = mem::replace(place, prev) - } -} - -fn next_permutation(perm: &mut [i32], count: &mut [i32]) { - for i in 1..perm.len() { - rotate(&mut perm[.. i + 1]); - let count_i = &mut count[i]; - if *count_i >= i as i32 { - *count_i = 0; - } else { - *count_i += 1; - break - } - } -} - -#[derive(Clone, Copy)] -struct P { - p: [i32; 16], -} - -#[derive(Clone, Copy)] -struct Perm { - cnt: [i32; 16], - fact: [u32; 16], - n: u32, - permcount: u32, - perm: P, -} - -impl Perm { - fn new(n: u32) -> Perm { - let mut fact = [1; 16]; - for i in 1 .. n as usize + 1 { - fact[i] = fact[i - 1] * i as u32; - } - Perm { - cnt: [0; 16], - fact: fact, - n: n, - permcount: 0, - perm: P { p: [0; 16 ] } - } - } - - fn get(&mut self, mut idx: i32) -> P { - let mut pp = [0u8; 16]; - self.permcount = idx as u32; - for (i, place) in self.perm.p.iter_mut().enumerate() { - *place = i as i32 + 1; - } - - for i in (1 .. self.n as usize).rev() { - let d = idx / self.fact[i] as i32; - self.cnt[i] = d; - idx %= self.fact[i] as i32; - for (place, val) in pp.iter_mut().zip(self.perm.p[..(i+1)].iter()) { - *place = (*val) as u8 - } - - let d = d as usize; - for j in 0 .. 
i + 1 { - self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32; - } - } - - self.perm - } - - fn count(&self) -> u32 { self.permcount } - fn max(&self) -> u32 { self.fact[self.n as usize] } - - fn next(&mut self) -> P { - next_permutation(&mut self.perm.p, &mut self.cnt); - self.permcount += 1; - - self.perm - } -} - - -fn reverse(tperm: &mut [i32], k: usize) { - tperm[..k].reverse() -} - -fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) { - let mut checksum = 0; - let mut maxflips = 0; - - let mut p = perm.get(n as i32); - - while perm.count() < max as u32 { - let mut flips = 0; - - while p.p[0] != 1 { - let k = p.p[0] as usize; - reverse(&mut p.p, k); - flips += 1; - } - - checksum += if perm.count() % 2 == 0 {flips} else {-flips}; - maxflips = cmp::max(maxflips, flips); - - p = perm.next(); - } - - (checksum, maxflips) -} - -fn fannkuch(n: i32) -> (i32, i32) { - let perm = Perm::new(n as u32); - - let n = 1; - let mut futures = vec![]; - let k = perm.max() / n; - - for j in (0..).map(|x| x * k).take_while(|&j| j < k * n) { - let max = cmp::min(j+k, perm.max()); - - futures.push(thread::spawn(move|| { - work(perm, j as usize, max as usize) - })) - } - - let mut checksum = 0; - let mut maxflips = 0; - for fut in futures.into_iter() { - let (cs, mf) = fut.join().unwrap(); - checksum += cs; - maxflips = cmp::max(maxflips, mf); - } - (checksum, maxflips) -} - -fn main() { - let n = std::env::args_os().nth(1) - .and_then(|s| s.into_string().ok()) - .and_then(|n| n.parse().ok()) - .unwrap_or(7); - - let (checksum, maxflips) = fannkuch(n); - println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips); -} diff --git a/third_party/rust/simd/examples/fannkuch-redux.rs b/third_party/rust/simd/examples/fannkuch-redux.rs deleted file mode 100755 index 2e52ae721135..000000000000 --- a/third_party/rust/simd/examples/fannkuch-redux.rs +++ /dev/null @@ -1,233 +0,0 @@ -#![feature(cfg_target_feature)] -extern crate simd; -#[macro_use] extern crate cfg_if; 
-use simd::u8x16; - -use std::{env, process}; - -cfg_if! { - if #[cfg(target_arch = "aarch64")] { - #[inline(always)] - fn shuffle(x: u8x16, y: u8x16) -> u8x16 { - use simd::aarch64::neon::*; - y.table_lookup_1(x) - } - } else if #[cfg(all(target_arch = "arm", - target_feature = "neon"))] { - #[inline(always)] - fn shuffle(x: u8x16, y: u8x16) -> u8x16 { - use simd::arm::neon::*; - #[inline(always)] - fn split(x: u8x16) -> (u8x8, u8x8) { - unsafe {std::mem::transmute(x)} - } - fn join(x: u8x8, y: u8x8) -> u8x16 { - unsafe {std::mem::transmute((x, y))} - } - - let (t0, t1) = split(x); - let (i0, i1) = split(y); - join(i0.table_lookup_2(t0, t1), - i1.table_lookup_2(t0, t1)) - } - } else if #[cfg(target_feature = "ssse3")] { - #[inline(always)] - fn shuffle(x: u8x16, y: u8x16) -> u8x16 { - use simd::x86::ssse3::*; - x.shuffle_bytes(y) - } - } else { - // slow fallback, so tests work - #[inline(always)] - fn shuffle(x: u8x16, y: u8x16) -> u8x16 { - u8x16::new(x.extract(y.extract(0) as u32), - x.extract(y.extract(1) as u32), - x.extract(y.extract(2) as u32), - x.extract(y.extract(3) as u32), - x.extract(y.extract(4) as u32), - x.extract(y.extract(5) as u32), - x.extract(y.extract(6) as u32), - x.extract(y.extract(7) as u32), - x.extract(y.extract(8) as u32), - x.extract(y.extract(9) as u32), - x.extract(y.extract(10) as u32), - x.extract(y.extract(11) as u32), - x.extract(y.extract(12) as u32), - x.extract(y.extract(13) as u32), - x.extract(y.extract(14) as u32), - x.extract(y.extract(15) as u32)) - } - } -} -struct State { - s: [u8; 16], - flip_masks: [u8x16; 16], - rotate_masks: [u8x16; 16], - - maxflips: i32, - odd: u16, - checksum: i32, -} -impl State { - fn new() -> State { - State { - s: [0; 16], - flip_masks: [u8x16::splat(0); 16], - rotate_masks: [u8x16::splat(0); 16], - - maxflips: 0, - odd: 0, - checksum: 0, - } - } - #[inline(never)] - fn rotate_sisd(&mut self, n: usize) { - let c = self.s[0]; - for i in 1..(n + 1) { - self.s[i - 1] = self.s[i]; - } - 
self.s[n] = c; - } - #[inline(never)] - fn popmasks(&mut self) { - let mut mask = [0_u8; 16]; - for i in 0..16 { - for j in 0..16 { mask[j] = j as u8; } - - for x in 0..(i+1)/2 { - mask.swap(x, i - x); - } - - self.flip_masks[i] = u8x16::load(&mask, 0); - - for j in 0..16 { self.s[j] = j as u8; } - self.rotate_sisd(i); - self.rotate_masks[i] = self.load_s(); - } - } - fn rotate(&mut self, n: usize) { - shuffle(self.load_s(), self.rotate_masks[n]).store(&mut self.s, 0) - } - - fn load_s(&self) -> u8x16 { - u8x16::load(&self.s, 0) - } - - - #[inline(never)] - fn tk(&mut self, n: usize) { - #[derive(Copy, Clone, Debug)] - struct Perm { - perm: u8x16, - start: u8, - odd: u16 - } - - let mut perms = [Perm { perm: u8x16::splat(0), start: 0 , odd: 0 }; 60]; - - let mut i = 0; - let mut c = [0_u8; 16]; - let mut perm_max = 0; - - while i < n { - while i < n && perm_max < 60 { - self.rotate(i); - if c[i] as usize >= i { - c[i] = 0; - i += 1; - continue - } - - c[i] += 1; - i = 1; - self.odd = !self.odd; - if self.s[0] != 0 { - if self.s[self.s[0] as usize] != 0 { - perms[perm_max].perm = self.load_s(); - perms[perm_max].start = self.s[0]; - perms[perm_max].odd = self.odd; - perm_max += 1; - } else { - if self.maxflips == 0 { self.maxflips = 1 } - self.checksum += if self.odd != 0 { -1 } else { 1 }; - } - } - } - - let mut k = 0; - while k < std::cmp::max(1, perm_max) - 1 { - let pk = &perms[k]; - let pk1 = &perms[k + 1]; - //println!("perm1 {:?}\nperm2 {:?}", pk.perm, pk1.perm); - let mut perm1 = pk.perm; - let mut perm2 = pk1.perm; - - let mut f1 = 0; - let mut f2 = 0; - let mut toterm1 = pk.start; - let mut toterm2 = pk1.start; - - while toterm1 != 0 && toterm2 != 0 { - perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); - perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); - toterm1 = perm1.extract(0); - toterm2 = perm2.extract(0); - - f1 += 1; f2 += 1; - } - while toterm1 != 0 { - perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); - toterm1 = 
perm1.extract(0); - f1 += 1; - } - while toterm2 != 0 { - perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); - toterm2 = perm2.extract(0); - f2 += 1; - } - - if f1 > self.maxflips { self.maxflips = f1 } - if f2 > self.maxflips { self.maxflips = f2 } - self.checksum += if pk.odd != 0 { -f1 } else { f1 }; - self.checksum += if pk1.odd != 0 { -f2 } else { f2 }; - - k += 2; - } - while k < perm_max { - let pk = &perms[k]; - let mut perm = pk.perm; - let mut f = 0; - let mut toterm = pk.start; - while toterm != 0 { - perm = shuffle(perm, self.flip_masks[toterm as usize]); - toterm = perm.extract(0); - f += 1; - } - if f > self.maxflips { self.maxflips = f } - self.checksum += if pk.odd != 0 { -f } else { f }; - k += 1 - } - perm_max = 0; - } - } -} - -fn main() { - let mut state = State::new(); - state.popmasks(); - - let args = env::args().collect::>(); - if args.len() < 2 { - println!("usage: {} number", args[0]); - process::exit(1) - } - let max_n = args[1].parse().unwrap(); - if max_n < 3 || max_n > 15 { - println!("range: must be 3 <= n <= 14"); - process::exit(1); - } - for i in 0..max_n { state.s[i] = i as u8 } - state.tk(max_n); - - println!("{}\nPfannkuchen({}) = {}", state.checksum, max_n, state.maxflips); -} diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs deleted file mode 100755 index c6f1320a0784..000000000000 --- a/third_party/rust/simd/examples/mandelbrot.rs +++ /dev/null @@ -1,125 +0,0 @@ -#![feature(iterator_step_by, test)] - -extern crate test; -extern crate simd; -use simd::{f32x4, u32x4}; -use std::io::prelude::*; - -#[inline(never)] -fn mandelbrot_naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { - let mut x = c_x; - let mut y = c_y; - let mut count = 0; - while count < max_iter { - let xy = x * y; - let xx = x * x; - let yy = y * y; - let sum = xx + yy; - if sum > 4.0 { - break - } - count += 1; - x = xx - yy + c_x; - y = xy * 2.0 + c_y; - } - count -} - -#[inline(never)] -fn 
mandelbrot_vector(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { - let mut x = c_x; - let mut y = c_y; - - let mut count = u32x4::splat(0); - for _ in 0..max_iter as usize { - let xy = x * y; - let xx = x * x; - let yy = y * y; - let sum = xx + yy; - let mask = sum.lt(f32x4::splat(4.0)); - - if !mask.any() { break } - count = count + mask.to_i().select(u32x4::splat(1), - u32x4::splat(0)); - - x = xx - yy + c_x; - y = xy + xy + c_y; - } - count -} - -const COLOURS: &'static [(f32, f32, f32)] = &[(0.0, 7.0, 100.0), - (32.0, 107.0, 203.0), - (237.0, 255.0, 255.0), - (255.0, 170.0, 0.0), - (0.0, 2.0, 0.0)]; -const SCALE: f32 = 12.0; -const LIMIT: u32 = 100; - -#[inline(never)] -fn output_one(buf: &mut [u8], val: u32) { - let (r, g, b); - if val == LIMIT { - r = 0; - g = 0; - b = 0; - } else { - let val = (val as f32 % SCALE) * (COLOURS.len() as f32) / SCALE; - let left = val as usize % COLOURS.len(); - let right = (left + 1) % COLOURS.len(); - - let p = val - left as f32; - let (r1, g1, b1) = COLOURS[left]; - let (r2, g2, b2) = COLOURS[right]; - r = (r1 + (r2 - r1) * p) as u8; - g = (g1 + (g2 - g1) * p) as u8; - b = (b1 + (b2 - b1) * p) as u8; - } - buf[0] = r; - buf[1] = g; - buf[2] = b; -} - -fn main() { - let mut args = std::env::args(); - args.next(); - let width = args.next().unwrap().parse().unwrap(); - let height = args.next().unwrap().parse().unwrap(); - - let left = -2.2; - let right = left + 3.0; - let top = 1.0; - let bottom = top - 2.0; - - let width_step: f32 = (right - left) / width as f32; - let height_step: f32 = (bottom - top) / height as f32; - - let adjust = f32x4::splat(width_step) * f32x4::new(0., 1., 2., 3.); - - println!("P6 {} {} 255", width, height); - let mut line = vec![0; width * 3]; - - if args.next().is_none() { - for i in 0..height { - let y = f32x4::splat(top + height_step * i as f32); - for j in (0..width).step_by(4) { - let x = f32x4::splat(left + width_step * j as f32) + adjust; - let ret = mandelbrot_vector(x, y, LIMIT); - 
test::black_box(ret); - for k in 0..4 { let val = ret.extract(k as u32); output_one(&mut line[3*(j + k)..3*(j + k + 1)], val); } - } - ::std::io::stdout().write(&line).unwrap(); - } - } else { - for i in 0..height { - let y = top + height_step * i as f32; - for j in 0..width { - let x = left + width_step * j as f32; - let val = mandelbrot_naive(x, y, LIMIT); - test::black_box(val); - output_one(&mut line[3*j..3*(j + 1)], val); - } - ::std::io::stdout().write(&line).unwrap(); - } - } -} diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs deleted file mode 100644 index e6eb7ffc4655..000000000000 --- a/third_party/rust/simd/examples/matrix-inverse.rs +++ /dev/null @@ -1,281 +0,0 @@ -extern crate simd; -use simd::f32x4; - -fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] { - let y0 = y[0]; - let y1 = y[1]; - let y2 = y[2]; - let y3 = y[3]; - [f32x4::splat(y0.extract(0)) * x[0] + - f32x4::splat(y0.extract(1)) * x[1] + - f32x4::splat(y0.extract(2)) * x[2] + - f32x4::splat(y0.extract(3)) * x[3], - f32x4::splat(y1.extract(0)) * x[0] + - f32x4::splat(y1.extract(1)) * x[1] + - f32x4::splat(y1.extract(2)) * x[2] + - f32x4::splat(y1.extract(3)) * x[3], - f32x4::splat(y2.extract(0)) * x[0] + - f32x4::splat(y2.extract(1)) * x[1] + - f32x4::splat(y2.extract(2)) * x[2] + - f32x4::splat(y2.extract(3)) * x[3], - f32x4::splat(y3.extract(0)) * x[0] + - f32x4::splat(y3.extract(1)) * x[1] + - f32x4::splat(y3.extract(2)) * x[2] + - f32x4::splat(y3.extract(3)) * x[3], - ] -} - -#[allow(dead_code)] -fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] { - let mut t = [[0_f32; 4]; 4]; - for i in 0..4 { - t[0][i] = x[i][0]; - t[1][i] = x[i][1]; - t[2][i] = x[i][2]; - t[3][i] = x[i][3]; - } - println!("{:?}", t); - - let _0 = t[2][2] * t[3][3]; - let _1 = t[2][3] * t[3][2]; - let _2 = t[2][1] * t[3][3]; - let _3 = t[2][3] * t[3][1]; - let _4 = t[2][1] * t[3][2]; - let _5 = t[2][2] * t[3][1]; - let _6 = t[2][0] * t[3][3]; - let 
_7 = t[2][3] * t[3][0]; - let _8 = t[2][0] * t[3][2]; - let _9 = t[2][2] * t[3][0]; - let _10 = t[2][0] * t[3][1]; - let _11 = t[2][1] * t[3][0]; - let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; - println!("{:?}", v); - - let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - - (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); - let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - - (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); - let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - - (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); - let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - - (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); - let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - - (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); - let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - - (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); - let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - - (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); - let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - - (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); - - println!("{:?}", [d00, d01, d02, d03, d10, d11, d12, d13]); - - let _0 = t[0][2] * t[1][3]; - let _1 = t[0][3] * t[1][2]; - let _2 = t[0][1] * t[1][3]; - let _3 = t[0][3] * t[1][1]; - let _4 = t[0][1] * t[1][2]; - let _5 = t[0][2] * t[1][1]; - let _6 = t[0][0] * t[1][3]; - let _7 = t[0][3] * t[1][0]; - let _8 = t[0][0] * t[1][2]; - let _9 = t[0][2] * t[1][0]; - let _10 = t[0][0] * t[1][1]; - let _11 = t[0][1] * t[1][0]; - let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; - println!("{:?}", v); - - let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- - (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); - let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- - (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); - let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- - (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); - let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- - (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); - let d30 = _2*t[2][2] + 
_5*t[2][3] + _1*t[2][1]- - (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); - let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- - (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); - let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- - (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); - let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- - (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); - - println!("{:?}", [d20, d21, d22, d23, d30, d31, d32, d33]); - - let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; - - let det = 1.0 / det; - let mut ret = [[d00, d01, d02, d03], - [d10, d11, d12, d13], - [d20, d21, d22, d23], - [d30, d31, d32, d33]]; - for i in 0..4 { - for j in 0..4 { - ret[i][j] *= det; - } - } - ret -} - -fn inverse_simd4(x: &[f32x4; 4]) -> [f32x4; 4] { - let src0 = x[0]; - let src1 = x[1]; - let src2 = x[2]; - let src3 = x[3]; - - let tmp1 = f32x4::new(src0.extract(0), src0.extract(1), - src1.extract(4 - 4), src1.extract(5 - 4)); - let row1 = f32x4::new(src2.extract(0), src2.extract(1), - src3.extract(4 - 4), src3.extract(5 - 4)); - let row0 = f32x4::new(tmp1.extract(0), tmp1.extract(2), - row1.extract(4 - 4), row1.extract(6 - 4)); - let row1 = f32x4::new(row1.extract(1), row1.extract(3), - tmp1.extract(5 - 4), tmp1.extract(7 - 4)); - - let tmp1 = f32x4::new(src0.extract(2), src0.extract(3), - src1.extract(6 - 4), src1.extract(7 - 4)); - let row3 = f32x4::new(src2.extract(2), src2.extract(3), - src3.extract(6 - 4), src3.extract(7 - 4)); - let row2 = f32x4::new(tmp1.extract(0), tmp1.extract(2), - row3.extract(4 - 4), row3.extract(6 - 4)); - let row3 = f32x4::new(row3.extract(1), row3.extract(3), - tmp1.extract(5 - 4), tmp1.extract(7 - 4)); - - - let tmp1 = row2 * row3; - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let minor0 = row1 * tmp1; - let minor1 = row0 * tmp1; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); - let minor0 = (row1 * tmp1) - minor0; - let minor1 = (row0 * tmp1) - 
minor1; - let minor1 = f32x4::new(minor1.extract(2), minor1.extract(3), - minor1.extract(0), minor1.extract(1)); - //println!("{:?}", minor1); - - - let tmp1 = row1 * row2; - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let minor0 = (row3 * tmp1) + minor0; - let minor3 = row0 * tmp1; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); - - let minor0 = minor0 - row3 * tmp1; - let minor3 = row0 * tmp1 - minor3; - let minor3 = f32x4::new(minor3.extract(2), minor3.extract(3), - minor3.extract(0), minor3.extract(1)); - //println!("{:?}", minor1); - - - let tmp1 = row3 * f32x4::new(row1.extract(2), row1.extract(3), - row1.extract(0), row1.extract(1)); - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let row2 = f32x4::new(row2.extract(2), row2.extract(3), - row2.extract(0), row2.extract(1)); - let minor0 = row2 * tmp1 + minor0; - let minor2 = row0 * tmp1; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); - let minor0 = minor0 - row2 * tmp1; - let minor2 = row0 * tmp1 - minor2; - let minor2 = f32x4::new(minor2.extract(2), minor2.extract(3), - minor2.extract(0), minor2.extract(1)); - //println!("{:?}", minor1); - - - let tmp1 = row0 * row1; - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let minor2 = minor2 + row3 * tmp1; - let minor3 = row2 * tmp1 - minor3; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); - let minor2 = row3 * tmp1 - minor2; - let minor3 = minor3 - row2 * tmp1; - //println!("{:?}", minor1); - - - - let tmp1 = row0 * row3; - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let minor1 = minor1 - row2 * tmp1; - let minor2 = row1 * tmp1 + minor2; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); 
- let minor1 = row2 * tmp1 + minor1; - let minor2 = minor2 - row1 * tmp1; - //println!("{:?}", minor1); - - let tmp1 = row0 * row2; - let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), - tmp1.extract(3), tmp1.extract(2)); - let minor1 = row3 * tmp1 + minor1; - let minor3 = minor3 - row1 * tmp1; - let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), - tmp1.extract(0), tmp1.extract(1)); - let minor1 = minor1 - row3 * tmp1; - let minor3 = row1 * tmp1 + minor3; - //println!("{:?}", minor1); - - let det = row0 * minor0; - let det = f32x4::new(det.extract(2), det.extract(3), - det.extract(0), det.extract(1)) + det; - let det = f32x4::new(det.extract(1), det.extract(0), - det.extract(3), det.extract(2)) + det; - let tmp1 = det.approx_reciprocal(); - let det = tmp1 + tmp1 - det * tmp1 * tmp1; - -// let det = f32x4::splat(det.extract(0)); - - [minor0 * det, minor1 * det, minor2 * det, minor3 * det] -} - -fn p(x: &[f32x4; 4]) { - for xx in x { - for i in 0..4 { - let v = xx.extract(i); - if v == 0.0 { - print!("{}{:6.2}", if i > 0 {", "} else {"|"}, ""); - } else { - print!("{}{:6.2}", if i > 0 {", "} else {"|"}, xx.extract(i)); - } - } - println!(" |"); - } -} - -fn main() { - let x = [f32x4::new(-100.0, 6.0, 100.0, 1.0), - f32x4::new(3.0, 1.0, 0.0, 1.0), - f32x4::new(2.0, 1.0, 1.0, 1.0), - f32x4::new(-10.0, 1.0, 1.0, 1.0)]; - - /* let mut x_ = [[0.0; 4]; 4]; - for i in 0..4 { - for j in 0..4 { - x_[i][j] = x[i].extract(j as u32) - } - } - - let ret = inverse_naive(&x_); - let mut y = [f32x4::splat(0.0); 4]; - for i in 0..4 { - for j in 0..4 { - y[i] = y[i].replace(j as u32, ret[i][j]) - } -}*/ - let y = inverse_simd4(&x); - p(&x); - println!(""); - p(&y); - println!(""); - p(&mul(&x, &y)) -} diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs deleted file mode 100644 index d5f1bb422ff2..000000000000 --- a/third_party/rust/simd/examples/nbody-nosimd.rs +++ /dev/null @@ -1,156 +0,0 @@ -// The Computer Language 
Benchmarks Game -// http://benchmarksgame.alioth.debian.org/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi - -const PI: f64 = 3.141592653589793; -const SOLAR_MASS: f64 = 4.0 * PI * PI; -const YEAR: f64 = 365.24; -const N_BODIES: usize = 5; - -static BODIES: [Planet;N_BODIES] = [ - // Sun - Planet { - x: 0.0, y: 0.0, z: 0.0, - vx: 0.0, vy: 0.0, vz: 0.0, - mass: SOLAR_MASS, - }, - // Jupiter - Planet { - x: 4.84143144246472090e+00, - y: -1.16032004402742839e+00, - z: -1.03622044471123109e-01, - vx: 1.66007664274403694e-03 * YEAR, - vy: 7.69901118419740425e-03 * YEAR, - vz: -6.90460016972063023e-05 * YEAR, - mass: 9.54791938424326609e-04 * SOLAR_MASS, - }, - // Saturn - Planet { - x: 8.34336671824457987e+00, - y: 4.12479856412430479e+00, - z: -4.03523417114321381e-01, - vx: -2.76742510726862411e-03 * YEAR, - vy: 4.99852801234917238e-03 * YEAR, - vz: 2.30417297573763929e-05 * YEAR, - mass: 2.85885980666130812e-04 * SOLAR_MASS, - }, - // Uranus - Planet { - x: 1.28943695621391310e+01, - y: -1.51111514016986312e+01, - z: -2.23307578892655734e-01, - vx: 2.96460137564761618e-03 * YEAR, - vy: 2.37847173959480950e-03 * YEAR, - vz: -2.96589568540237556e-05 * YEAR, - mass: 4.36624404335156298e-05 * SOLAR_MASS, - }, - // Neptune - Planet { - x: 1.53796971148509165e+01, - y: -2.59193146099879641e+01, - z: 1.79258772950371181e-01, - vx: 2.68067772490389322e-03 * YEAR, - vy: 1.62824170038242295e-03 * YEAR, - vz: -9.51592254519715870e-05 * YEAR, - mass: 5.15138902046611451e-05 * SOLAR_MASS, - }, -]; - -#[derive(Clone, Copy)] -struct Planet { - x: f64, y: f64, z: f64, - vx: f64, vy: f64, vz: f64, - mass: f64, -} - -fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) { - for _ in 0..steps { - let mut b_slice: &mut [_] = bodies; - loop { - let bi = match shift_mut_ref(&mut b_slice) { - Some(bi) => bi, - None => break - }; - for bj in b_slice.iter_mut() { - let dx = bi.x - bj.x; - let dy = bi.y - bj.y; - let dz = bi.z - bj.z; - - let d2 = dx 
* dx + dy * dy + dz * dz; - let mag = dt / (d2 * d2.sqrt()); - - let massj_mag = bj.mass * mag; - bi.vx -= dx * massj_mag; - bi.vy -= dy * massj_mag; - bi.vz -= dz * massj_mag; - - let massi_mag = bi.mass * mag; - bj.vx += dx * massi_mag; - bj.vy += dy * massi_mag; - bj.vz += dz * massi_mag; - } - bi.x += dt * bi.vx; - bi.y += dt * bi.vy; - bi.z += dt * bi.vz; - } - } -} - -fn energy(bodies: &[Planet;N_BODIES]) -> f64 { - let mut e = 0.0; - let mut bodies = bodies.iter(); - loop { - let bi = match bodies.next() { - Some(bi) => bi, - None => break - }; - e += (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz) * bi.mass / 2.0; - for bj in bodies.clone() { - let dx = bi.x - bj.x; - let dy = bi.y - bj.y; - let dz = bi.z - bj.z; - let dist = (dx * dx + dy * dy + dz * dz).sqrt(); - e -= bi.mass * bj.mass / dist; - } - } - e -} - -fn offset_momentum(bodies: &mut [Planet;N_BODIES]) { - let mut px = 0.0; - let mut py = 0.0; - let mut pz = 0.0; - for bi in bodies.iter() { - px += bi.vx * bi.mass; - py += bi.vy * bi.mass; - pz += bi.vz * bi.mass; - } - let sun = &mut bodies[0]; - sun.vx = - px / SOLAR_MASS; - sun.vy = - py / SOLAR_MASS; - sun.vz = - pz / SOLAR_MASS; -} - -fn main() { - let n = std::env::args().nth(1).expect("need one arg").parse().unwrap(); - let mut bodies = BODIES; - - offset_momentum(&mut bodies); - println!("{:.9}", energy(&bodies)); - - advance(&mut bodies, 0.01, n); - - println!("{:.9}", energy(&bodies)); -} - -/// Pop a mutable reference off the head of a slice, mutating the slice to no -/// longer contain the mutable reference. 
-fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> { - if r.len() == 0 { return None } - let tmp = std::mem::replace(r, &mut []); - let (h, t) = tmp.split_at_mut(1); - *r = t; - Some(&mut h[0]) -} diff --git a/third_party/rust/simd/examples/nbody.rs b/third_party/rust/simd/examples/nbody.rs deleted file mode 100755 index d6d4e88e3741..000000000000 --- a/third_party/rust/simd/examples/nbody.rs +++ /dev/null @@ -1,170 +0,0 @@ -#![feature(cfg_target_feature)] - -extern crate simd; - -#[cfg(target_feature = "sse2")] -use simd::x86::sse2::*; -#[cfg(target_arch = "aarch64")] -use simd::aarch64::neon::*; - -const PI: f64 = 3.141592653589793; -const SOLAR_MASS: f64 = 4.0 * PI * PI; -const DAYS_PER_YEAR: f64 = 365.24; - -struct Body { - x: [f64; 3], - _fill: f64, - v: [f64; 3], - mass: f64, -} - -impl Body { - fn new(x0: f64, x1: f64, x2: f64, - v0: f64, v1: f64, v2: f64, - mass: f64) -> Body { - Body { - x: [x0, x1, x2], - _fill: 0.0, - v: [v0, v1, v2], - mass: mass, - } - } -} - -const N_BODIES: usize = 5; -const N: usize = N_BODIES * (N_BODIES - 1) / 2; -fn offset_momentum(bodies: &mut [Body; N_BODIES]) { - let (sun, rest) = bodies.split_at_mut(1); - let sun = &mut sun[0]; - for body in rest { - for k in 0..3 { - sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS; - } - } -} -fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { - let mut r = [[0.0; 4]; N]; - let mut mag = [0.0; N]; - - let mut dx = [f64x2::splat(0.0); 3]; - let mut dsquared; - let mut distance; - let mut dmag; - - let mut i = 0; - for j in 0..N_BODIES { - for k in j+1..N_BODIES { - for m in 0..3 { - r[i][m] = bodies[j].x[m] - bodies[k].x[m]; - } - i += 1; - } - } - - i = 0; - while i < N { - for m in 0..3 { - dx[m] = f64x2::new(r[i][m], r[i+1][m]); - } - - dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - distance = dsquared.to_f32().approx_rsqrt().to_f64(); - for _ in 0..2 { - distance = distance * f64x2::splat(1.5) - - ((f64x2::splat(0.5) * dsquared) * distance) * (distance * 
distance) - } - dmag = f64x2::splat(dt) / dsquared * distance; - dmag.store(&mut mag, i); - - i += 2; - } - - i = 0; - for j in 0..N_BODIES { - for k in j+1..N_BODIES { - for m in 0..3 { - bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i]; - bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i]; - } - i += 1 - } - } - for body in bodies { - for m in 0..3 { - body.x[m] += dt * body.v[m] - } - } -} - -fn energy(bodies: &[Body; N_BODIES]) -> f64 { - let mut e = 0.0; - for i in 0..N_BODIES { - let bi = &bodies[i]; - e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0; - for j in i+1..N_BODIES { - let bj = &bodies[j]; - let mut dx = [0.0; 3]; - for k in 0..3 { - dx[k] = bi.x[k] - bj.x[k]; - } - let mut distance = 0.0; - for &d in &dx { distance += d * d } - e -= bi.mass * bj.mass / distance.sqrt() - } - } - e -} - -fn main() { - let mut bodies: [Body; N_BODIES] = [ - /* sun */ - Body::new(0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, - SOLAR_MASS), - /* jupiter */ - Body::new(4.84143144246472090e+00, - -1.16032004402742839e+00, - -1.03622044471123109e-01 , - 1.66007664274403694e-03 * DAYS_PER_YEAR, - 7.69901118419740425e-03 * DAYS_PER_YEAR, - -6.90460016972063023e-05 * DAYS_PER_YEAR , - 9.54791938424326609e-04 * SOLAR_MASS - ), - /* saturn */ - Body::new(8.34336671824457987e+00, - 4.12479856412430479e+00, - -4.03523417114321381e-01 , - -2.76742510726862411e-03 * DAYS_PER_YEAR, - 4.99852801234917238e-03 * DAYS_PER_YEAR, - 2.30417297573763929e-05 * DAYS_PER_YEAR , - 2.85885980666130812e-04 * SOLAR_MASS - ), - /* uranus */ - Body::new(1.28943695621391310e+01, - -1.51111514016986312e+01, - -2.23307578892655734e-01 , - 2.96460137564761618e-03 * DAYS_PER_YEAR, - 2.37847173959480950e-03 * DAYS_PER_YEAR, - -2.96589568540237556e-05 * DAYS_PER_YEAR , - 4.36624404335156298e-05 * SOLAR_MASS - ), - /* neptune */ - Body::new(1.53796971148509165e+01, - -2.59193146099879641e+01, - 1.79258772950371181e-01 , - 2.68067772490389322e-03 * DAYS_PER_YEAR, - 
1.62824170038242295e-03 * DAYS_PER_YEAR, - -9.51592254519715870e-05 * DAYS_PER_YEAR , - 5.15138902046611451e-05 * SOLAR_MASS - ) - ]; - - let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); - - offset_momentum(&mut bodies); - println!("{:.9}", energy(&bodies)); - for _ in 0..n { - advance(&mut bodies, 0.01); - } - println!("{:.9}", energy(&bodies)); -} diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs deleted file mode 100644 index f8c919101e3c..000000000000 --- a/third_party/rust/simd/examples/ops.rs +++ /dev/null @@ -1,10 +0,0 @@ -extern crate simd; - -use simd::*; - -#[allow(unused_variables)] -fn main() { - let x = i32x4::splat(1_i32); - let y = -x; - let z = !x; -} diff --git a/third_party/rust/simd/examples/spectral-norm-nosimd.rs b/third_party/rust/simd/examples/spectral-norm-nosimd.rs deleted file mode 100644 index 919f9c61990f..000000000000 --- a/third_party/rust/simd/examples/spectral-norm-nosimd.rs +++ /dev/null @@ -1,106 +0,0 @@ -// The Computer Language Benchmarks Game -// http://benchmarksgame.alioth.debian.org/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi - -#![allow(non_snake_case)] - -use std::iter::repeat; -//use std::thread; - -// As std::simd::f64x2 is unstable, we provide a similar interface, -// expecting llvm to autovectorize its usage. 
-#[allow(non_camel_case_types)] -struct f64x2(f64, f64); -impl std::ops::Add for f64x2 { - type Output = Self; - fn add(self, rhs: Self) -> Self { - f64x2(self.0 + rhs.0, self.1 + rhs.1) - } -} -impl std::ops::Div for f64x2 { - type Output = Self; - fn div(self, rhs: Self) -> Self { - f64x2(self.0 / rhs.0, self.1 / rhs.1) - } -} - -fn main() { - let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); - let answer = spectralnorm(n); - println!("{:.9}", answer); -} - -fn spectralnorm(n: usize) -> f64 { - assert!(n % 2 == 0, "only even lengths are accepted"); - let mut u = repeat(1.0).take(n).collect::>(); - let mut v = u.clone(); - let mut tmp = v.clone(); - for _ in 0..10 { - mult_AtAv(&u, &mut v, &mut tmp); - mult_AtAv(&v, &mut u, &mut tmp); - } - (dot(&u, &v) / dot(&v, &v)).sqrt() -} - -fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { - mult_Av(v, tmp); - mult_Atv(tmp, out); -} - -fn mult_Av(v: &[f64], out: &mut [f64]) { - parallel(out, |start, out| mult(v, out, start, |i, j| A(i, j))); -} - -fn mult_Atv(v: &[f64], out: &mut [f64]) { - parallel(out, |start, out| mult(v, out, start, |i, j| A(j, i))); -} - -fn mult(v: &[f64], out: &mut [f64], start: usize, a: F) - where F: Fn(usize, usize) -> f64 { - for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) { - let mut sum = f64x2(0.0, 0.0); - for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) { - let top = f64x2(chunk[0], chunk[1]); - let bot = f64x2(a(i, j), a(i, j + 1)); - sum = sum + top / bot; - } - let f64x2(a, b) = sum; - *slot = a + b; - } -} - -fn A(i: usize, j: usize) -> f64 { - ((i + j) * (i + j + 1) / 2 + i + 1) as f64 -} - -fn dot(v: &[f64], u: &[f64]) -> f64 { - v.iter().zip(u.iter()).map(|(a, b)| *a * *b).fold(0., |acc, i| acc + i) -} - -//struct Racy(T); -//unsafe impl Send for Racy {} - -// Executes a closure in parallel over the given mutable slice. 
The closure `f` -// is run in parallel and yielded the starting index within `v` as well as a -// sub-slice of `v`. -fn parallel<'a, T, F>(v: &mut [T], ref f: F) - where T: 'static + Send + Sync, -F: Fn(usize, &mut [T]) + Sync -{ - f(0, v); - /*let size = v.len() / 4 + 1; - let jhs = v.chunks_mut(size).enumerate().map(|(i, chunk)| { - // Need to convert `f` and `chunk` to something that can cross the task - // boundary. - let f = Racy(f as *const F as *const usize); - let raw = Racy((&mut chunk[0] as *mut T, chunk.len())); - thread::spawn(move|| { - let f = f.0 as *const F; - let raw = raw.0; - unsafe { (*f)(i * size, std::slice::from_raw_parts_mut(raw.0, raw.1)) } - }) - }).collect::>(); - for jh in jhs { jh.join().unwrap(); }*/ -} diff --git a/third_party/rust/simd/examples/spectral-norm.rs b/third_party/rust/simd/examples/spectral-norm.rs deleted file mode 100755 index 656f52e4fad0..000000000000 --- a/third_party/rust/simd/examples/spectral-norm.rs +++ /dev/null @@ -1,74 +0,0 @@ -#![feature(cfg_target_feature)] -#![allow(non_snake_case)] - -extern crate simd; - -#[cfg(target_feature = "sse2")] -use simd::x86::sse2::f64x2; -#[cfg(target_arch = "aarch64")] -use simd::aarch64::neon::f64x2; - -fn A(i: usize, j: usize) -> f64 { - ((i + j) * (i + j + 1) / 2 + i + 1) as f64 -} - -fn dot(x: &[f64], y: &[f64]) -> f64 { - x.iter().zip(y).map(|(&x, &y)| x * y).fold(0.0, |a, b| a + b) -} - -fn mult_Av(v: &[f64], out: &mut [f64]) { - assert!(v.len() == out.len()); - assert!(v.len() % 2 == 0); - - for i in 0..v.len() { - let mut sum = f64x2::splat(0.0); - - let mut j = 0; - while j < v.len() { - let b = f64x2::load(v, j); - let a = f64x2::new(A(i, j), A(i, j + 1)); - sum = sum + b / a; - j += 2 - } - out[i] = sum.extract(0) + sum.extract(1); - } -} - -fn mult_Atv(v: &[f64], out: &mut [f64]) { - assert!(v.len() == out.len()); - assert!(v.len() % 2 == 0); - - for i in 0..v.len() { - let mut sum = f64x2::splat(0.0); - - let mut j = 0; - while j < v.len() { - let b = 
f64x2::load(v, j); - let a = f64x2::new(A(j, i), A(j + 1, i)); - sum = sum + b / a; - j += 2 - } - out[i] = sum.extract(0) + sum.extract(1); - } -} - -fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { - mult_Av(v, tmp); - mult_Atv(tmp, out); -} - -fn main() { - let mut n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); - if n % 2 == 1 { n += 1 } - - let mut u = vec![1.0; n]; - let mut v = u.clone(); - let mut tmp = u.clone(); - - for _ in 0..10 { - mult_AtAv(&u, &mut v, &mut tmp); - mult_AtAv(&v, &mut u, &mut tmp); - } - - println!("{:.9}", (dot(&u, &v) / dot(&v, &v)).sqrt()); -} diff --git a/third_party/rust/simd/src/aarch64/mod.rs b/third_party/rust/simd/src/aarch64/mod.rs deleted file mode 100644 index 5ba0a302b4d1..000000000000 --- a/third_party/rust/simd/src/aarch64/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Features specific to AArch64 CPUs. - -pub mod neon; diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs deleted file mode 100644 index 0cca05a52788..000000000000 --- a/third_party/rust/simd/src/aarch64/neon.rs +++ /dev/null @@ -1,681 +0,0 @@ -use super::super::*; -use {simd_cast, f32x2}; - -pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct u32x2(u32, u32); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct i32x2(i32, i32); - -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct u16x4(u16, u16, u16, u16); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct i16x4(i16, i16, i16, i16); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct u8x8(u8, u8, u8, u8, - u8, u8, u8, u8); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct i8x8(i8, i8, i8, i8, - i8, i8, i8, i8); - -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct i64x1(i64); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct u64x1(u64); -#[repr(simd)] -#[derive(Copy, Clone)] -pub struct f64x1(f64); - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn 
aarch64_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; - fn aarch64_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vqaddq_u32(x: u32x4, y: u32x4) 
-> u32x4; - fn aarch64_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vuqadd_s8(x: i8x16, y: u8x16) -> i8x16; - fn aarch64_vuqadd_s16(x: i16x8, y: u16x8) -> i16x8; - fn aarch64_vuqadd_s32(x: i32x4, y: u32x4) -> i32x4; - fn aarch64_vuqadd_s64(x: i64x2, y: u64x2) -> i64x2; - fn aarch64_vsqadd_u8(x: u8x16, y: i8x16) -> u8x16; - fn aarch64_vsqadd_u16(x: u16x8, y: i16x8) -> u16x8; - fn aarch64_vsqadd_u32(x: u32x4, y: i32x4) -> u32x4; - fn aarch64_vsqadd_u64(x: u64x2, y: i64x2) -> u64x2; - fn aarch64_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; - fn aarch64_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; - fn aarch64_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; - fn aarch64_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; - fn aarch64_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; - fn aarch64_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; - fn aarch64_vfmulx_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vfmulx_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vfmulxq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vfmulxq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vfma_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vfma_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vfmaq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vmull_s8(x: i8x8, y: i8x8) -> i16x8; - fn aarch64_vmull_u8(x: u8x8, y: u8x8) -> u16x8; - fn aarch64_vmull_s16(x: i16x4, y: i16x4) -> i32x4; - fn aarch64_vmull_u16(x: u16x4, y: u16x4) -> u32x4; - fn aarch64_vmull_s32(x: i32x2, y: 
i32x2) -> i64x2; - fn aarch64_vmull_u32(x: u32x2, y: u32x2) -> u64x2; - fn aarch64_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; - fn aarch64_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; - fn aarch64_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; - fn aarch64_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; - fn aarch64_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; - fn aarch64_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; - fn aarch64_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; - fn aarch64_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; - fn aarch64_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; - fn 
aarch64_vabd_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vabd_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vabd_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vabd_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vabd_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vabd_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vabd_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vabd_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vabdq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vmax_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vmax_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vmax_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vmax_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vmax_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vmax_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vmax_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vmax_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vmaxq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vmin_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vmin_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vmin_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vmin_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vmin_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vmin_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vmin_f32(x: f32x2, y: 
f32x2) -> f32x2; - fn aarch64_vmin_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vminq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vminq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vminq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vminq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vminq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vminq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vminq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vminq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vmaxnm_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vminnm_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vminnm_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vminnmq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vminnmq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn aarch64_vshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn aarch64_vshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn aarch64_vshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn aarch64_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn aarch64_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn aarch64_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn aarch64_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn aarch64_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn aarch64_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn aarch64_vqshl_s32(x: i32x2, y: 
i32x2) -> i32x2; - fn aarch64_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn aarch64_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn aarch64_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn aarch64_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn aarch64_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn aarch64_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn aarch64_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn aarch64_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn aarch64_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn aarch64_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn aarch64_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn aarch64_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn aarch64_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn aarch64_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn aarch64_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn aarch64_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn aarch64_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn aarch64_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn aarch64_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn 
aarch64_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn aarch64_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn aarch64_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn aarch64_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; - fn aarch64_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; - fn aarch64_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; - fn aarch64_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; - fn aarch64_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; - fn aarch64_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; - fn aarch64_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn aarch64_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn aarch64_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn aarch64_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn aarch64_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn aarch64_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn aarch64_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn aarch64_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn aarch64_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn aarch64_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn aarch64_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn aarch64_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn aarch64_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn aarch64_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn aarch64_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn aarch64_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn aarch64_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn aarch64_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn aarch64_vsri_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vsri_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vsri_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vsri_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vsri_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vsri_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vsri_s64(x: i64x1, y: i64x1) -> i64x1; - fn 
aarch64_vsri_u64(x: u64x1, y: u64x1) -> u64x1; - fn aarch64_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vsli_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vsli_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vsli_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vsli_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vsli_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vsli_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vsli_s64(x: i64x1, y: i64x1) -> i64x1; - fn aarch64_vsli_u64(x: u64x1, y: u64x1) -> u64x1; - fn aarch64_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vvqmovn_s16(x: i16x8) -> i8x8; - fn aarch64_vvqmovn_u16(x: u16x8) -> u8x8; - fn aarch64_vvqmovn_s32(x: i32x4) -> i16x4; - fn aarch64_vvqmovn_u32(x: u32x4) -> u16x4; - fn aarch64_vvqmovn_s64(x: i64x2) -> i32x2; - fn aarch64_vvqmovn_u64(x: u64x2) -> u32x2; - fn aarch64_vabs_s8(x: i8x8) -> i8x8; - fn aarch64_vabs_s16(x: i16x4) -> i16x4; - fn aarch64_vabs_s32(x: i32x2) -> i32x2; - fn aarch64_vabs_s64(x: i64x1) -> i64x1; - fn aarch64_vabsq_s8(x: i8x16) -> i8x16; - fn aarch64_vabsq_s16(x: i16x8) -> i16x8; - fn aarch64_vabsq_s32(x: i32x4) -> i32x4; - fn aarch64_vabsq_s64(x: i64x2) -> i64x2; - fn aarch64_vabs_f32(x: f32x2) -> f32x2; - fn aarch64_vabs_f64(x: f64x1) -> f64x1; - fn 
aarch64_vabsq_f32(x: f32x4) -> f32x4; - fn aarch64_vabsq_f64(x: f64x2) -> f64x2; - fn aarch64_vqabs_s8(x: i8x8) -> i8x8; - fn aarch64_vqabs_s16(x: i16x4) -> i16x4; - fn aarch64_vqabs_s32(x: i32x2) -> i32x2; - fn aarch64_vqabs_s64(x: i64x1) -> i64x1; - fn aarch64_vqabsq_s8(x: i8x16) -> i8x16; - fn aarch64_vqabsq_s16(x: i16x8) -> i16x8; - fn aarch64_vqabsq_s32(x: i32x4) -> i32x4; - fn aarch64_vqabsq_s64(x: i64x2) -> i64x2; - fn aarch64_vqneg_s8(x: i8x8) -> i8x8; - fn aarch64_vqneg_s16(x: i16x4) -> i16x4; - fn aarch64_vqneg_s32(x: i32x2) -> i32x2; - fn aarch64_vqneg_s64(x: i64x1) -> i64x1; - fn aarch64_vqnegq_s8(x: i8x16) -> i8x16; - fn aarch64_vqnegq_s16(x: i16x8) -> i16x8; - fn aarch64_vqnegq_s32(x: i32x4) -> i32x4; - fn aarch64_vqnegq_s64(x: i64x2) -> i64x2; - fn aarch64_vclz_s8(x: i8x8) -> i8x8; - fn aarch64_vclz_u8(x: u8x8) -> u8x8; - fn aarch64_vclz_s16(x: i16x4) -> i16x4; - fn aarch64_vclz_u16(x: u16x4) -> u16x4; - fn aarch64_vclz_s32(x: i32x2) -> i32x2; - fn aarch64_vclz_u32(x: u32x2) -> u32x2; - fn aarch64_vclzq_s8(x: i8x16) -> i8x16; - fn aarch64_vclzq_u8(x: u8x16) -> u8x16; - fn aarch64_vclzq_s16(x: i16x8) -> i16x8; - fn aarch64_vclzq_u16(x: u16x8) -> u16x8; - fn aarch64_vclzq_s32(x: i32x4) -> i32x4; - fn aarch64_vclzq_u32(x: u32x4) -> u32x4; - fn aarch64_vcls_s8(x: i8x8) -> i8x8; - fn aarch64_vcls_u8(x: u8x8) -> u8x8; - fn aarch64_vcls_s16(x: i16x4) -> i16x4; - fn aarch64_vcls_u16(x: u16x4) -> u16x4; - fn aarch64_vcls_s32(x: i32x2) -> i32x2; - fn aarch64_vcls_u32(x: u32x2) -> u32x2; - fn aarch64_vclsq_s8(x: i8x16) -> i8x16; - fn aarch64_vclsq_u8(x: u8x16) -> u8x16; - fn aarch64_vclsq_s16(x: i16x8) -> i16x8; - fn aarch64_vclsq_u16(x: u16x8) -> u16x8; - fn aarch64_vclsq_s32(x: i32x4) -> i32x4; - fn aarch64_vclsq_u32(x: u32x4) -> u32x4; - fn aarch64_vcnt_s8(x: i8x8) -> i8x8; - fn aarch64_vcnt_u8(x: u8x8) -> u8x8; - fn aarch64_vcntq_s8(x: i8x16) -> i8x16; - fn aarch64_vcntq_u8(x: u8x16) -> u8x16; - fn aarch64_vrecpe_u32(x: u32x2) -> u32x2; - fn 
aarch64_vrecpe_f32(x: f32x2) -> f32x2; - fn aarch64_vrecpe_f64(x: f64x1) -> f64x1; - fn aarch64_vrecpeq_u32(x: u32x4) -> u32x4; - fn aarch64_vrecpeq_f32(x: f32x4) -> f32x4; - fn aarch64_vrecpeq_f64(x: f64x2) -> f64x2; - fn aarch64_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vrecps_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vrecpsq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vsqrt_f32(x: f32x2) -> f32x2; - fn aarch64_vsqrt_f64(x: f64x1) -> f64x1; - fn aarch64_vsqrtq_f32(x: f32x4) -> f32x4; - fn aarch64_vsqrtq_f64(x: f64x2) -> f64x2; - fn aarch64_vrsqrte_u32(x: u32x2) -> u32x2; - fn aarch64_vrsqrte_f32(x: f32x2) -> f32x2; - fn aarch64_vrsqrte_f64(x: f64x1) -> f64x1; - fn aarch64_vrsqrteq_u32(x: u32x4) -> u32x4; - fn aarch64_vrsqrteq_f32(x: f32x4) -> f32x4; - fn aarch64_vrsqrteq_f64(x: f64x2) -> f64x2; - fn aarch64_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vrsqrts_f64(x: f64x1, y: f64x1) -> f64x1; - fn aarch64_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vrsqrtsq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vrbit_s8(x: i8x8) -> i8x8; - fn aarch64_vrbit_u8(x: u8x8) -> u8x8; - fn aarch64_vrbitq_s8(x: i8x16) -> i8x16; - fn aarch64_vrbitq_u8(x: u8x16) -> u8x16; - fn aarch64_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vpaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vpaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vpaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vpaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vpaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vpaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vpaddq_f32(x: f32x4, y: f32x4) 
-> f32x4; - fn aarch64_vpaddq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vpaddq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vpaddq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vpaddl_s16(x: i8x8) -> i16x4; - fn aarch64_vpaddl_u16(x: u8x8) -> u16x4; - fn aarch64_vpaddl_s32(x: i16x4) -> i32x2; - fn aarch64_vpaddl_u32(x: u16x4) -> u32x2; - fn aarch64_vpaddl_s64(x: i32x2) -> i64x1; - fn aarch64_vpaddl_u64(x: u32x2) -> u64x1; - fn aarch64_vpaddlq_s16(x: i8x16) -> i16x8; - fn aarch64_vpaddlq_u16(x: u8x16) -> u16x8; - fn aarch64_vpaddlq_s32(x: i16x8) -> i32x4; - fn aarch64_vpaddlq_u32(x: u16x8) -> u32x4; - fn aarch64_vpaddlq_s64(x: i32x4) -> i64x2; - fn aarch64_vpaddlq_u64(x: u32x4) -> u64x2; - fn aarch64_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vpmaxq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vpmaxq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vpmaxq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vpmaxq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vpmaxq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vpmaxq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vpmaxq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vpmaxq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vpmaxq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vpmaxq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vpminq_s8(x: 
i8x16, y: i8x16) -> i8x16; - fn aarch64_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vpminq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vpminq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vpminq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vpmaxnm_s8(x: i8x8, y: i8x8) -> i8x8; - fn aarch64_vpmaxnm_u8(x: u8x8, y: u8x8) -> u8x8; - fn aarch64_vpmaxnm_s16(x: i16x4, y: i16x4) -> i16x4; - fn aarch64_vpmaxnm_u16(x: u16x4, y: u16x4) -> u16x4; - fn aarch64_vpmaxnm_s32(x: i32x2, y: i32x2) -> i32x2; - fn aarch64_vpmaxnm_u32(x: u32x2, y: u32x2) -> u32x2; - fn aarch64_vpmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vpmaxnmq_s8(x: i8x16, y: i8x16) -> i8x16; - fn aarch64_vpmaxnmq_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vpmaxnmq_s16(x: i16x8, y: i16x8) -> i16x8; - fn aarch64_vpmaxnmq_u16(x: u16x8, y: u16x8) -> u16x8; - fn aarch64_vpmaxnmq_s32(x: i32x4, y: i32x4) -> i32x4; - fn aarch64_vpmaxnmq_u32(x: u32x4, y: u32x4) -> u32x4; - fn aarch64_vpmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vpmaxnmq_s64(x: i64x2, y: i64x2) -> i64x2; - fn aarch64_vpmaxnmq_u64(x: u64x2, y: u64x2) -> u64x2; - fn aarch64_vpmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vpminnm_f32(x: f32x2, y: f32x2) -> f32x2; - fn aarch64_vpminnmq_f32(x: f32x4, y: f32x4) -> f32x4; - fn aarch64_vpminnmq_f64(x: f64x2, y: f64x2) -> f64x2; - fn aarch64_vaddv_s8(x: i8x8) -> i8; - fn aarch64_vaddv_u8(x: u8x8) -> u8; - fn aarch64_vaddv_s16(x: i16x4) -> i16; - fn aarch64_vaddv_u16(x: u16x4) -> u16; - fn aarch64_vaddv_s32(x: i32x2) -> i32; - fn aarch64_vaddv_u32(x: u32x2) -> u32; - fn aarch64_vaddv_f32(x: f32x2) -> f32; - fn aarch64_vaddvq_s8(x: i8x16) -> i8; - fn aarch64_vaddvq_u8(x: u8x16) -> u8; - fn 
aarch64_vaddvq_s16(x: i16x8) -> i16; - fn aarch64_vaddvq_u16(x: u16x8) -> u16; - fn aarch64_vaddvq_s32(x: i32x4) -> i32; - fn aarch64_vaddvq_u32(x: u32x4) -> u32; - fn aarch64_vaddvq_f32(x: f32x4) -> f32; - fn aarch64_vaddvq_s64(x: i64x2) -> i64; - fn aarch64_vaddvq_u64(x: u64x2) -> u64; - fn aarch64_vaddvq_f64(x: f64x2) -> f64; - fn aarch64_vaddlv_s8(x: i8x8) -> i16; - fn aarch64_vaddlv_u8(x: u8x8) -> u16; - fn aarch64_vaddlv_s16(x: i16x4) -> i32; - fn aarch64_vaddlv_u16(x: u16x4) -> u32; - fn aarch64_vaddlv_s32(x: i32x2) -> i64; - fn aarch64_vaddlv_u32(x: u32x2) -> u64; - fn aarch64_vaddlvq_s8(x: i8x16) -> i16; - fn aarch64_vaddlvq_u8(x: u8x16) -> u16; - fn aarch64_vaddlvq_s16(x: i16x8) -> i32; - fn aarch64_vaddlvq_u16(x: u16x8) -> u32; - fn aarch64_vaddlvq_s32(x: i32x4) -> i64; - fn aarch64_vaddlvq_u32(x: u32x4) -> u64; - fn aarch64_vmaxv_s8(x: i8x8) -> i8; - fn aarch64_vmaxv_u8(x: u8x8) -> u8; - fn aarch64_vmaxv_s16(x: i16x4) -> i16; - fn aarch64_vmaxv_u16(x: u16x4) -> u16; - fn aarch64_vmaxv_s32(x: i32x2) -> i32; - fn aarch64_vmaxv_u32(x: u32x2) -> u32; - fn aarch64_vmaxv_f32(x: f32x2) -> f32; - fn aarch64_vmaxvq_s8(x: i8x16) -> i8; - fn aarch64_vmaxvq_u8(x: u8x16) -> u8; - fn aarch64_vmaxvq_s16(x: i16x8) -> i16; - fn aarch64_vmaxvq_u16(x: u16x8) -> u16; - fn aarch64_vmaxvq_s32(x: i32x4) -> i32; - fn aarch64_vmaxvq_u32(x: u32x4) -> u32; - fn aarch64_vmaxvq_f32(x: f32x4) -> f32; - fn aarch64_vmaxvq_f64(x: f64x2) -> f64; - fn aarch64_vminv_s8(x: i8x8) -> i8; - fn aarch64_vminv_u8(x: u8x8) -> u8; - fn aarch64_vminv_s16(x: i16x4) -> i16; - fn aarch64_vminv_u16(x: u16x4) -> u16; - fn aarch64_vminv_s32(x: i32x2) -> i32; - fn aarch64_vminv_u32(x: u32x2) -> u32; - fn aarch64_vminv_f32(x: f32x2) -> f32; - fn aarch64_vminvq_s8(x: i8x16) -> i8; - fn aarch64_vminvq_u8(x: u8x16) -> u8; - fn aarch64_vminvq_s16(x: i16x8) -> i16; - fn aarch64_vminvq_u16(x: u16x8) -> u16; - fn aarch64_vminvq_s32(x: i32x4) -> i32; - fn aarch64_vminvq_u32(x: u32x4) -> u32; - fn 
aarch64_vminvq_f32(x: f32x4) -> f32; - fn aarch64_vminvq_f64(x: f64x2) -> f64; - fn aarch64_vmaxnmv_f32(x: f32x2) -> f32; - fn aarch64_vmaxnmvq_f32(x: f32x4) -> f32; - fn aarch64_vmaxnmvq_f64(x: f64x2) -> f64; - fn aarch64_vminnmv_f32(x: f32x2) -> f32; - fn aarch64_vminnmvq_f32(x: f32x4) -> f32; - fn aarch64_vminnmvq_f64(x: f64x2) -> f64; - fn aarch64_vqtbl1_s8(x: i8x16, y: u8x8) -> i8x8; - fn aarch64_vqtbl1_u8(x: u8x16, y: u8x8) -> u8x8; - fn aarch64_vqtbl1q_s8(x: i8x16, y: u8x16) -> i8x16; - fn aarch64_vqtbl1q_u8(x: u8x16, y: u8x16) -> u8x16; - fn aarch64_vqtbx1_s8(x: i8x8, y: i8x16, z: u8x8) -> i8x8; - fn aarch64_vqtbx1_u8(x: u8x8, y: u8x16, z: u8x8) -> u8x8; - fn aarch64_vqtbx1q_s8(x: i8x16, y: i8x16, z: u8x16) -> i8x16; - fn aarch64_vqtbx1q_u8(x: u8x16, y: u8x16, z: u8x16) -> u8x16; - fn aarch64_vqtbl2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; - fn aarch64_vqtbl2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; - fn aarch64_vqtbl2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; - fn aarch64_vqtbl2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; - fn aarch64_vqtbx2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; - fn aarch64_vqtbx2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; - fn aarch64_vqtbx2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; - fn aarch64_vqtbx2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; - fn aarch64_vqtbl3_s8(x: (i8x16, i8x16, i8x16), y: u8x8) -> i8x8; - fn aarch64_vqtbl3_u8(x: (u8x16, u8x16, u8x16), y: u8x8) -> u8x8; - fn aarch64_vqtbl3q_s8(x: (i8x16, i8x16, i8x16), y: u8x16) -> i8x16; - fn aarch64_vqtbl3q_u8(x: (u8x16, u8x16, u8x16), y: u8x16) -> u8x16; - fn aarch64_vqtbx3_s8(x: i8x8, y: (i8x16, i8x16, i8x16), z: u8x8) -> i8x8; - fn aarch64_vqtbx3_u8(x: u8x8, y: (u8x16, u8x16, u8x16), z: u8x8) -> u8x8; - fn aarch64_vqtbx3q_s8(x: i8x16, y: (i8x16, i8x16, i8x16), z: u8x16) -> i8x16; - fn aarch64_vqtbx3q_u8(x: u8x16, y: (u8x16, u8x16, u8x16), z: u8x16) -> u8x16; - fn aarch64_vqtbl4_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x8) -> i8x8; - fn aarch64_vqtbl4_u8(x: (u8x16, u8x16, u8x16, 
u8x16), y: u8x8) -> u8x8; - fn aarch64_vqtbl4q_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x16) -> i8x16; - fn aarch64_vqtbl4q_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x16) -> u8x16; - fn aarch64_vqtbx4_s8(x: i8x8, y: (i8x16, i8x16, i8x16, i8x16), z: u8x8) -> i8x8; - fn aarch64_vqtbx4_u8(x: u8x8, y: (u8x16, u8x16, u8x16, u8x16), z: u8x8) -> u8x8; - fn aarch64_vqtbx4q_s8(x: i8x16, y: (i8x16, i8x16, i8x16, i8x16), z: u8x16) -> i8x16; - fn aarch64_vqtbx4q_u8(x: u8x16, y: (u8x16, u8x16, u8x16, u8x16), z: u8x16) -> u8x16; -} - -pub trait Aarch64F32x4 { - fn to_f64(self) -> f64x2; -} -impl Aarch64F32x4 for f32x4 { - #[inline] - fn to_f64(self) -> f64x2 { - unsafe { - simd_cast(f32x2(self.0, self.1)) - } - } -} - -pub trait Aarch64U8x16 { - fn table_lookup_1(self, t0: u8x16) -> u8x16; -} -impl Aarch64U8x16 for u8x16 { - #[inline] - fn table_lookup_1(self, t0: u8x16) -> u8x16 { - unsafe {aarch64_vqtbl1q_u8(t0, self)} - } -} -pub trait Aarch64I8x16 { - fn table_lookup_1(self, t0: i8x16) -> i8x16; -} -impl Aarch64I8x16 for i8x16 { - #[inline] - fn table_lookup_1(self, t0: i8x16) -> i8x16 { - unsafe {aarch64_vqtbl2q_s8((t0, t0), ::bitcast(self))} - } -} - -#[doc(hidden)] -pub mod common { - use super::super::super::*; - use core::mem; - - #[inline] - pub fn f32x4_sqrt(x: f32x4) -> f32x4 { - unsafe {super::aarch64_vsqrtq_f32(x)} - } - #[inline] - pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { - unsafe {super::aarch64_vrsqrteq_f32(x)} - } - #[inline] - pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { - unsafe {super::aarch64_vrecpeq_f32(x)} - } - #[inline] - pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::aarch64_vmaxq_f32(x, y)} - } - #[inline] - pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::aarch64_vminq_f32(x, y)} - } - - macro_rules! 
bools { - ($($ty: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { - $( - #[inline] - pub fn $all(x: $ty) -> bool { - unsafe { - super::$min(mem::transmute(x)) != 0 - } - } - #[inline] - pub fn $any(x: $ty) -> bool { - unsafe { - super::$max(mem::transmute(x)) != 0 - } - } - )* - } - } - - bools! { - bool32fx4, bool32fx4_all(aarch64_vminvq_u32), bool32fx4_any(aarch64_vmaxvq_u32); - bool8ix16, bool8ix16_all(aarch64_vminvq_u8), bool8ix16_any(aarch64_vmaxvq_u8); - bool16ix8, bool16ix8_all(aarch64_vminvq_u16), bool16ix8_any(aarch64_vmaxvq_u16); - bool32ix4, bool32ix4_all(aarch64_vminvq_u32), bool32ix4_any(aarch64_vmaxvq_u32); - } -} diff --git a/third_party/rust/simd/src/arm/mod.rs b/third_party/rust/simd/src/arm/mod.rs deleted file mode 100644 index 0d451103840b..000000000000 --- a/third_party/rust/simd/src/arm/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Features specific to ARM CPUs. - -#[cfg(any(feature = "doc", target_feature = "neon"))] -pub mod neon; diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs deleted file mode 100644 index 8c90a72bb0dc..000000000000 --- a/third_party/rust/simd/src/arm/neon.rs +++ /dev/null @@ -1,622 +0,0 @@ -use super::super::*; -use sixty_four::{i64x2, u64x2}; - -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct u32x2(u32, u32); -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct i32x2(i32, i32); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32ix2(i32, i32); - -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct f32x2(f32, f32); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32fx2(i32, i32); - -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct u16x4(u16, u16, u16, u16); -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct i16x4(i16, i16, i16, i16); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", 
derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool16ix4(i16, i16, i16, i16); - -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct u8x8(u8, u8, u8, u8, - u8, u8, u8, u8); -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct i8x8(i8, i8, i8, i8, - i8, i8, i8, i8); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool8ix8(i8, i8, i8, i8, - i8, i8, i8, i8); - -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct i64x1(i64); -#[repr(simd)] -#[derive(Debug, Copy, Clone)] -pub struct u64x1(u64); - -macro_rules! half_bools { - ($($ty: ty, $as_u: ty, $elem: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { - $( - impl $ty { - #[inline] - pub fn $all(self) -> bool { - unsafe { - let t: $as_u = bitcast(self); - let y = $min(t, mem::uninitialized()); - let y32: u32x2 = bitcast(y); - y32.0 == 0xFFFFFFFF - } - } - #[inline] - pub fn $any(self) -> bool { - unsafe { - let t: $as_u = bitcast(self); - let y = $max(t, mem::uninitialized()); - let y32: u32x2 = bitcast(y); - y32.0 != 0 - } - } - } - - impl Clone for $ty { - #[inline] fn clone(&self) -> Self { - *self - } - } - - unsafe impl Simd for $ty { - type Bool = $ty; - type Elem = $elem; - } - - )* - } -} - -half_bools! { - bool32fx2, u32x2, i32, bool32fx2_all(arm_vpmin_u32), bool32fx2_any(arm_vpmax_u32); - bool8ix8, u8x8, i8, bool8ix8_all(arm_vpmin_u8), bool8ix8_any(arm_vpmax_u8); - bool16ix4, u16x4, i16, bool16ix4_all(arm_vpmin_u16), bool16ix4_any(arm_vpmax_u16); - bool32ix2, u32x2, f32, bool32ix2_all(arm_vpmin_u32), bool32ix2_any(arm_vpmax_u32); -} - -macro_rules! half_simd { - ($($ty: ty, $elem: ty, $bool_ty: ty;)*) => { - $( - unsafe impl Simd for $ty { - type Bool = $bool_ty; - type Elem = $elem; - } - )* - } -} - -half_simd! 
{ - f32x2, f32, bool32fx2; - u32x2, u32, bool32ix2; - i32x2, i32, bool32ix2; - u16x4, u16, bool16ix4; - i16x4, i16, bool16ix4; - u8x8, u8, bool8ix8; - i8x8, i8, bool8ix8; -} - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn arm_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; - fn arm_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; - fn 
arm_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; - fn arm_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; - fn arm_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; - fn arm_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; - fn arm_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; - fn arm_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; - fn arm_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; - fn arm_vfma_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vmull_s8(x: i8x8, y: i8x8) -> i16x8; - fn arm_vmull_u8(x: u8x8, y: u8x8) -> u16x8; - fn arm_vmull_s16(x: i16x4, y: i16x4) -> i32x4; - fn arm_vmull_u16(x: u16x4, y: u16x4) -> u32x4; - fn arm_vmull_s32(x: i32x2, y: i32x2) -> i64x2; - fn arm_vmull_u32(x: u32x2, y: u32x2) -> u64x2; - fn arm_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; - fn arm_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; - fn arm_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; - fn 
arm_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; - fn arm_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; - fn arm_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; - fn arm_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; - fn arm_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; - fn arm_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; - fn arm_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; - fn arm_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; - fn arm_vabd_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vabd_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vabd_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vabd_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vabd_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vabd_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vabd_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vmax_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vmax_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vmax_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vmax_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vmax_s32(x: i32x2, y: i32x2) -> i32x2; - fn 
arm_vmax_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vmax_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vmin_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vmin_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vmin_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vmin_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vmin_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vmin_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vmin_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vminq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vminq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vminq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vminq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vminq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vminq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vminq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn arm_vshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn arm_vshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn arm_vshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn arm_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn arm_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn arm_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn arm_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn arm_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn arm_vqshl_s16(x: i16x4, y: i16x4) -> 
i16x4; - fn arm_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn arm_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn arm_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn arm_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn arm_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn arm_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn arm_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn arm_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn arm_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn arm_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn arm_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn arm_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn arm_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; - fn arm_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn arm_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn arm_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; - fn arm_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; - fn arm_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; - fn arm_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; - fn arm_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; - fn arm_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vqrshlq_u16(x: 
u16x8, y: i16x8) -> u16x8; - fn arm_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; - fn arm_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; - fn arm_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; - fn arm_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; - fn arm_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; - fn arm_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; - fn arm_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; - fn arm_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; - fn arm_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn arm_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn arm_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn arm_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn arm_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn arm_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn arm_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn arm_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn arm_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn arm_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn arm_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn arm_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn arm_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; - fn arm_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; - fn arm_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; - fn arm_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; - fn arm_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; - fn arm_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; - fn arm_vsri_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vsri_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vsri_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vsri_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vsri_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vsri_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vsri_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vsri_u64(x: u64x1, y: u64x1) -> u64x1; - fn arm_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vsriq_u16(x: u16x8, y: u16x8) 
-> u16x8; - fn arm_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; - fn arm_vsli_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vsli_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vsli_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vsli_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vsli_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vsli_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vsli_s64(x: i64x1, y: i64x1) -> i64x1; - fn arm_vsli_u64(x: u64x1, y: u64x1) -> u64x1; - fn arm_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; - fn arm_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; - fn arm_vvqmovn_s16(x: i16x8) -> i8x8; - fn arm_vvqmovn_u16(x: u16x8) -> u8x8; - fn arm_vvqmovn_s32(x: i32x4) -> i16x4; - fn arm_vvqmovn_u32(x: u32x4) -> u16x4; - fn arm_vvqmovn_s64(x: i64x2) -> i32x2; - fn arm_vvqmovn_u64(x: u64x2) -> u32x2; - fn arm_vabs_s8(x: i8x8) -> i8x8; - fn arm_vabs_s16(x: i16x4) -> i16x4; - fn arm_vabs_s32(x: i32x2) -> i32x2; - fn arm_vabsq_s8(x: i8x16) -> i8x16; - fn arm_vabsq_s16(x: i16x8) -> i16x8; - fn arm_vabsq_s32(x: i32x4) -> i32x4; - fn arm_vabs_f32(x: f32x2) -> f32x2; - fn arm_vabsq_f32(x: f32x4) -> f32x4; - fn arm_vqabs_s8(x: i8x8) -> i8x8; - fn arm_vqabs_s16(x: i16x4) -> i16x4; - fn arm_vqabs_s32(x: i32x2) -> i32x2; - fn arm_vqabsq_s8(x: i8x16) -> i8x16; - fn arm_vqabsq_s16(x: i16x8) -> i16x8; - fn arm_vqabsq_s32(x: i32x4) -> i32x4; - fn arm_vqneg_s8(x: i8x8) -> i8x8; - fn arm_vqneg_s16(x: i16x4) -> i16x4; - fn arm_vqneg_s32(x: i32x2) -> i32x2; - fn arm_vqnegq_s8(x: i8x16) -> i8x16; - fn arm_vqnegq_s16(x: i16x8) -> i16x8; - fn arm_vqnegq_s32(x: i32x4) -> i32x4; - fn 
arm_vclz_s8(x: i8x8) -> i8x8; - fn arm_vclz_u8(x: u8x8) -> u8x8; - fn arm_vclz_s16(x: i16x4) -> i16x4; - fn arm_vclz_u16(x: u16x4) -> u16x4; - fn arm_vclz_s32(x: i32x2) -> i32x2; - fn arm_vclz_u32(x: u32x2) -> u32x2; - fn arm_vclzq_s8(x: i8x16) -> i8x16; - fn arm_vclzq_u8(x: u8x16) -> u8x16; - fn arm_vclzq_s16(x: i16x8) -> i16x8; - fn arm_vclzq_u16(x: u16x8) -> u16x8; - fn arm_vclzq_s32(x: i32x4) -> i32x4; - fn arm_vclzq_u32(x: u32x4) -> u32x4; - fn arm_vcls_s8(x: i8x8) -> i8x8; - fn arm_vcls_u8(x: u8x8) -> u8x8; - fn arm_vcls_s16(x: i16x4) -> i16x4; - fn arm_vcls_u16(x: u16x4) -> u16x4; - fn arm_vcls_s32(x: i32x2) -> i32x2; - fn arm_vcls_u32(x: u32x2) -> u32x2; - fn arm_vclsq_s8(x: i8x16) -> i8x16; - fn arm_vclsq_u8(x: u8x16) -> u8x16; - fn arm_vclsq_s16(x: i16x8) -> i16x8; - fn arm_vclsq_u16(x: u16x8) -> u16x8; - fn arm_vclsq_s32(x: i32x4) -> i32x4; - fn arm_vclsq_u32(x: u32x4) -> u32x4; - fn arm_vcnt_s8(x: i8x8) -> i8x8; - fn arm_vcnt_u8(x: u8x8) -> u8x8; - fn arm_vcntq_s8(x: i8x16) -> i8x16; - fn arm_vcntq_u8(x: u8x16) -> u8x16; - fn arm_vrecpe_u32(x: u32x2) -> u32x2; - fn arm_vrecpe_f32(x: f32x2) -> f32x2; - fn arm_vrecpeq_u32(x: u32x4) -> u32x4; - fn arm_vrecpeq_f32(x: f32x4) -> f32x4; - fn arm_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vsqrt_f32(x: f32x2) -> f32x2; - fn arm_vsqrtq_f32(x: f32x4) -> f32x4; - fn arm_vrsqrte_u32(x: u32x2) -> u32x2; - fn arm_vrsqrte_f32(x: f32x2) -> f32x2; - fn arm_vrsqrteq_u32(x: u32x4) -> u32x4; - fn arm_vrsqrteq_f32(x: f32x4) -> f32x4; - fn arm_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vbsl_s8(x: u8x8, y: i8x8) -> i8x8; - fn arm_vbsl_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vbsl_s16(x: u16x4, y: i16x4) -> i16x4; - fn arm_vbsl_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vbsl_s32(x: u32x2, y: i32x2) -> i32x2; - fn arm_vbsl_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vbsl_s64(x: u64x1, y: i64x1) -> i64x1; - fn 
arm_vbsl_u64(x: u64x1, y: u64x1) -> u64x1; - fn arm_vbslq_s8(x: u8x16, y: i8x16) -> i8x16; - fn arm_vbslq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vbslq_s16(x: u16x8, y: i16x8) -> i16x8; - fn arm_vbslq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vbslq_s32(x: u32x4, y: i32x4) -> i32x4; - fn arm_vbslq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vbslq_s64(x: u64x2, y: i64x2) -> i64x2; - fn arm_vbslq_u64(x: u64x2, y: u64x2) -> u64x2; - fn arm_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vpaddl_s16(x: i8x8) -> i16x4; - fn arm_vpaddl_u16(x: u8x8) -> u16x4; - fn arm_vpaddl_s32(x: i16x4) -> i32x2; - fn arm_vpaddl_u32(x: u16x4) -> u32x2; - fn arm_vpaddl_s64(x: i32x2) -> i64x1; - fn arm_vpaddl_u64(x: u32x2) -> u64x1; - fn arm_vpaddlq_s16(x: i8x16) -> i16x8; - fn arm_vpaddlq_u16(x: u8x16) -> u16x8; - fn arm_vpaddlq_s32(x: i16x8) -> i32x4; - fn arm_vpaddlq_u32(x: u16x8) -> u32x4; - fn arm_vpaddlq_s64(x: i32x4) -> i64x2; - fn arm_vpaddlq_u64(x: u32x4) -> u64x2; - fn arm_vpadal_s16(x: i16x4, y: i8x8) -> i16x4; - fn arm_vpadal_u16(x: u16x4, y: u8x8) -> u16x4; - fn arm_vpadal_s32(x: i32x2, y: i16x4) -> i32x2; - fn arm_vpadal_u32(x: u32x2, y: u16x4) -> u32x2; - fn arm_vpadal_s64(x: i64x1, y: i32x2) -> i64x1; - fn arm_vpadal_u64(x: u64x1, y: u32x2) -> u64x1; - fn arm_vpadalq_s16(x: i16x8, y: i8x16) -> i16x8; - fn arm_vpadalq_u16(x: u16x8, y: u8x16) -> u16x8; - fn arm_vpadalq_s32(x: i32x4, y: i16x8) -> i32x4; - fn arm_vpadalq_u32(x: u32x4, y: u16x8) -> u32x4; - fn arm_vpadalq_s64(x: i64x2, y: i32x4) -> i64x2; - fn arm_vpadalq_u64(x: u64x2, y: u32x4) -> u64x2; - fn arm_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; - fn 
arm_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; - fn arm_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; - fn arm_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; - fn arm_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; - fn arm_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; - fn arm_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; - fn arm_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; - fn arm_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; - fn arm_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; - fn arm_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; - fn arm_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; - fn arm_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; - fn arm_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; - fn arm_vtbl1_s8(x: i8x8, y: u8x8) -> i8x8; - fn arm_vtbl1_u8(x: u8x8, y: u8x8) -> u8x8; - fn arm_vtbx1_s8(x: i8x8, y: i8x8, z: u8x8) -> i8x8; - fn arm_vtbx1_u8(x: u8x8, y: u8x8, z: u8x8) -> u8x8; - fn arm_vtbl2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; - fn arm_vtbl2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; - fn arm_vtbx2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; - fn arm_vtbx2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; - fn arm_vtbl3_s8(x: (i8x8, i8x8, i8x8), y: u8x8) -> i8x8; - fn arm_vtbl3_u8(x: (u8x8, u8x8, u8x8), y: u8x8) -> u8x8; - fn arm_vtbx3_s8(x: i8x8, y: (i8x8, i8x8, i8x8), z: u8x8) -> i8x8; - fn arm_vtbx3_u8(x: u8x8, y: (u8x8, u8x8, u8x8), z: u8x8) -> u8x8; - fn arm_vtbl4_s8(x: (i8x8, i8x8, i8x8, i8x8), y: u8x8) -> i8x8; - fn arm_vtbl4_u8(x: (u8x8, u8x8, u8x8, u8x8), y: u8x8) -> u8x8; - fn arm_vtbx4_s8(x: i8x8, y: (i8x8, i8x8, i8x8, i8x8), z: u8x8) -> i8x8; - fn arm_vtbx4_u8(x: u8x8, y: (u8x8, u8x8, u8x8, u8x8), z: u8x8) -> u8x8; -} - - -impl u8x8 { - #[inline] - pub fn table_lookup_1(self, t0: u8x8) -> u8x8 { - unsafe {arm_vtbl1_u8(t0, self)} - } - #[inline] - pub fn table_lookup_2(self, t0: u8x8, t1: u8x8) 
-> u8x8 { - unsafe {arm_vtbl2_u8((t0, t1), self)} - } - #[inline] - pub fn table_lookup_3(self, t0: u8x8, t1: u8x8, t2: u8x8) -> u8x8 { - unsafe {arm_vtbl3_u8((t0, t1, t2), self)} - } - #[inline] - pub fn table_lookup_4(self, t0: u8x8, t1: u8x8, t2: u8x8, t3: u8x8) -> u8x8 { - unsafe {arm_vtbl4_u8((t0, t1, t2, t3), self)} - } -} - -#[doc(hidden)] -pub mod common { - use super::super::super::*; - use super::*; - use core::mem; - - #[inline] - pub fn f32x4_sqrt(x: f32x4) -> f32x4 { - unsafe {super::arm_vsqrtq_f32(x)} - } - #[inline] - pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { - unsafe {super::arm_vrsqrteq_f32(x)} - } - #[inline] - pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { - unsafe {super::arm_vrecpeq_f32(x)} - } - #[inline] - pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::arm_vmaxq_f32(x, y)} - } - #[inline] - pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::arm_vminq_f32(x, y)} - } - - macro_rules! bools { - ($($ty: ty, $as_u: ty, $shuffle_fn: ident, $lo_idxs: expr, $hi_idxs: expr, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { - $( - #[inline] - pub fn $all(x: $ty) -> bool { - unsafe { - let t: $as_u = bitcast(x); - let lo = $shuffle_fn(t, t, $lo_idxs); - let hi = $shuffle_fn(t, t, $hi_idxs); - let x = super::$min(lo, hi); - let y = super::$min(x, mem::uninitialized()); - let y32: u32x2 = bitcast(y); - y32.0 == 0xFFFFFFFF - } - } - #[inline] - pub fn $any(x: $ty) -> bool { - unsafe { - let t: $as_u = bitcast(x); - let lo = $shuffle_fn(t, t, $lo_idxs); - let hi = $shuffle_fn(t, t, $hi_idxs); - let x = super::$max(lo, hi); - let y = super::$max(x, mem::uninitialized()); - let y32: u32x2 = bitcast(y); - y32.0 != 0 - } - } - )* - } - } - - bools! 
{ - bool32fx4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32fx4_all(arm_vpmin_u32), bool32fx4_any(arm_vpmax_u32); - bool8ix16, u8x16, simd_shuffle8, [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15], bool8ix16_all(arm_vpmin_u8), bool8ix16_any(arm_vpmax_u8); - bool16ix8, u16x8, simd_shuffle4, [0, 1, 2, 3], [4, 5, 6, 7], bool16ix8_all(arm_vpmin_u16), bool16ix8_any(arm_vpmax_u16); - bool32ix4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32ix4_all(arm_vpmin_u32), bool32ix4_any(arm_vpmax_u32); - } -} diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs deleted file mode 100644 index 1052ae36959d..000000000000 --- a/third_party/rust/simd/src/common.rs +++ /dev/null @@ -1,520 +0,0 @@ -use super::*; -#[allow(unused_imports)] -use super::{ - simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, - simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, - simd_insert, simd_extract, - simd_cast, - simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, - - Unalign, bitcast, -}; -use core::{mem,ops}; - -#[cfg(any(target_arch = "x86", - target_arch = "x86_64"))] -use x86::sse2::common; -#[cfg(any(target_arch = "arm"))] -use arm::neon::common; -#[cfg(any(target_arch = "aarch64"))] -use aarch64::neon::common; - -macro_rules! basic_impls { - ($( - $name: ident: - $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*; - )*) => { - $(impl $name { - /// Create a new instance. - #[inline] - pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name { - $name($($first),*, $($last),*) - } - - /// Create a new instance where every lane has value `x`. - #[inline] - pub const fn splat(x: $elem) -> $name { - $name($({ #[allow(dead_code)] struct $first; x }),*, - $({ #[allow(dead_code)] struct $last; x }),*) - } - - /// Compare for equality. - #[inline] - pub fn eq(self, other: Self) -> $bool { - unsafe {simd_eq(self, other)} - } - /// Compare for equality. 
- #[inline] - pub fn ne(self, other: Self) -> $bool { - unsafe {simd_ne(self, other)} - } - /// Compare for equality. - #[inline] - pub fn lt(self, other: Self) -> $bool { - unsafe {simd_lt(self, other)} - } - /// Compare for equality. - #[inline] - pub fn le(self, other: Self) -> $bool { - unsafe {simd_le(self, other)} - } - /// Compare for equality. - #[inline] - pub fn gt(self, other: Self) -> $bool { - unsafe {simd_gt(self, other)} - } - /// Compare for equality. - #[inline] - pub fn ge(self, other: Self) -> $bool { - unsafe {simd_ge(self, other)} - } - - /// Extract the value of the `idx`th lane of `self`. - /// - /// # Panics - /// - /// `extract` will panic if `idx` is out of bounds. - #[inline] - pub fn extract(self, idx: u32) -> $elem { - assert!(idx < $length); - unsafe {simd_extract(self, idx)} - } - /// Return a new vector where the `idx`th lane is replaced - /// by `elem`. - /// - /// # Panics - /// - /// `replace` will panic if `idx` is out of bounds. - #[inline] - pub fn replace(self, idx: u32, elem: $elem) -> Self { - assert!(idx < $length); - unsafe {simd_insert(self, idx, elem)} - } - - /// Load a new value from the `idx`th position of `array`. - /// - /// This is equivalent to the following, but is possibly - /// more efficient: - /// - /// ```rust,ignore - /// Self::new(array[idx], array[idx + 1], ...) - /// ``` - /// - /// # Panics - /// - /// `load` will panic if `idx` is out of bounds in - /// `array`, or if `array[idx..]` is too short. - #[inline] - pub fn load(array: &[$elem], idx: usize) -> Self { - let data = &array[idx..idx + $length]; - let loaded = unsafe { - *(data.as_ptr() as *const Unalign) - }; - loaded.0 - } - - /// Store the elements of `self` to `array`, starting at - /// the `idx`th position. - /// - /// This is equivalent to the following, but is possibly - /// more efficient: - /// - /// ```rust,ignore - /// array[i] = self.extract(0); - /// array[i + 1] = self.extract(1); - /// // ... 
- /// ``` - /// - /// # Panics - /// - /// `store` will panic if `idx` is out of bounds in - /// `array`, or if `array[idx...]` is too short. - #[inline] - pub fn store(self, array: &mut [$elem], idx: usize) { - let place = &mut array[idx..idx + $length]; - unsafe { - *(place.as_mut_ptr() as *mut Unalign) = Unalign(self) - } - } - })* - } -} - -basic_impls! { - u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; - i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; - f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3; - - u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; - i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; - - u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; - i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; -} - -macro_rules! bool_impls { - ($( - $name: ident: - $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident, - $($first: ident),* | $($last: ident),* - [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*]; - )*) => { - $(impl $name { - /// Convert to integer representation. - #[inline] - pub fn to_repr(self) -> $repr { - unsafe {mem::transmute(self)} - } - /// Convert from integer representation. - #[inline] - #[inline] - pub fn from_repr(x: $repr) -> Self { - unsafe {mem::transmute(x)} - } - - /// Create a new instance. - #[inline] - pub fn new($($first: bool),*, $($last: bool),*) -> $name { - unsafe { - // negate everything together - simd_sub($name::splat(false), - $name($( ($first as $repr_elem) ),*, - $( ($last as $repr_elem) ),*)) - } - } - - /// Create a new instance where every lane has value `x`. 
- #[allow(unused_variables)] - #[inline] - pub fn splat(x: bool) -> $name { - let x = if x {!(0 as $repr_elem)} else {0}; - $name($({ let $first = (); x}),*, - $({ let $last = (); x}),*) - } - - /// Extract the value of the `idx`th lane of `self`. - /// - /// # Panics - /// - /// `extract` will panic if `idx` is out of bounds. - #[inline] - pub fn extract(self, idx: u32) -> bool { - assert!(idx < $length); - unsafe {simd_extract(self.to_repr(), idx) != 0} - } - /// Return a new vector where the `idx`th lane is replaced - /// by `elem`. - /// - /// # Panics - /// - /// `replace` will panic if `idx` is out of bounds. - #[inline] - pub fn replace(self, idx: u32, elem: bool) -> Self { - assert!(idx < $length); - let x = if elem {!(0 as $repr_elem)} else {0}; - unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))} - } - /// Select between elements of `then` and `else_`, based on - /// the corresponding element of `self`. - /// - /// This is equivalent to the following, but is possibly - /// more efficient: - /// - /// ```rust,ignore - /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) }, - /// if self.extract(1) { then.extract(1) } else { else_.extract(1) }, - /// ...) - /// ``` - #[inline] - pub fn select>(self, then: T, else_: T) -> T { - let then: $repr = bitcast(then); - let else_: $repr = bitcast(else_); - bitcast((then & self.to_repr()) | (else_ & (!self).to_repr())) - } - - /// Check if every element of `self` is true. - /// - /// This is equivalent to the following, but is possibly - /// more efficient: - /// - /// ```rust,ignore - /// self.extract(0) && self.extract(1) && ... - /// ``` - #[inline] - pub fn all(self) -> bool { - common::$all(self) - } - /// Check if any element of `self` is true. - /// - /// This is equivalent to the following, but is possibly - /// more efficient: - /// - /// ```rust,ignore - /// self.extract(0) || self.extract(1) || ... 
- /// ``` - #[inline] - pub fn any(self) -> bool { - common::$any(self) - } - - $( - #[$cvt_meta] - #[inline] - pub fn $cvt(self) -> $cvt_to { - bitcast(self) - } - )* - } - impl ops::Not for $name { - type Output = Self; - - #[inline] - fn not(self) -> Self { - Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr()) - } - } - )* - } -} - -bool_impls! { - bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3 - [/// Convert `self` to a boolean vector for interacting with floating point vectors. - to_f -> bool32fx4]; - bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3 - [/// Convert `self` to a boolean vector for interacting with integer vectors. - to_i -> bool32ix4]; - - bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 []; - - bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; -} - -impl u32x4 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i32(self) -> i32x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 32-bit float. - #[inline] - pub fn to_f32(self) -> f32x4 { - unsafe {simd_cast(self)} - } -} -impl i32x4 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u32(self) -> u32x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 32-bit float. - #[inline] - pub fn to_f32(self) -> f32x4 { - unsafe {simd_cast(self)} - } -} -impl f32x4 { - /// Compute the square root of each lane. - #[inline] - pub fn sqrt(self) -> Self { - common::f32x4_sqrt(self) - } - /// Compute an approximation to the reciprocal of the square root - /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`. - /// - /// The accuracy of this approximation is platform dependent. 
- #[inline] - pub fn approx_rsqrt(self) -> Self { - common::f32x4_approx_rsqrt(self) - } - /// Compute an approximation to the reciprocal of `self`, that is, - /// `f32::splat(1.0) / self`. - /// - /// The accuracy of this approximation is platform dependent. - #[inline] - pub fn approx_reciprocal(self) -> Self { - common::f32x4_approx_reciprocal(self) - } - /// Compute the lane-wise maximum of `self` and `other`. - /// - /// This is equivalent to the following, but is possibly more - /// efficient: - /// - /// ```rust,ignore - /// f32x4::new(self.extract(0).max(other.extract(0)), - /// self.extract(1).max(other.extract(1)), - /// ...) - /// ``` - #[inline] - pub fn max(self, other: Self) -> Self { - common::f32x4_max(self, other) - } - /// Compute the lane-wise minimum of `self` and `other`. - /// - /// This is equivalent to the following, but is possibly more - /// efficient: - /// - /// ```rust,ignore - /// f32x4::new(self.extract(0).min(other.extract(0)), - /// self.extract(1).min(other.extract(1)), - /// ...) - /// ``` - #[inline] - pub fn min(self, other: Self) -> Self { - common::f32x4_min(self, other) - } - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i32(self) -> i32x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u32(self) -> u32x4 { - unsafe {simd_cast(self)} - } -} - -impl i16x8 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u16(self) -> u16x8 { - unsafe {simd_cast(self)} - } -} -impl u16x8 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i16(self) -> i16x8 { - unsafe {simd_cast(self)} - } -} - -impl i8x16 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u8(self) -> u8x16 { - unsafe {simd_cast(self)} - } -} -impl u8x16 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i8(self) -> i8x16 { - unsafe {simd_cast(self)} - } -} - - -macro_rules! 
neg_impls { - ($zero: expr, $($ty: ident,)*) => { - $(impl ops::Neg for $ty { - type Output = Self; - fn neg(self) -> Self { - $ty::splat($zero) - self - } - })* - } -} -neg_impls!{ - 0, - i32x4, - i16x8, - i8x16, -} -neg_impls! { - 0.0, - f32x4, -} -macro_rules! not_impls { - ($($ty: ident,)*) => { - $(impl ops::Not for $ty { - type Output = Self; - fn not(self) -> Self { - $ty::splat(!0) ^ self - } - })* - } -} -not_impls! { - i32x4, - i16x8, - i8x16, - u32x4, - u16x8, - u8x16, -} - -macro_rules! operators { - ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { - $( - $(impl ops::$trayt for $ty { - type Output = Self; - #[inline] - fn $method(self, x: Self) -> Self { - unsafe {$func(self, x)} - } - })* - )* - } -} -operators! { - Add (simd_add, add): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - f32x4; - Sub (simd_sub, sub): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - f32x4; - Mul (simd_mul, mul): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - f32x4; - Div (simd_div, div): f32x4; - - BitAnd (simd_and, bitand): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - bool8ix16, bool16ix8, bool32ix4, - bool32fx4; - BitOr (simd_or, bitor): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - bool8ix16, bool16ix8, bool32ix4, - bool32fx4; - BitXor (simd_xor, bitxor): - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, - bool8ix16, bool16ix8, bool32ix4, - bool32fx4; -} - -macro_rules! shift_one { - ($ty: ident, $($by: ident),*) => { - $( - impl ops::Shl<$by> for $ty { - type Output = Self; - #[inline] - fn shl(self, other: $by) -> Self { - unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } - } - } - impl ops::Shr<$by> for $ty { - type Output = Self; - #[inline] - fn shr(self, other: $by) -> Self { - unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} - } - } - )* - } -} - -macro_rules! shift { - ($($ty: ident),*) => { - $(shift_one! { - $ty, - u8, u16, u32, u64, usize, - i8, i16, i32, i64, isize - })* - } -} -shift! 
{ - i8x16, u8x16, i16x8, u16x8, i32x4, u32x4 -} diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs deleted file mode 100644 index e8fb1b16f53b..000000000000 --- a/third_party/rust/simd/src/lib.rs +++ /dev/null @@ -1,804 +0,0 @@ -//! `simd` offers a basic interface to the SIMD functionality of CPUs. -#![no_std] - -#![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)] -#![allow(non_camel_case_types)] - -#[cfg(feature = "with-serde")] -extern crate serde; -#[cfg(feature = "with-serde")] -#[macro_use] -extern crate serde_derive; - -use core::mem; - -/// Boolean type for 8-bit integers. -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct bool8i(i8); -/// Boolean type for 16-bit integers. -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct bool16i(i16); -/// Boolean type for 32-bit integers. -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct bool32i(i32); -/// Boolean type for 32-bit floats. -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct bool32f(i32); - -macro_rules! bool { - ($($name: ident, $inner: ty;)*) => { - $( - impl From for $name { - #[inline] - fn from(b: bool) -> $name { - $name(-(b as $inner)) - } - } - impl From<$name> for bool { - #[inline] - fn from(b: $name) -> bool { - b.0 != 0 - } - } - )* - } -} -bool! { - bool8i, i8; - bool16i, i16; - bool32i, i32; - bool32f, i32; -} - -/// Types that are SIMD vectors. -pub unsafe trait Simd { - /// The corresponding boolean vector type. - type Bool: Simd; - /// The element that this vector stores. - type Elem; -} - -/// A SIMD vector of 4 `u32`s. 
-#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u32x4(u32, u32, u32, u32); -/// A SIMD vector of 4 `i32`s. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i32x4(i32, i32, i32, i32); -/// A SIMD vector of 4 `f32`s. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct f32x4(f32, f32, f32, f32); -/// A SIMD boolean vector for length-4 vectors of 32-bit integers. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32ix4(i32, i32, i32, i32); -/// A SIMD boolean vector for length-4 vectors of 32-bit floats. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32fx4(i32, i32, i32, i32); - -#[allow(dead_code)] -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -struct u32x2(u32, u32); -#[allow(dead_code)] -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -struct i32x2(i32, i32); -#[allow(dead_code)] -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -struct f32x2(f32, f32); -#[allow(dead_code)] -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -struct bool32ix2(i32, i32); -#[allow(dead_code)] -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -struct bool32fx2(i32, i32); - -/// A SIMD vector of 8 `u16`s. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u16x8(u16, u16, u16, u16, - u16, u16, u16, u16); -/// A SIMD vector of 8 `i16`s. 
-#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i16x8(i16, i16, i16, i16, - i16, i16, i16, i16); -/// A SIMD boolean vector for length-8 vectors of 16-bit integers. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool16ix8(i16, i16, i16, i16, - i16, i16, i16, i16); - -/// A SIMD vector of 16 `u8`s. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8); -/// A SIMD vector of 16 `i8`s. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8); -/// A SIMD boolean vector for length-16 vectors of 8-bit integers. -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool8ix16(i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8); - - -macro_rules! simd { - ($($bool: ty: $($ty: ty = $elem: ty),*;)*) => { - $($(unsafe impl Simd for $ty { - type Bool = $bool; - type Elem = $elem; - } - impl Clone for $ty { #[inline] fn clone(&self) -> Self { *self } } - )*)*} -} -simd! 
{ - bool8ix16: i8x16 = i8, u8x16 = u8, bool8ix16 = bool8i; - bool16ix8: i16x8 = i16, u16x8 = u16, bool16ix8 = bool16i; - bool32ix4: i32x4 = i32, u32x4 = u32, bool32ix4 = bool32i; - bool32fx4: f32x4 = f32, bool32fx4 = bool32f; - - bool32ix2: i32x2 = i32, u32x2 = u32, bool32ix2 = bool32i; - bool32fx2: f32x2 = f32, bool32fx2 = bool32f; -} - -#[allow(dead_code)] -#[inline] -fn bitcast(x: T) -> U { - assert_eq!(mem::size_of::(), - mem::size_of::()); - unsafe {mem::transmute_copy(&x)} -} - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn simd_eq, U>(x: T, y: T) -> U; - fn simd_ne, U>(x: T, y: T) -> U; - fn simd_lt, U>(x: T, y: T) -> U; - fn simd_le, U>(x: T, y: T) -> U; - fn simd_gt, U>(x: T, y: T) -> U; - fn simd_ge, U>(x: T, y: T) -> U; - - fn simd_shuffle2>(x: T, y: T, idx: [u32; 2]) -> U; - fn simd_shuffle4>(x: T, y: T, idx: [u32; 4]) -> U; - fn simd_shuffle8>(x: T, y: T, idx: [u32; 8]) -> U; - fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; - - fn simd_insert, U>(x: T, idx: u32, val: U) -> T; - fn simd_extract, U>(x: T, idx: u32) -> U; - - fn simd_cast(x: T) -> U; - - fn simd_add(x: T, y: T) -> T; - fn simd_sub(x: T, y: T) -> T; - fn simd_mul(x: T, y: T) -> T; - fn simd_div(x: T, y: T) -> T; - fn simd_shl(x: T, y: T) -> T; - fn simd_shr(x: T, y: T) -> T; - fn simd_and(x: T, y: T) -> T; - fn simd_or(x: T, y: T) -> T; - fn simd_xor(x: T, y: T) -> T; -} -#[repr(packed)] -#[derive(Copy)] -struct Unalign(T); - -impl Clone for Unalign { - fn clone(&self) -> Unalign { - Unalign(unsafe { self.0.clone() }) - } -} - -#[macro_use] -mod common; -mod sixty_four; -mod v256; - -#[cfg(any(feature = "doc", - target_arch = "x86", - target_arch = "x86_64"))] -pub mod x86; -#[cfg(any(feature = "doc", target_arch = "arm"))] -pub mod arm; -#[cfg(any(feature = "doc", target_arch = "aarch64"))] -pub mod aarch64; - -#[cfg(test)] -mod tests { - - use super::u8x16; - use super::u16x8; - use super::u32x4; - use super::f32x4; - - #[test] - fn test_u8x16_none_not_any() { - let x1 = 
u8x16::splat(1); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_none_not_all() { - let x1 = u8x16::splat(1); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_all_any() { - let x1 = u8x16::splat(1); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_all_all() { - let x1 = u8x16::splat(1); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_except_last_any() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u8x16::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_except_last_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_except_first_any() { - let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_except_first_not_all() { - let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_only_last_any() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_only_last_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u8x16::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_only_first_any() { - let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_only_first_not_all() { - let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_except_thirteenth_any() { - let 
x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_except_thirteenth_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_except_fifth_any() { - let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_except_fifth_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_only_thirteenth_any() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_only_thirteenth_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u8x16_only_fifth_any() { - let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u8x16_only_fifth_not_all() { - let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let x2 = u8x16::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_none_not_any() { - let x1 = u16x8::splat(1); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_none_not_all() { - let x1 = u16x8::splat(1); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_all_any() { - let x1 = u16x8::splat(1); - let x2 = u16x8::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_all_all() { - let x1 = u16x8::splat(1); - let x2 = u16x8::splat(1); - assert!((x1.eq(x2)).all()); - } - - #[test] - fn 
test_u16x8_except_last_any() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u16x8::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_except_last_not_all() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_except_first_any() { - let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_except_first_not_all() { - let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_only_last_any() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u16x8::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_only_last_not_all() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); - let x2 = u16x8::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_only_first_any() { - let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_only_first_not_all() { - let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_except_sixth_any() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); - let x2 = u16x8::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_except_sixth_not_all() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_except_third_any() { - let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_except_third_not_all() { - let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_only_sixth_any() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); - let x2 
= u16x8::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_only_sixth_not_all() { - let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); - let x2 = u16x8::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u16x8_only_third_any() { - let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u16x8_only_third_not_all() { - let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); - let x2 = u16x8::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_none_not_any() { - let x1 = u32x4::splat(1); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_none_not_all() { - let x1 = u32x4::splat(1); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_all_any() { - let x1 = u32x4::splat(1); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_all_all() { - let x1 = u32x4::splat(1); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_except_last_any() { - let x1 = u32x4::new(2, 2, 2, 1); - let x2 = u32x4::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_except_last_not_all() { - let x1 = u32x4::new(2, 2, 2, 1); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_except_first_any() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_except_first_not_all() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_only_last_any() { - let x1 = u32x4::new(2, 2, 2, 1); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_only_last_not_all() { - let x1 = u32x4::new(2, 2, 2, 1); - let x2 = u32x4::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_only_first_any() { - let 
x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_only_first_not_all() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_except_second_any() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_except_second_not_all() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_except_third_any() { - let x1 = u32x4::new(2, 2, 1, 2); - let x2 = u32x4::splat(2); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_except_third_not_all() { - let x1 = u32x4::new(2, 2, 1, 2); - let x2 = u32x4::splat(2); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_only_second_any() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_only_second_not_all() { - let x1 = u32x4::new(1, 2, 2, 2); - let x2 = u32x4::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_u32x4_only_third_any() { - let x1 = u32x4::new(2, 2, 1, 2); - let x2 = u32x4::splat(1); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_u32x4_only_third_not_all() { - let x1 = u32x4::new(2, 2, 1, 2); - let x2 = u32x4::splat(1); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_none_not_any() { - let x1 = f32x4::splat(1.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_none_not_all() { - let x1 = f32x4::splat(1.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_all_any() { - let x1 = f32x4::splat(1.0); - let x2 = f32x4::splat(1.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_all_all() { - let x1 = f32x4::splat(1.0); - let x2 = f32x4::splat(1.0); - assert!((x1.eq(x2)).all()); - } - - #[test] - fn 
test_f32x4_except_last_any() { - let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); - let x2 = f32x4::splat(2.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_except_last_not_all() { - let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_except_first_any() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_except_first_not_all() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_only_last_any() { - let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); - let x2 = f32x4::splat(1.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_only_last_not_all() { - let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); - let x2 = f32x4::splat(1.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_only_first_any() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(1.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_only_first_not_all() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(1.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_except_second_any() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_except_second_not_all() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_except_third_any() { - let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_except_third_not_all() { - let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); - let x2 = f32x4::splat(2.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_only_second_any() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(1.0); - 
assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_only_second_not_all() { - let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); - let x2 = f32x4::splat(1.0); - assert!(!(x1.eq(x2)).all()); - } - - #[test] - fn test_f32x4_only_third_any() { - let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); - let x2 = f32x4::splat(1.0); - assert!((x1.eq(x2)).any()); - } - - #[test] - fn test_f32x4_only_third_not_all() { - let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); - let x2 = f32x4::splat(1.0); - assert!(!(x1.eq(x2)).all()); - } - -} diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs deleted file mode 100644 index a87f44a77ee7..000000000000 --- a/third_party/rust/simd/src/sixty_four.rs +++ /dev/null @@ -1,228 +0,0 @@ -#![allow(dead_code)] -use super::*; -#[allow(unused_imports)] -use super::{ - f32x2, - simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, - simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, - simd_insert, simd_extract, - simd_cast, - simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, - - Unalign, bitcast, -}; -use core::{mem,ops}; - -/// Boolean type for 64-bit integers. -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone)] -pub struct bool64i(i64); -/// Boolean type for 64-bit floats. -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy, Clone)] -pub struct bool64f(i64); -/// A SIMD vector of 2 `u64`s. -#[repr(simd)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u64x2(u64, u64); -/// A SIMD vector of 2 `i64`s. -#[repr(simd)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i64x2(i64, i64); -/// A SIMD vector of 2 `f64`s. 
-#[repr(simd)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct f64x2(f64, f64); -/// A SIMD boolean vector for length-2 vectors of 64-bit integers. -#[repr(simd)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool64ix2(i64, i64); -/// A SIMD boolean vector for length-2 vectors of 64-bit floats. -#[repr(simd)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool64fx2(i64, i64); - -simd! { - bool64ix2: i64x2 = i64, u64x2 = u64, bool64ix2 = bool64i; - bool64fx2: f64x2 = f64, bool64fx2 = bool64f; -} -basic_impls! { - u64x2: u64, bool64ix2, simd_shuffle2, 2, x0 | x1; - i64x2: i64, bool64ix2, simd_shuffle2, 2, x0 | x1; - f64x2: f64, bool64fx2, simd_shuffle2, 2, x0 | x1; -} - -mod common { - use super::*; - // naive for now - #[inline] - pub fn bool64ix2_all(x: bool64ix2) -> bool { - x.0 != 0 && x.1 != 0 - } - #[inline] - pub fn bool64ix2_any(x: bool64ix2) -> bool { - x.0 != 0 || x.1 != 0 - } - #[inline] - pub fn bool64fx2_all(x: bool64fx2) -> bool { - x.0 != 0 && x.1 != 0 - } - #[inline] - pub fn bool64fx2_any(x: bool64fx2) -> bool { - x.0 != 0 || x.1 != 0 - }} -bool_impls! { - bool64ix2: bool64i, i64x2, i64, 2, bool64ix2_all, bool64ix2_any, x0 | x1 - [/// Convert `self` to a boolean vector for interacting with floating point vectors. - to_f -> bool64fx2]; - - bool64fx2: bool64f, i64x2, i64, 2, bool64fx2_all, bool64fx2_any, x0 | x1 - [/// Convert `self` to a boolean vector for interacting with integer vectors. - to_i -> bool64ix2]; -} - -impl u64x2 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i64(self) -> i64x2 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 64-bit float. - #[inline] - pub fn to_f64(self) -> f64x2 { - unsafe {simd_cast(self)} - } -} -impl i64x2 { - /// Convert each lane to an unsigned integer. 
- #[inline] - pub fn to_u64(self) -> u64x2 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 64-bit float. - #[inline] - pub fn to_f64(self) -> f64x2 { - unsafe {simd_cast(self)} - } -} -impl f64x2 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i64(self) -> i64x2 { - unsafe {simd_cast(self)} - } - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u64(self) -> u64x2 { - unsafe {simd_cast(self)} - } - - /// Convert each lane to a 32-bit float. - #[inline] - pub fn to_f32(self) -> f32x4 { - unsafe { - let x: f32x2 = simd_cast(self); - f32x4::new(x.0, x.1, 0.0, 0.0) - } - } -} - -neg_impls!{ - 0, - i64x2, -} -neg_impls! { - 0.0, - f64x2, -} -macro_rules! not_impls { - ($($ty: ident,)*) => { - $(impl ops::Not for $ty { - type Output = Self; - fn not(self) -> Self { - $ty::splat(!0) ^ self - } - })* - } -} -not_impls! { - i64x2, - u64x2, -} - -macro_rules! operators { - ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { - $( - $(impl ops::$trayt for $ty { - type Output = Self; - #[inline] - fn $method(self, x: Self) -> Self { - unsafe {$func(self, x)} - } - })* - )* - } -} -operators! { - Add (simd_add, add): - i64x2, u64x2, - f64x2; - Sub (simd_sub, sub): - i64x2, u64x2, - f64x2; - Mul (simd_mul, mul): - i64x2, u64x2, - f64x2; - Div (simd_div, div): f64x2; - - BitAnd (simd_and, bitand): - i64x2, u64x2, - bool64ix2, - bool64fx2; - BitOr (simd_or, bitor): - i64x2, u64x2, - bool64ix2, - bool64fx2; - BitXor (simd_xor, bitxor): - i64x2, u64x2, - bool64ix2, - bool64fx2; -} - -macro_rules! 
shift_one { ($ty: ident, $($by: ident),*) => { - $( - impl ops::Shl<$by> for $ty { - type Output = Self; - #[inline] - fn shl(self, other: $by) -> Self { - unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } - } - } - impl ops::Shr<$by> for $ty { - type Output = Self; - #[inline] - fn shr(self, other: $by) -> Self { - unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} - } - } - )* - } -} - -macro_rules! shift { - ($($ty: ident),*) => { - $(shift_one! { - $ty, - u8, u16, u32, u64, usize, - i8, i16, i32, i64, isize - })* - } -} -shift! { - i64x2, u64x2 -} diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs deleted file mode 100644 index 519eb14e7259..000000000000 --- a/third_party/rust/simd/src/v256.rs +++ /dev/null @@ -1,436 +0,0 @@ -#![allow(dead_code)] -use core::{mem,ops}; -#[allow(unused_imports)] -use super::{ - Simd, - u32x4, i32x4, u16x8, i16x8, u8x16, i8x16, f32x4, - bool32ix4, bool16ix8, bool8ix16, bool32fx4, - simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, - simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, - simd_insert, simd_extract, - simd_cast, - simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, - bool8i, bool16i, bool32i, bool32f, - Unalign, bitcast, -}; -use super::sixty_four::*; -#[cfg(all(target_feature = "avx"))] -use super::x86::avx::common; - -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u64x4(u64, u64, u64, u64); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i64x4(i64, i64, i64, i64); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct f64x4(f64, f64, f64, f64); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool64ix4(i64, i64, i64, i64); 
-#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool64fx4(i64, i64, i64, i64); - -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u32x8(u32, u32, u32, u32, - u32, u32, u32, u32); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i32x8(i32, i32, i32, i32, - i32, i32, i32, i32); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct f32x8(f32, f32, f32, f32, - f32, f32, f32, f32); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32ix8(i32, i32, i32, i32, - i32, i32, i32, i32);#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool32fx8(i32, i32, i32, i32, - i32, i32, i32, i32); - -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool16ix16(i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16); - -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] 
-#[derive(Debug, Copy)] -pub struct i8x32(i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8); -#[repr(simd)] -#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] -#[derive(Debug, Copy)] -pub struct bool8ix32(i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8); - -simd! { - bool8ix32: i8x32 = i8, u8x32 = u8, bool8ix32 = bool8i; - bool16ix16: i16x16 = i16, u16x16 = u16, bool16ix16 = bool16i; - bool32ix8: i32x8 = i32, u32x8 = u32, bool32ix8 = bool32i; - bool64ix4: i64x4 = i64, u64x4 = u64, bool64ix4 = bool64i; - - bool32fx8: f32x8 = f32, bool32fx8 = bool32f; - bool64fx4: f64x4 = f64, bool64fx4 = bool64f; -} - -basic_impls! { - u64x4: u64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; - i64x4: i64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; - f64x4: f64, bool64fx4, simd_shuffle4, 4, x0, x1 | x2, x3; - - u32x8: u32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; - i32x8: i32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; - f32x8: f32, bool32fx8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; - - u16x16: u16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; - i16x16: i16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; - - u8x32: u8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; - i8x32: i8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; -} - -#[cfg(all(not(target_feature = "avx")))] -#[doc(hidden)] -mod common { - use super::*; - // implementation via SSE vectors - macro_rules! 
bools { - ($($ty: ty, $all: ident, $any: ident;)*) => { - $( - #[inline] - pub fn $all(x: $ty) -> bool { - x.low().all() && x.high().all() - } - #[inline] - pub fn $any(x: $ty) -> bool { - x.low().any() || x.high().any() - } - )* - } - } - - bools! { - bool64ix4, bool64ix4_all, bool64ix4_any; - bool64fx4, bool64fx4_all, bool64fx4_any; - bool32ix8, bool32ix8_all, bool32ix8_any; - bool32fx8, bool32fx8_all, bool32fx8_any; - bool16ix16, bool16ix16_all, bool16ix16_any; - bool8ix32, bool8ix32_all, bool8ix32_any; - } - -} - -bool_impls! { - bool64ix4: bool64i, i64x4, i64, 4, bool64ix4_all, bool64ix4_any, x0, x1 | x2, x3 - [/// Convert `self` to a boolean vector for interacting with floating point vectors. - to_f -> bool64fx4]; - - bool64fx4: bool64f, i64x4, i64, 4, bool64fx4_all, bool64fx4_any, x0, x1 | x2, x3 - [/// Convert `self` to a boolean vector for interacting with integer vectors. - to_i -> bool64ix4]; - - bool32ix8: bool32i, i32x8, i32, 8, bool32ix8_all, bool32ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 - [/// Convert `self` to a boolean vector for interacting with floating point vectors. - to_f -> bool32fx8]; - - bool32fx8: bool32f, i32x8, i32, 8, bool32fx8_all, bool32fx8_any, x0, x1, x2, x3 | x4, x5, x6, x7 - [/// Convert `self` to a boolean vector for interacting with integer vectors. - to_i -> bool32ix8]; - - bool16ix16: bool16i, i16x16, i16, 16, bool16ix16_all, bool16ix16_any, - x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; - - bool8ix32: bool8i, i8x32, i8, 32, bool8ix32_all, bool8ix32_any, - x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 []; -} - -pub trait LowHigh128 { - type Half: Simd; - /// Extract the low 128 bit part. - fn low(self) -> Self::Half; - /// Extract the high 128 bit part. - fn high(self) -> Self::Half; -} - -macro_rules! expr { ($x:expr) => ($x) } // HACK -macro_rules! 
low_high_impls { - ($( - $name: ident, $half: ident, $($first: tt),+ ... $($last: tt),+; - )*) => { - $(impl LowHigh128 for $name { - type Half = $half; - #[inline] - fn low(self) -> Self::Half { - $half::new($( expr!(self.$first), )*) - } - - #[inline] - fn high(self) -> Self::Half { - $half::new($( expr!(self.$last), )*) - } - })* - } -} - -low_high_impls! { - u64x4, u64x2, 0, 1 ... 2, 3; - i64x4, i64x2, 0, 1 ... 2, 3; - f64x4, f64x2, 0, 1 ... 2, 3; - - u32x8, u32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; - i32x8, i32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; - f32x8, f32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; - - u16x16, u16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; - i16x16, i16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; - - u8x32, u8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; - i8x32, i8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; - -} - -macro_rules! bool_low_high_impls { - ($( - $name: ident: $half: ident; - )*) => { - $(impl LowHigh128 for $name { - type Half = $half; - /// Extract the low 128 bit part. - #[inline] - fn low(self) -> Self::Half { - Self::Half::from_repr(self.to_repr().low()) - } - - /// Extract the high 128 bit part. - #[inline] - fn high(self) -> Self::Half { - Self::Half::from_repr(self.to_repr().high()) - } - })* - } -} - -bool_low_high_impls! { - bool64fx4: bool64fx2; - bool32fx8: bool32fx4; - - bool64ix4: bool64ix2; - bool32ix8: bool32ix4; - bool16ix16: bool16ix8; - bool8ix32: bool8ix16; -} - -impl u64x4 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i64(self) -> i64x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 64-bit float. - #[inline] - pub fn to_f64(self) -> f64x4 { - unsafe {simd_cast(self)} - } -} - -impl i64x4 { - /// Convert each lane to an unsigned integer. 
- #[inline] - pub fn to_u64(self) -> u64x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 64-bit float. - #[inline] - pub fn to_f64(self) -> f64x4 { - unsafe {simd_cast(self)} - } -} - -impl f64x4 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i64(self) -> i64x4 { - unsafe {simd_cast(self)} - } - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u64(self) -> u64x4 { - unsafe {simd_cast(self)} - } -} - -impl u32x8 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i32(self) -> i32x8 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 32-bit float. - #[inline] - pub fn to_f32(self) -> f32x8 { - unsafe {simd_cast(self)} - } -} - -impl i32x8 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u32(self) -> u32x8 { - unsafe {simd_cast(self)} - } - /// Convert each lane to a 32-bit float. - #[inline] - pub fn to_f32(self) -> f32x8 { - unsafe {simd_cast(self)} - } -} - -impl f32x8 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i32(self) -> i32x8 { - unsafe {simd_cast(self)} - } - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u32(self) -> u32x8 { - unsafe {simd_cast(self)} - } -} - -impl i16x16 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u16(self) -> u16x16 { - unsafe {simd_cast(self)} - } -} - -impl u16x16 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i16(self) -> i16x16 { - unsafe {simd_cast(self)} - } -} - -impl i8x32 { - /// Convert each lane to an unsigned integer. - #[inline] - pub fn to_u8(self) -> u8x32 { - unsafe {simd_cast(self)} - } -} - -impl u8x32 { - /// Convert each lane to a signed integer. - #[inline] - pub fn to_i8(self) -> i8x32 { - unsafe {simd_cast(self)} - } -} - -operators! 
{ - Add (simd_add, add): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - f64x4, f32x8; - Sub (simd_sub, sub): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - f64x4, f32x8; - Mul (simd_mul, mul): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - f64x4, f32x8; - Div (simd_div, div): f64x4, f32x8; - - BitAnd (simd_and, bitand): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - bool64ix4, bool32ix8, bool16ix16, - bool64fx4, bool32fx8; - BitOr (simd_or, bitor): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - bool64ix4, bool32ix8, bool16ix16, - bool64fx4, bool32fx8; - BitXor (simd_xor, bitxor): - i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, - bool64ix4, bool32ix8, bool16ix16, - bool64fx4, bool32fx8; -} - -neg_impls!{ - 0, - i64x4, - i32x8, - i16x16, - i8x32, -} - -neg_impls! { - 0.0, - f64x4, - f32x8, -} - -not_impls! { - i64x4, - u64x4, - i32x8, - u32x8, - i16x16, - u16x16, - i8x32, - u8x32, -} - -shift! { - i64x4, - u64x4, - i32x8, - u32x8, - i16x16, - u16x16, - i8x32, - u8x32 -} diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs deleted file mode 100644 index 180247e36561..000000000000 --- a/third_party/rust/simd/src/x86/avx.rs +++ /dev/null @@ -1,290 +0,0 @@ -use super::super::*; -use sixty_four::*; - -use super::super::bitcast; - -pub use v256::{ - f64x4, bool64fx4, u64x4, i64x4, bool64ix4, - f32x8, bool32fx8, u32x8, i32x8, bool32ix8, - u16x16, i16x16, bool16ix16, - u8x32, i8x32, bool8ix32, - LowHigh128 -}; - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn x86_mm256_addsub_ps(x: f32x8, y: f32x8) -> f32x8; - fn x86_mm256_addsub_pd(x: f64x4, y: f64x4) -> f64x4; - fn x86_mm256_dp_ps(x: f32x8, y: f32x8, z: i32) -> f32x8; - fn x86_mm256_hadd_ps(x: f32x8, y: f32x8) -> f32x8; - fn x86_mm256_hadd_pd(x: f64x4, y: f64x4) -> f64x4; - fn x86_mm256_hsub_ps(x: f32x8, y: f32x8) -> f32x8; - fn x86_mm256_hsub_pd(x: f64x4, y: f64x4) -> f64x4; - fn 
x86_mm256_max_ps(x: f32x8, y: f32x8) -> f32x8; - fn x86_mm256_max_pd(x: f64x4, y: f64x4) -> f64x4; - fn x86_mm256_min_ps(x: f32x8, y: f32x8) -> f32x8; - fn x86_mm256_min_pd(x: f64x4, y: f64x4) -> f64x4; - fn x86_mm256_movemask_ps(x: f32x8) -> i32; - fn x86_mm256_movemask_pd(x: f64x4) -> i32; - fn x86_mm_permutevar_ps(x: f32x4, y: i32x4) -> f32x4; - fn x86_mm_permutevar_pd(x: f64x2, y: i64x2) -> f64x2; - fn x86_mm256_permutevar_ps(x: f32x8, y: i32x8) -> f32x8; - fn x86_mm256_permutevar_pd(x: f64x4, y: i64x4) -> f64x4; - fn x86_mm256_rcp_ps(x: f32x8) -> f32x8; - fn x86_mm256_rsqrt_ps(x: f32x8) -> f32x8; - fn x86_mm256_sqrt_ps(x: f32x8) -> f32x8; - fn x86_mm256_sqrt_pd(x: f64x4) -> f64x4; - fn x86_mm_testc_ps(x: f32x4, y: f32x4) -> i32; - fn x86_mm256_testc_ps(x: f32x8, y: f32x8) -> i32; - fn x86_mm_testc_pd(x: f64x2, y: f64x2) -> i32; - fn x86_mm256_testc_pd(x: f64x4, y: f64x4) -> i32; - fn x86_mm256_testc_si256(x: u64x4, y: u64x4) -> i32; - fn x86_mm_testnzc_ps(x: f32x4, y: f32x4) -> i32; - fn x86_mm256_testnzc_ps(x: f32x8, y: f32x8) -> i32; - fn x86_mm_testnzc_pd(x: f64x2, y: f64x2) -> i32; - fn x86_mm256_testnzc_pd(x: f64x4, y: f64x4) -> i32; - fn x86_mm256_testnzc_si256(x: u64x4, y: u64x4) -> i32; - fn x86_mm_testz_ps(x: f32x4, y: f32x4) -> i32; - fn x86_mm256_testz_ps(x: f32x8, y: f32x8) -> i32; - fn x86_mm_testz_pd(x: f64x2, y: f64x2) -> i32; - fn x86_mm256_testz_pd(x: f64x4, y: f64x4) -> i32; - fn x86_mm256_testz_si256(x: u64x4, y: u64x4) -> i32; -} - -#[doc(hidden)] -pub mod common { - use super::*; - use core::mem; - - macro_rules! bools { - ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => { - $( - #[inline] - pub fn $all(x: $ty) -> bool { - unsafe { - super::$testc(mem::transmute(x), mem::transmute(<$ty>::splat(true))) != 0 - } - } - #[inline] - pub fn $any(x: $ty) -> bool { - unsafe { - super::$testz(mem::transmute(x), mem::transmute(x)) == 0 - } - } - )* - } - } - - bools! 
{ - bool32fx8, bool32fx8_all, bool32fx8_any, x86_mm256_testc_ps, x86_mm256_testz_ps; - bool64fx4, bool64fx4_all, bool64fx4_any, x86_mm256_testc_pd, x86_mm256_testz_pd; - bool8ix32, bool8ix32_all, bool8ix32_any, x86_mm256_testc_si256, x86_mm256_testz_si256; - bool16ix16, bool16ix16_all, bool16ix16_any, x86_mm256_testc_si256, x86_mm256_testz_si256; - bool32ix8, bool32ix8_all, bool32ix8_any, x86_mm256_testc_si256, x86_mm256_testz_si256; - bool64ix4, bool64ix4_all, bool64ix4_any, x86_mm256_testc_si256, x86_mm256_testz_si256; - } -} - -// 128-bit vectors: - -// 32 bit floats - -pub trait AvxF32x4 { - fn permutevar(self, other: i32x4) -> f32x4; -} -impl AvxF32x4 for f32x4 { - fn permutevar(self, other: i32x4) -> f32x4 { - unsafe { x86_mm_permutevar_ps(self, other) } - } -} - -pub trait AvxF64x4 { - fn sqrt(self) -> Self; - fn addsub(self, other: Self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn move_mask(self) -> u32; -} - -impl AvxF64x4 for f64x4 { - #[inline] - fn sqrt(self) -> Self { - unsafe { x86_mm256_sqrt_pd(self) } - } - - #[inline] - fn addsub(self, other: Self) -> Self { - unsafe { x86_mm256_addsub_pd(self, other) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm256_hadd_pd(self, other) } - } - - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { x86_mm256_hsub_pd(self, other) } - } - - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm256_max_pd(self, other) } - } - - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm256_min_pd(self, other) } - } - - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm256_movemask_pd(self) as u32 } - } -} - -pub trait AvxBool64fx4 { - fn move_mask(self) -> u32; -} -impl AvxBool64fx4 for bool64fx4 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm256_movemask_pd(bitcast(self)) as u32 } - } -} - -pub trait AvxF32x8 { - fn 
sqrt(self) -> Self; - fn addsub(self, other: Self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn move_mask(self) -> u32; - /// Compute an approximation to the reciprocal of the square root - /// of `self`, that is, `f32x8::splat(1.0) / self.sqrt()`. - /// - /// The accuracy of this approximation is platform dependent. - fn approx_rsqrt(self) -> Self; - /// Compute an approximation to the reciprocal of `self`, that is, - /// `f32x8::splat(1.0) / self`. - /// - /// The accuracy of this approximation is platform dependent. - fn approx_reciprocal(self) -> Self; -} - -impl AvxF32x8 for f32x8 { - #[inline] - fn sqrt(self) -> Self { - unsafe { x86_mm256_sqrt_ps(self) } - } - - #[inline] - fn addsub(self, other: Self) -> Self { - unsafe { x86_mm256_addsub_ps(self, other) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm256_hadd_ps(self, other) } - } - - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { x86_mm256_hsub_ps(self, other) } - } - - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm256_max_ps(self, other) } - } - - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm256_min_ps(self, other) } - } - - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm256_movemask_ps(self) as u32 } - } - - #[inline] - fn approx_reciprocal(self) -> Self { - unsafe { x86_mm256_rcp_ps(self) } - } - - #[inline] - fn approx_rsqrt(self) -> Self { - unsafe { x86_mm256_rsqrt_ps(self) } - } -} - -pub trait AvxBool32fx8 { - fn move_mask(self) -> u32; -} -impl AvxBool32fx8 for bool32fx8 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm256_movemask_ps(bitcast(self)) as u32 } - } -} - -pub trait AvxBool32fx4 {} -impl AvxBool32fx4 for bool32fx4 {} - -// 64 bit floats - -pub trait AvxF64x2 { - fn permutevar(self, other: i64x2) -> f64x2; -} -impl AvxF64x2 for f64x2 { - fn permutevar(self, other: 
i64x2) -> f64x2 { - unsafe { x86_mm_permutevar_pd(self, other) } - } -} - -pub trait AvxBool64fx2 {} -impl AvxBool64fx2 for bool64fx2 {} - -// 64 bit integers - -pub trait AvxU64x2 {} -impl AvxU64x2 for u64x2 {} -pub trait AvxI64x2 {} -impl AvxI64x2 for i64x2 {} - -pub trait AvxBool64ix2 {} -impl AvxBool64ix2 for bool64ix2 {} - -// 32 bit integers - -pub trait AvxU32x4 {} -impl AvxU32x4 for u32x4 {} -pub trait AvxI32x4 {} -impl AvxI32x4 for i32x4 {} - -pub trait AvxBool32ix4 {} -impl AvxBool32ix4 for bool32ix4 {} - -// 16 bit integers - -pub trait AvxU16x8 {} -impl AvxU16x8 for u16x8 {} -pub trait AvxI16x8 {} -impl AvxI16x8 for i16x8 {} - -pub trait AvxBool16ix8 {} -impl AvxBool16ix8 for bool16ix8 {} - -// 8 bit integers - -pub trait AvxU8x16 {} -impl AvxU8x16 for u8x16 {} -pub trait AvxI8x16 {} -impl AvxI8x16 for i8x16 {} - -pub trait AvxBool8ix16 {} -impl AvxBool8ix16 for bool8ix16 {} diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs deleted file mode 100644 index e86a33d3b5bb..000000000000 --- a/third_party/rust/simd/src/x86/avx2.rs +++ /dev/null @@ -1,65 +0,0 @@ -use x86::avx::*; - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn x86_mm256_abs_epi8(x: i8x32) -> i8x32; - fn x86_mm256_abs_epi16(x: i16x16) -> i16x16; - fn x86_mm256_abs_epi32(x: i32x8) -> i32x8; - fn x86_mm256_adds_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_adds_epu8(x: u8x32, y: u8x32) -> u8x32; - fn x86_mm256_adds_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_adds_epu16(x: u16x16, y: u16x16) -> u16x16; - fn x86_mm256_avg_epu8(x: u8x32, y: u8x32) -> u8x32; - fn x86_mm256_avg_epu16(x: u16x16, y: u16x16) -> u16x16; - fn x86_mm256_hadd_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_hadd_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_hadds_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_hsub_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_hsub_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_hsubs_epi16(x: i16x16, 
y: i16x16) -> i16x16; - fn x86_mm256_madd_epi16(x: i16x16, y: i16x16) -> i32x8; - fn x86_mm256_maddubs_epi16(x: i8x32, y: i8x32) -> i16x16; - fn x86_mm256_max_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_max_epu8(x: u8x32, y: u8x32) -> u8x32; - fn x86_mm256_max_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_max_epu16(x: u16x16, y: u16x16) -> u16x16; - fn x86_mm256_max_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_max_epu32(x: u32x8, y: u32x8) -> u32x8; - fn x86_mm256_min_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_min_epu8(x: u8x32, y: u8x32) -> u8x32; - fn x86_mm256_min_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_min_epu16(x: u16x16, y: u16x16) -> u16x16; - fn x86_mm256_min_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_min_epu32(x: u32x8, y: u32x8) -> u32x8; - fn x86_mm256_mul_epi64(x: i32x8, y: i32x8) -> i64x4; - fn x86_mm256_mul_epu64(x: u32x8, y: u32x8) -> u64x4; - fn x86_mm256_mulhi_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_mulhi_epu16(x: u16x16, y: u16x16) -> u16x16; - fn x86_mm256_mulhrs_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_packs_epi16(x: i16x16, y: i16x16) -> i8x32; - fn x86_mm256_packus_epi16(x: i16x16, y: i16x16) -> u8x32; - fn x86_mm256_packs_epi32(x: i32x8, y: i32x8) -> i16x16; - fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16; - fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8; - fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4; - fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_sign_epi32(x: i32x8, y: i32x8) -> i32x8; - fn x86_mm256_subs_epi8(x: i8x32, y: i8x32) -> i8x32; - fn x86_mm256_subs_epu8(x: u8x32, y: u8x32) -> u8x32; - fn x86_mm256_subs_epi16(x: i16x16, y: i16x16) -> i16x16; - fn x86_mm256_subs_epu16(x: u16x16, y: u16x16) -> u16x16; -} - -// broken on rustc 
1.7.0-nightly (1ddaf8bdf 2015-12-12) -// pub trait Avx2F32x8 { -// fn permutevar(self, other: i32x8) -> f32x8; -// } -// -// impl Avx2F32x8 for f32x8 { -// fn permutevar(self, other: i32x8) -> f32x8 { -// unsafe { x86_mm256_permutevar8x32_ps(self, other) } -// } -// } diff --git a/third_party/rust/simd/src/x86/mod.rs b/third_party/rust/simd/src/x86/mod.rs deleted file mode 100644 index 8763fb16ccfd..000000000000 --- a/third_party/rust/simd/src/x86/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -//! Features specific to x86 and x86-64 CPUs. - -#[cfg(any(feature = "doc", target_feature = "sse2"))] -pub mod sse2; -#[cfg(any(feature = "doc", target_feature = "sse3"))] -pub mod sse3; -#[cfg(any(feature = "doc", target_feature = "ssse3"))] -pub mod ssse3; -#[cfg(any(feature = "doc", target_feature = "sse4.1"))] -pub mod sse4_1; -#[cfg(any(feature = "doc", target_feature = "sse4.2"))] -pub mod sse4_2; -#[cfg(any(feature = "doc", target_feature = "avx"))] -pub mod avx; -#[cfg(any(feature = "doc", target_feature = "avx2"))] -pub mod avx2; diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs deleted file mode 100644 index 5cbc853694d5..000000000000 --- a/third_party/rust/simd/src/x86/sse2.rs +++ /dev/null @@ -1,359 +0,0 @@ -use super::super::*; -use {bitcast, simd_cast, f32x2}; - -pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; - -//pub use super::{u64x2, i64x2, f64x2, bool64ix2, bool64fx2}; - -// strictly speaking, these are SSE instructions, not SSE2. 
-extern "platform-intrinsic" { - fn x86_mm_movemask_ps(x: f32x4) -> i32; - fn x86_mm_max_ps(x: f32x4, y: f32x4) -> f32x4; - fn x86_mm_min_ps(x: f32x4, y: f32x4) -> f32x4; - fn x86_mm_rsqrt_ps(x: f32x4) -> f32x4; - fn x86_mm_rcp_ps(x: f32x4) -> f32x4; - fn x86_mm_sqrt_ps(x: f32x4) -> f32x4; -} - -extern "platform-intrinsic" { - fn x86_mm_adds_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_adds_epu8(x: u8x16, y: u8x16) -> u8x16; - fn x86_mm_adds_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_adds_epu16(x: u16x8, y: u16x8) -> u16x8; - fn x86_mm_avg_epu8(x: u8x16, y: u8x16) -> u8x16; - fn x86_mm_avg_epu16(x: u16x8, y: u16x8) -> u16x8; - fn x86_mm_madd_epi16(x: i16x8, y: i16x8) -> i32x4; - fn x86_mm_max_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_max_epu8(x: u8x16, y: u8x16) -> u8x16; - fn x86_mm_max_pd(x: f64x2, y: f64x2) -> f64x2; - fn x86_mm_min_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_min_epu8(x: u8x16, y: u8x16) -> u8x16; - fn x86_mm_min_pd(x: f64x2, y: f64x2) -> f64x2; - fn x86_mm_movemask_pd(x: f64x2) -> i32; - fn x86_mm_movemask_epi8(x: i8x16) -> i32; - fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2; - fn x86_mm_mulhi_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_mulhi_epu16(x: u16x8, y: u16x8) -> u16x8; - fn x86_mm_packs_epi16(x: i16x8, y: i16x8) -> i8x16; - fn x86_mm_packs_epi32(x: i32x4, y: i32x4) -> i16x8; - fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; - fn x86_mm_sad_epu8(x: u8x16, y: u8x16) -> u64x2; - fn x86_mm_sqrt_pd(x: f64x2) -> f64x2; - fn x86_mm_subs_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_subs_epu8(x: u8x16, y: u8x16) -> u8x16; - fn x86_mm_subs_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_subs_epu16(x: u16x8, y: u16x8) -> u16x8; -} - -#[doc(hidden)] -pub mod common { - use super::super::super::*; - use core::mem; - - #[inline] - pub fn f32x4_sqrt(x: f32x4) -> f32x4 { - unsafe {super::x86_mm_sqrt_ps(x)} - } - #[inline] - pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { - unsafe {super::x86_mm_rsqrt_ps(x)} - } - 
#[inline] - pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { - unsafe {super::x86_mm_rcp_ps(x)} - } - #[inline] - pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::x86_mm_max_ps(x, y)} - } - #[inline] - pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { - unsafe {super::x86_mm_min_ps(x, y)} - } - - macro_rules! bools { - ($($ty: ty, $all: ident, $any: ident, $movemask: ident, $width: expr;)*) => { - $( - #[inline] - pub fn $all(x: $ty) -> bool { - unsafe { - super::$movemask(mem::transmute(x)) == (1 << $width) - 1 - } - } - #[inline] - pub fn $any(x: $ty) -> bool { - unsafe { - super::$movemask(mem::transmute(x)) != 0 - } - } - )* - } - } - - bools! { - bool32fx4, bool32fx4_all, bool32fx4_any, x86_mm_movemask_ps, 4; - bool8ix16, bool8ix16_all, bool8ix16_any, x86_mm_movemask_epi8, 16; - bool16ix8, bool16ix8_all, bool16ix8_any, x86_mm_movemask_epi8, 16; - bool32ix4, bool32ix4_all, bool32ix4_any, x86_mm_movemask_epi8, 16; - } -} - -// 32 bit floats - -pub trait Sse2F32x4 { - fn to_f64(self) -> f64x2; - fn move_mask(self) -> u32; -} -impl Sse2F32x4 for f32x4 { - #[inline] - fn to_f64(self) -> f64x2 { - unsafe { - simd_cast(f32x2(self.0, self.1)) - } - } - fn move_mask(self) -> u32 { - unsafe {x86_mm_movemask_ps(self) as u32} - } -} -pub trait Sse2Bool32fx4 { - fn move_mask(self) -> u32; -} -impl Sse2Bool32fx4 for bool32fx4 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm_movemask_ps(bitcast(self)) as u32} - } -} - -// 64 bit floats - -pub trait Sse2F64x2 { - fn move_mask(self) -> u32; - fn sqrt(self) -> Self; - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; -} -impl Sse2F64x2 for f64x2 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} - } - - #[inline] - fn sqrt(self) -> Self { - unsafe { x86_mm_sqrt_pd(self) } - } - - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_pd(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { 
x86_mm_min_pd(self, other) } - } -} - -pub trait Sse2Bool64fx2 { - fn move_mask(self) -> u32; -} -impl Sse2Bool64fx2 for bool64fx2 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} - } -} - -// 64 bit ints - -pub trait Sse2U64x2 {} -impl Sse2U64x2 for u64x2 {} - -pub trait Sse2I64x2 {} -impl Sse2I64x2 for i64x2 {} - -pub trait Sse2Bool64ix2 {} -impl Sse2Bool64ix2 for bool64ix2 {} - -// 32 bit ints - -pub trait Sse2U32x4 { - fn low_mul(self, other: Self) -> u64x2; -} -impl Sse2U32x4 for u32x4 { - #[inline] - fn low_mul(self, other: Self) -> u64x2 { - unsafe { x86_mm_mul_epu32(self, other) } - } -} - -pub trait Sse2I32x4 { - fn packs(self, other: Self) -> i16x8; -} -impl Sse2I32x4 for i32x4 { - #[inline] - fn packs(self, other: Self) -> i16x8 { - unsafe { x86_mm_packs_epi32(self, other) } - } -} - -pub trait Sse2Bool32ix4 {} -impl Sse2Bool32ix4 for bool32ix4 {} - -// 16 bit ints - -pub trait Sse2U16x8 { - fn adds(self, other: Self) -> Self; - fn subs(self, other: Self) -> Self; - fn avg(self, other: Self) -> Self; - fn mulhi(self, other: Self) -> Self; -} -impl Sse2U16x8 for u16x8 { - #[inline] - fn adds(self, other: Self) -> Self { - unsafe { x86_mm_adds_epu16(self, other) } - } - #[inline] - fn subs(self, other: Self) -> Self { - unsafe { x86_mm_subs_epu16(self, other) } - } - - #[inline] - fn avg(self, other: Self) -> Self { - unsafe { x86_mm_avg_epu16(self, other) } - } - - #[inline] - fn mulhi(self, other: Self) -> Self { - unsafe { x86_mm_mulhi_epu16(self, other) } - } -} - -pub trait Sse2I16x8 { - fn adds(self, other: Self) -> Self; - fn subs(self, other: Self) -> Self; - fn madd(self, other: Self) -> i32x4; - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn mulhi(self, other: Self) -> Self; - fn packs(self, other: Self) -> i8x16; - fn packus(self, other: Self) -> u8x16; -} -impl Sse2I16x8 for i16x8 { - #[inline] - fn adds(self, other: Self) -> Self { - unsafe { x86_mm_adds_epi16(self, 
other) } - } - #[inline] - fn subs(self, other: Self) -> Self { - unsafe { x86_mm_subs_epi16(self, other) } - } - - #[inline] - fn madd(self, other: Self) -> i32x4 { - unsafe { x86_mm_madd_epi16(self, other) } - } - - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epi16(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epi16(self, other) } - } - - #[inline] - fn mulhi(self, other: Self) -> Self { - unsafe { x86_mm_mulhi_epi16(self, other) } - } - - #[inline] - fn packs(self, other: Self) -> i8x16 { - unsafe { x86_mm_packs_epi16(self, other) } - } - #[inline] - fn packus(self, other: Self) -> u8x16 { - unsafe { x86_mm_packus_epi16(self, other) } - } -} - -pub trait Sse2Bool16ix8 {} -impl Sse2Bool16ix8 for bool16ix8 {} - -// 8 bit ints - -pub trait Sse2U8x16 { - fn move_mask(self) -> u32; - fn adds(self, other: Self) -> Self; - fn subs(self, other: Self) -> Self; - fn avg(self, other: Self) -> Self; - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn sad(self, other: Self) -> u64x2; -} -impl Sse2U8x16 for u8x16 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} - } - - #[inline] - fn adds(self, other: Self) -> Self { - unsafe { x86_mm_adds_epu8(self, other) } - } - #[inline] - fn subs(self, other: Self) -> Self { - unsafe { x86_mm_subs_epu8(self, other) } - } - - #[inline] - fn avg(self, other: Self) -> Self { - unsafe { x86_mm_avg_epu8(self, other) } - } - - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epu8(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epu8(self, other) } - } - - #[inline] - fn sad(self, other: Self) -> u64x2 { - unsafe { x86_mm_sad_epu8(self, other) } - } -} - -pub trait Sse2I8x16 { - fn move_mask(self) -> u32; - fn adds(self, other: Self) -> Self; - fn subs(self, other: Self) -> Self; -} -impl Sse2I8x16 for i8x16 { - #[inline] - fn 
move_mask(self) -> u32 { - unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} - } - - #[inline] - fn adds(self, other: Self) -> Self { - unsafe { x86_mm_adds_epi8(self, other) } - } - #[inline] - fn subs(self, other: Self) -> Self { - unsafe { x86_mm_subs_epi8(self, other) } - } -} - -pub trait Sse2Bool8ix16 { - fn move_mask(self) -> u32; -} -impl Sse2Bool8ix16 for bool8ix16 { - #[inline] - fn move_mask(self) -> u32 { - unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} - } -} diff --git a/third_party/rust/simd/src/x86/sse3.rs b/third_party/rust/simd/src/x86/sse3.rs deleted file mode 100644 index bd70b569f9c0..000000000000 --- a/third_party/rust/simd/src/x86/sse3.rs +++ /dev/null @@ -1,57 +0,0 @@ -use sixty_four::*; -use super::super::*; - -extern "platform-intrinsic" { - fn x86_mm_addsub_ps(x: f32x4, y: f32x4) -> f32x4; - fn x86_mm_addsub_pd(x: f64x2, y: f64x2) -> f64x2; - fn x86_mm_hadd_ps(x: f32x4, y: f32x4) -> f32x4; - fn x86_mm_hadd_pd(x: f64x2, y: f64x2) -> f64x2; - fn x86_mm_hsub_ps(x: f32x4, y: f32x4) -> f32x4; - fn x86_mm_hsub_pd(x: f64x2, y: f64x2) -> f64x2; -} - -pub trait Sse3F32x4 { - fn addsub(self, other: Self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; -} - -impl Sse3F32x4 for f32x4 { - #[inline] - fn addsub(self, other: Self) -> Self { - unsafe { x86_mm_addsub_ps(self, other) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm_hadd_ps(self, other) } - } - - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { x86_mm_hsub_ps(self, other) } - } -} - -pub trait Sse3F64x2 { - fn addsub(self, other: Self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; -} - -impl Sse3F64x2 for f64x2 { - #[inline] - fn addsub(self, other: Self) -> Self { - unsafe { x86_mm_addsub_pd(self, other) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm_hadd_pd(self, other) } - } - - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe 
{ x86_mm_hsub_pd(self, other) } - } -} diff --git a/third_party/rust/simd/src/x86/sse4_1.rs b/third_party/rust/simd/src/x86/sse4_1.rs deleted file mode 100644 index fa44678a0584..000000000000 --- a/third_party/rust/simd/src/x86/sse4_1.rs +++ /dev/null @@ -1,155 +0,0 @@ -use super::super::*; -use x86::sse2::*; - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4; - fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2; - fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8; - fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4; - fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4; - fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8; - fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4; - fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4; - fn x86_mm_minpos_epu16(x: u16x8) -> u16x8; - fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8; - fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2; - fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8; - fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32; - fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32; - fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32; -} - -// 32 bit floats - -pub trait Sse41F32x4 {} -impl Sse41F32x4 for f32x4 {} - -// 64 bit floats - -pub trait Sse41F64x2 {} -impl Sse41F64x2 for f64x2 {} - -// 64 bit integers - -pub trait Sse41U64x2 { - fn testc(self, other: Self) -> i32; - fn testnzc(self, other: Self) -> i32; - fn testz(self, other: Self) -> i32; -} -impl Sse41U64x2 for u64x2 { - #[inline] - fn testc(self, other: Self) -> i32 { - unsafe { x86_mm_testc_si128(self, other) } - } - #[inline] - fn testnzc(self, other: Self) -> i32 { - unsafe { x86_mm_testnzc_si128(self, other) } - } - #[inline] - fn testz(self, other: Self) -> i32 { - unsafe { x86_mm_testz_si128(self, other) } - } -} -pub trait Sse41I64x2 {} -impl Sse41I64x2 for i64x2 {} - -pub trait 
Sse41Bool64ix2 {} -impl Sse41Bool64ix2 for bool64ix2 {} - -// 32 bit integers - -pub trait Sse41U32x4 { - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; -} -impl Sse41U32x4 for u32x4 { - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epu32(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epu32(self, other) } - } -} -pub trait Sse41I32x4 { - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn low_mul(self, other: Self) -> i64x2; - fn packus(self, other: Self) -> u16x8; -} -impl Sse41I32x4 for i32x4 { - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epi32(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epi32(self, other) } - } - - #[inline] - fn low_mul(self, other: Self) -> i64x2 { - unsafe { x86_mm_mul_epi32(self, other) } - } - #[inline] - fn packus(self, other: Self) -> u16x8 { - unsafe { x86_mm_packus_epi32(self, other) } - } -} - -pub trait Sse41Bool32ix4 {} -impl Sse41Bool32ix4 for bool32ix4 {} - -// 16 bit integers - -pub trait Sse41U16x8 { - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; - fn minpos(self) -> Self; -} -impl Sse41U16x8 for u16x8 { - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epu16(self, other) } - } - #[inline] - fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epu16(self, other) } - } - - #[inline] - fn minpos(self) -> Self { - unsafe { x86_mm_minpos_epu16(self) } - } -} -pub trait Sse41I16x8 {} -impl Sse41I16x8 for i16x8 {} - -pub trait Sse41Bool16ix8 {} -impl Sse41Bool16ix8 for bool16ix8 {} - -// 8 bit integers - -pub trait Sse41U8x16 {} -impl Sse41U8x16 for u8x16 {} -pub trait Sse41I8x16 { - fn max(self, other: Self) -> Self; - fn min(self, other: Self) -> Self; -} -impl Sse41I8x16 for i8x16 { - #[inline] - fn max(self, other: Self) -> Self { - unsafe { x86_mm_max_epi8(self, other) } - } - #[inline] - 
fn min(self, other: Self) -> Self { - unsafe { x86_mm_min_epi8(self, other) } - } -} - -pub trait Sse41Bool8ix16 {} -impl Sse41Bool8ix16 for bool8ix16 {} diff --git a/third_party/rust/simd/src/x86/sse4_2.rs b/third_party/rust/simd/src/x86/sse4_2.rs deleted file mode 100644 index 5afe4583cf71..000000000000 --- a/third_party/rust/simd/src/x86/sse4_2.rs +++ /dev/null @@ -1,19 +0,0 @@ -use i8x16; - -#[allow(dead_code)] -extern "platform-intrinsic" { - fn x86_mm_cmpestra(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpestrc(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpestri(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpestrm(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i8x16; - fn x86_mm_cmpestro(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpestrs(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpestrz(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; - fn x86_mm_cmpistra(x: i8x16, y: i8x16, z: i32) -> i32; - fn x86_mm_cmpistrc(x: i8x16, y: i8x16, z: i32) -> i32; - fn x86_mm_cmpistri(x: i8x16, y: i8x16, z: i32) -> i32; - fn x86_mm_cmpistrm(x: i8x16, y: i8x16, z: i32) -> i8x16; - fn x86_mm_cmpistro(x: i8x16, y: i8x16, z: i32) -> i32; - fn x86_mm_cmpistrs(x: i8x16, y: i8x16, z: i32) -> i32; - fn x86_mm_cmpistrz(x: i8x16, y: i8x16, z: i32) -> i32; -} diff --git a/third_party/rust/simd/src/x86/ssse3.rs b/third_party/rust/simd/src/x86/ssse3.rs deleted file mode 100644 index aa22a08a68a4..000000000000 --- a/third_party/rust/simd/src/x86/ssse3.rs +++ /dev/null @@ -1,172 +0,0 @@ -use super::super::*; -use bitcast; - -macro_rules! 
bitcast { - ($func: ident($a: ident, $b: ident)) => { - bitcast($func(bitcast($a), bitcast($b))) - } -} - -extern "platform-intrinsic" { - fn x86_mm_abs_epi8(x: i8x16) -> i8x16; - fn x86_mm_abs_epi16(x: i16x8) -> i16x8; - fn x86_mm_abs_epi32(x: i32x4) -> i32x4; - fn x86_mm_hadd_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_hadd_epi32(x: i32x4, y: i32x4) -> i32x4; - fn x86_mm_hadds_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_hsub_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_hsub_epi32(x: i32x4, y: i32x4) -> i32x4; - fn x86_mm_hsubs_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_maddubs_epi16(x: u8x16, y: i8x16) -> i16x8; - fn x86_mm_mulhrs_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_shuffle_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_sign_epi8(x: i8x16, y: i8x16) -> i8x16; - fn x86_mm_sign_epi16(x: i16x8, y: i16x8) -> i16x8; - fn x86_mm_sign_epi32(x: i32x4, y: i32x4) -> i32x4; -} - -// 32 bit integers - -pub trait Ssse3I32x4 { - fn abs(self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; - fn sign(self, other: Self) -> Self; -} -impl Ssse3I32x4 for i32x4 { - #[inline] - fn abs(self) -> Self { - unsafe { x86_mm_abs_epi32(self) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm_hadd_epi32(self, other) } - } - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { x86_mm_hsub_epi32(self, other) } - } - - #[inline] - fn sign(self, other: Self) -> Self { - unsafe { x86_mm_sign_epi32(self, other) } - } -} - -pub trait Ssse3U32x4 { - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; -} -impl Ssse3U32x4 for u32x4 { - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { bitcast!(x86_mm_hadd_epi32(self, other)) } - } - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { bitcast!(x86_mm_hsub_epi32(self, other)) } - } -} - -// 16 bit integers - -pub trait Ssse3I16x8 { - fn abs(self) -> Self; - fn hadd(self, other: Self) -> Self; - fn hadds(self, other: 
Self) -> Self; - fn hsub(self, other: Self) -> Self; - fn hsubs(self, other: Self) -> Self; - fn sign(self, other: Self) -> Self; - fn mulhrs(self, other: Self) -> Self; -} -impl Ssse3I16x8 for i16x8 { - #[inline] - fn abs(self) -> Self { - unsafe { x86_mm_abs_epi16(self) } - } - - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { x86_mm_hadd_epi16(self, other) } - } - #[inline] - fn hadds(self, other: Self) -> Self { - unsafe { x86_mm_hadds_epi16(self, other) } - } - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { x86_mm_hsub_epi16(self, other) } - } - #[inline] - fn hsubs(self, other: Self) -> Self { - unsafe { x86_mm_hsubs_epi16(self, other) } - } - - #[inline] - fn sign(self, other: Self) -> Self { - unsafe { x86_mm_sign_epi16(self, other) } - } - - #[inline] - fn mulhrs(self, other: Self) -> Self { - unsafe { x86_mm_mulhrs_epi16(self, other) } - } -} - -pub trait Ssse3U16x8 { - fn hadd(self, other: Self) -> Self; - fn hsub(self, other: Self) -> Self; -} -impl Ssse3U16x8 for u16x8 { - #[inline] - fn hadd(self, other: Self) -> Self { - unsafe { bitcast!(x86_mm_hadd_epi16(self, other)) } - } - #[inline] - fn hsub(self, other: Self) -> Self { - unsafe { bitcast!(x86_mm_hsub_epi16(self, other)) } - } -} - - -// 8 bit integers - -pub trait Ssse3U8x16 { - fn shuffle_bytes(self, indices: Self) -> Self; - fn maddubs(self, other: i8x16) -> i16x8; -} - -impl Ssse3U8x16 for u8x16 { - #[inline] - fn shuffle_bytes(self, indices: Self) -> Self { - unsafe {bitcast!(x86_mm_shuffle_epi8(self, indices))} - } - - fn maddubs(self, other: i8x16) -> i16x8 { - unsafe { x86_mm_maddubs_epi16(self, other) } - } -} - -pub trait Ssse3I8x16 { - fn abs(self) -> Self; - fn shuffle_bytes(self, indices: Self) -> Self; - fn sign(self, other: Self) -> Self; -} -impl Ssse3I8x16 for i8x16 { - #[inline] - fn abs(self) -> Self { - unsafe {x86_mm_abs_epi8(self)} - } - #[inline] - fn shuffle_bytes(self, indices: Self) -> Self { - unsafe { - x86_mm_shuffle_epi8(self, indices) - } 
- } - - #[inline] - fn sign(self, other: Self) -> Self { - unsafe { x86_mm_sign_epi8(self, other) } - } -} diff --git a/toolkit/moz.configure b/toolkit/moz.configure index c2b3c768cba9..c3f3de62f09a 100644 --- a/toolkit/moz.configure +++ b/toolkit/moz.configure @@ -696,14 +696,11 @@ set_config('MOZ_ENABLE_WEBRENDER', webrender.enable) option('--enable-rust-simd', env='MOZ_RUST_SIMD', help='Enable explicit SIMD in Rust code.') -@depends('--enable-rust-simd', target, rustc_info) -def rust_simd(value, target, rustc_info): - # As of 2018-06-05, the simd crate only works on aarch64, - # armv7, x86 and x86_64. +@depends('--enable-rust-simd', target) +def rust_simd(value, target): + # As of 2019-03-04, the simd-accel feature of encoding_rs has not + # been properly set up outside aarch64, armv7, x86 and x86_64. if target.cpu in ('aarch64', 'arm', 'x86', 'x86_64') and value: - if rustc_info and rustc_info.version >= Version('1.33.0'): - die('--enable-rust-simd does not work with Rust 1.33 or later. ' - 'See https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 .') return True set_config('MOZ_RUST_SIMD', rust_simd) -- 2.21.0