diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index e2bc9c53..cf55169e 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -147,18 +147,18 @@ jobs: if [ "$(uname -m)" == "x86_64" ]; then cargo \ --config 'target.'\''cfg(all())'\''.runner = ["/opt/sde/sde64", "-spr", "--"]' \ - test -p simd -- --nocapture + test -p simd -- --no-capture fi if [ "$(uname -m)" == "aarch64" ]; then cargo \ --config 'target.'\''cfg(all())'\''.runner = ["qemu-aarch64-static", "-cpu", "max,sve-default-vector-length=16"]' \ - test -p simd -- --nocapture + test -p simd -- --no-capture cargo \ --config 'target.'\''cfg(all())'\''.runner = ["qemu-aarch64-static", "-cpu", "max,sve-default-vector-length=32"]' \ - test -p simd -- --nocapture + test -p simd -- --no-capture cargo \ --config 'target.'\''cfg(all())'\''.runner = ["qemu-aarch64-static", "-cpu", "max,sve-default-vector-length=64"]' \ - test -p simd -- --nocapture + test -p simd -- --no-capture fi psql: @@ -225,6 +225,14 @@ jobs: make PG_CONFIG=$PG_CONFIG PROFILE=dev build sudo make PG_CONFIG=$PG_CONFIG install + - name: Upload Artifacts + uses: actions/upload-artifact@v4 + with: + name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-linux-gnu + path: ./build/raw + compression-level: 9 + retention-days: 14 + - name: Service run: | sudo systemctl start postgresql @@ -239,14 +247,6 @@ jobs: sqllogictest --db $USER --user $USER './tests/vchordrq/pg17/*.slt' fi - - name: Upload Artifacts - uses: actions/upload-artifact@v4 - with: - name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-linux-gnu - path: ./build/raw - compression-level: 9 - retention-days: 14 - psql_macos: if: | (github.event_name == 'push' && contains(github.event.head_commit.message, 'job: +psql_macos')) || @@ -308,6 +308,14 @@ jobs: make PG_CONFIG=$PG_CONFIG PROFILE=dev build sudo make PG_CONFIG=$PG_CONFIG install + - name: Upload Artifacts + uses: actions/upload-artifact@v4 + with: + name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-apple-darwin + path: ./build/raw + compression-level: 9 + retention-days: 14 + - name: Service run: | brew services start postgresql@${{ matrix.version }} @@ -324,14 +332,6 @@ jobs: sqllogictest --db $USER --user $USER './tests/vchordrq/pg17/*.slt' fi - - name: Upload Artifacts - uses: actions/upload-artifact@v4 - with: - name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-apple-darwin - path: ./build/raw - compression-level: 9 - retention-days: 14 - psql_windows: if: | (github.event_name == 'push' && contains(github.event.head_commit.message, 'job: +psql_windows')) || @@ -414,6 +414,14 @@ jobs: make PG_CONFIG=$env:PG_CONFIG PROFILE=dev build make PG_CONFIG=$env:PG_CONFIG install + - name: Upload Artifacts + uses: actions/upload-artifact@v4 + with: + name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-pc-windows-msvc + path: ./build/raw + compression-level: 9 + retention-days: 14 + - name: Service run: | 'PGBIN','PGDATA','PGROOT', 'PGUSER', 'PGPASSWORD' | ForEach-Object { Remove-Item "env:$_" } @@ -430,14 +438,6 @@ jobs: sqllogictest --db $env:USERNAME --user $env:USERNAME './tests/vchordrq/pg17/*.slt' } - - name: Upload Artifacts - uses: actions/upload-artifact@v4 - with: - name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-pc-windows-msvc - path: ./build/raw - compression-level: 9 - retention-days: 14 - psql_alpine: if: | (github.event_name == 'push' && contains(github.event.head_commit.message, 'job: +psql_alpine')) || @@ -486,7 +486,7 @@ jobs: sudo -iu postgres pg_ctl start -D /var/lib/postgresql/data sudo -iu postgres createuser -s -r $USER sudo -iu postgres createdb -O $USER $USER - sudo -iu postgres psql -c 'ALTER SYSTEM SET shared_preload_libraries = "vchord"' + sudo -iu postgres psql -d $USER -c 'ALTER SYSTEM SET shared_preload_libraries = "vchord"' sudo -iu postgres pg_ctl stop -D /var/lib/postgresql/data mkdir ~/pgvector-install @@ -522,10 +522,18 @@ jobs: make PG_CONFIG=$PG_CONFIG PROFILE=dev build sudo make PG_CONFIG=$PG_CONFIG install + - name: Upload Artifacts + uses: actions/upload-artifact@v4 + with: + name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-linux-musl + path: ./build/raw + compression-level: 9 + retention-days: 14 + - name: Service run: | sudo -iu postgres pg_ctl start -D /var/lib/postgresql/data - psql -c 'CREATE EXTENSION IF NOT EXISTS vchord CASCADE;' + psql -d $USER -U $USER -c 'CREATE EXTENSION IF NOT EXISTS vchord CASCADE;' - name: Sqllogictest run: | @@ -536,10 +544,205 @@ jobs: sqllogictest --db $USER --user $USER './tests/vchordrq/pg17/*.slt' fi + check_debian: + if: | + (github.event_name == 'push' && contains(github.event.head_commit.message, 'job: +check_debian')) || + (github.event_name == 'pull_request' && contains(github.event.pull_request.body, 'job: +check_debian')) || + github.event_name == 'workflow_dispatch' + + strategy: + matrix: + include: + - version: "15" + platform: "s390x" + clang_triple: "s390x-linux-gnu" + rust_triple: "s390x-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "17" + platform: "s390x" + clang_triple: "s390x-linux-gnu" + rust_triple: "s390x-unknown-linux-gnu" + gcc_version: "14" + debian_version: "trixie" + - version: "13" + platform: "ppc64le" + clang_triple: "powerpc64le-linux-gnu" + rust_triple: "powerpc64le-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "14" + platform: "ppc64le" + clang_triple: "powerpc64le-linux-gnu" + rust_triple: "powerpc64le-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "15" + platform: "ppc64le" + clang_triple: "powerpc64le-linux-gnu" + rust_triple: "powerpc64le-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "16" + platform: "ppc64le" + clang_triple: "powerpc64le-linux-gnu" + rust_triple: "powerpc64le-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "17" + platform: "ppc64le" + clang_triple: "powerpc64le-linux-gnu" + rust_triple: "powerpc64le-unknown-linux-gnu" + gcc_version: "12" + debian_version: "bookworm" + - version: "17" + platform: "riscv64" + clang_triple: "riscv64-linux-gnu" + rust_triple: "riscv64gc-unknown-linux-gnu" + gcc_version: "14" + debian_version: "trixie" + runs-on: "ubuntu-24.04" + + env: + SCCACHE_GHA_ENABLED: "true" + RUSTUP_AUTO_INSTALL: "0" + RUSTC_WRAPPER: "sccache" + # RUSTFLAGS: "-Dwarnings" + CARGO_TERM_COLOR: "always" + RUST_BACKTRACE: "1" + + steps: + - name: Set up Environment + run: | + sudo apt-get update + sudo apt-get install -y qemu-user-static clang lld + sudo systemctl enable --now systemd-binfmt + sudo mkdir /sysroot + curl -fsSL https://github.com/debuerreotype/docker-debian-artifacts/raw/refs/heads/dist-${{ matrix.platform }}/${{ matrix.debian_version }}/oci/blobs/rootfs.tar.gz | sudo tar -xz -C /sysroot + sudo mount --bind /dev /sysroot/dev + sudo mount --bind /dev/pts /sysroot/dev/pts + sudo mount --bind /etc/resolv.conf /sysroot/etc/resolv.conf + sudo mount --bind /proc /sysroot/proc + sudo mount --bind /sys /sysroot/sys + sudo mount --bind /tmp /sysroot/tmp + sudo chroot /sysroot apt-get update + sudo chroot /sysroot apt-get install --no-install-recommends -y libc6-dev libgcc-${{ matrix.gcc_version }}-dev + sudo chroot /sysroot apt-get install --no-install-recommends -y ca-certificates sudo + QEMU_LD_PREFIX=/sysroot + QEMU_CPU=max + if [ "${{ matrix.platform }}" = "ppc64le" ]; then + sudo rm -f /sysroot/lib64/ld64.so.2 + sudo ln /sysroot/usr/lib/powerpc64le-linux-gnu/ld64.so.2 /sysroot/lib64/ld64.so.2 + QEMU_CPU=power10 + fi + export QEMU_LD_PREFIX + export QEMU_CPU + echo QEMU_LD_PREFIX=$QEMU_LD_PREFIX >> $GITHUB_ENV + echo QEMU_CPU=$QEMU_CPU >> $GITHUB_ENV + echo "Defaults env_keep += \"QEMU_CPU\"" | sudo tee -a /etc/sudoers + + sudo apt-get remove -y '^postgres.*' '^libpq.*' + sudo apt-get purge -y '^postgres.*' '^libpq.*' + sudo chroot /sysroot apt-get update + sudo chroot /sysroot apt-get install --no-install-recommends -y postgresql-common + sudo chroot /sysroot /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y + sudo chroot /sysroot apt-get install --no-install-recommends -y postgresql-${{ matrix.version }} postgresql-server-dev-${{ matrix.version }} + sudo touch /usr/bin/pg_config + echo "#!/usr/bin/env bash" | sudo tee -a /usr/bin/pg_config + echo "sudo chroot /sysroot pg_config \"\$@\" \\" | sudo tee -a /usr/bin/pg_config + echo " | sed -E 's|^/(.*)$|/sysroot/\1|' \\" | sudo tee -a /usr/bin/pg_config + echo " | sed -E 's|^([A-Z]+) = /(.*)$|\1 = /sysroot/\2|'" | sudo tee -a /usr/bin/pg_config + sudo chmod 755 /usr/bin/pg_config + pg_config + + echo "local all all trust" | sudo tee /sysroot/etc/postgresql/${{ matrix.version }}/main/pg_hba.conf + echo "host all all 127.0.0.1/32 trust" | sudo tee -a /sysroot/etc/postgresql/${{ matrix.version }}/main/pg_hba.conf + echo "host all all ::1/128 trust" | sudo tee -a /sysroot/etc/postgresql/${{ matrix.version }}/main/pg_hba.conf + sudo chroot /sysroot pg_ctlcluster ${{ matrix.version }} main start + sudo chroot /sysroot sudo -iu postgres createuser -s -r $USER + sudo chroot /sysroot sudo -iu postgres createdb -O $USER $USER + sudo chroot /sysroot sudo -iu postgres psql -d $USER -c 'ALTER SYSTEM SET shared_preload_libraries = "vchord"' + sudo chroot /sysroot pg_ctlcluster ${{ matrix.version }} main stop + + mkdir ~/pgvector-install + curl -fsSL https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz | tar -xz -C ~/pgvector-install + make -C ~/pgvector-install/pgvector-0.8.0 with_llvm=no CC=clang CFLAGS="-fuse-ld=lld --target=${{ matrix.clang_triple }} --sysroot=/sysroot" OPTFLAGS="" + sudo make -C ~/pgvector-install/pgvector-0.8.0 with_llvm=no CC=clang CFLAGS="-fuse-ld=lld --target=${{ matrix.clang_triple }} --sysroot=/sysroot" OPTFLAGS="" install + + curl -fsSL https://github.com/risinglightdb/sqllogictest-rs/releases/download/v0.26.4/sqllogictest-bin-v0.26.4-$(uname -m)-unknown-linux-musl.tar.gz | tar -xOzf - ./sqllogictest | install -m 755 /dev/stdin /usr/local/bin/sqllogictest + + - name: Set up Sccache + uses: mozilla-actions/sccache-action@v0.0.9 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Patch + run: | + cat << EOF > rust-toolchain.toml + [toolchain] + channel = "nightly-2025-08-29" + components = ["rustfmt", "clippy"] + targets = ["${{ matrix.rust_triple }}"] + profile = "minimal" + EOF + mkdir -p .cargo + cat << EOF > .cargo/config.toml + [target.${{ matrix.rust_triple }}] + runner = ["qemu-${{ matrix.platform }}-static"] + linker = "clang" + rustflags = [ + "-Clink-arg=-fuse-ld=lld", + "-Clink-arg=--target=${{ matrix.clang_triple }}", + "-Clink-arg=--sysroot=/sysroot", + "-Dwarnings", + ] + [env] + CC_${{ matrix.rust_triple }} = "clang" + CFLAGS_${{ matrix.rust_triple }} = "--target=${{ matrix.clang_triple }} --sysroot=/sysroot" + BINDGEN_EXTRA_CLANG_ARGS_${{ matrix.rust_triple }} = "--sysroot=/sysroot" + [patch.crates-io] + pgrx = { git = "https://github.com/tensorchord/pgrx.git", branch = "big-endian" } + EOF + rustup toolchain install + + - name: Clippy & Test + run: | + PGRX_PG_CONFIG_PATH=pg_config \ + cargo clippy --target ${{ matrix.rust_triple }} \ + --workspace --features pg${{ matrix.version }} + PGRX_PG_CONFIG_PATH=pg_config \ + cargo test --target ${{ matrix.rust_triple }} \ + --workspace --exclude vchord --no-fail-fast \ + -- --no-capture + + - name: Install + run: | + make \ + TARGET=${{ matrix.rust_triple }} \ + PROFILE=dev \ + RUNNER='qemu-${{ matrix.platform }}-static' \ + build + sudo make install + - name: Upload Artifacts uses: actions/upload-artifact@v4 with: - name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.arch }}-linux-musl + name: postgresql-${{ matrix.version }}-vchord_0.0.0_${{ matrix.clang_triple }} path: ./build/raw compression-level: 9 retention-days: 14 + + - name: Service + run: | + sudo chroot /sysroot pg_ctlcluster ${{ matrix.version }} main start + sudo chroot /sysroot psql -d $USER -U $USER -c 'CREATE EXTENSION IF NOT EXISTS vchord CASCADE;' + + - name: Sqllogictest + run: | + sqllogictest --db $USER --user $USER './tests/general/*.slt' + sqllogictest --db $USER --user $USER './tests/vchordg/*.slt' + sqllogictest --db $USER --user $USER './tests/vchordrq/*.slt' + if [ "${{ matrix.version }}" = "17" ]; then + sqllogictest --db $USER --user $USER './tests/vchordrq/pg17/*.slt' + fi diff --git a/Cargo.toml b/Cargo.toml index a3e84be6..9a087514 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,9 +71,11 @@ validator = { version = "0.20.0", features = ["derive"] } zerocopy = { version = "0.8.26", features = ["derive"] } [workspace.lints] +rust.unknown_lints = "allow" # complexity clippy.identity_op = "allow" clippy.int_plus_one = "allow" +clippy.manual_is_multiple_of = "allow" clippy.nonminimal_bool = "allow" clippy.too_many_arguments = "allow" clippy.type_complexity = "allow" diff --git a/Makefile b/Makefile index 920c6c30..8f7390a4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ PG_CONFIG ?= pg_config PKGLIBDIR := $(shell $(PG_CONFIG) --pkglibdir) SHAREDIR := $(shell $(PG_CONFIG) --sharedir) -PROFILE ?= release MKDIR ?= mkdir CP ?= cp @@ -10,7 +9,7 @@ CP ?= cp all: build build: - PGRX_PG_CONFIG_PATH="$(PG_CONFIG)" cargo run -p make -- build --output ./build/raw --profile $(PROFILE) + PGRX_PG_CONFIG_PATH="$(PG_CONFIG)" cargo run -p make -- build --output ./build/raw install: $(MKDIR) -p $(DESTDIR)$(PKGLIBDIR) $(DESTDIR)$(SHAREDIR) && \ diff --git a/crates/make/src/main.rs b/crates/make/src/main.rs index 243cc5a7..51d217cb 100644 --- a/crates/make/src/main.rs +++ b/crates/make/src/main.rs @@ -36,14 +36,12 @@ enum Commands { struct BuildArgs { #[arg(short, long)] output: String, - #[arg(long, default_value = "release")] - profile: String, - #[arg(long, default_value = target_triple::TARGET)] + #[arg(long, default_value = target_triple::TARGET, env = "TARGET")] target: String, - #[arg(long)] + #[arg(long, default_value = "release", env = "PROFILE")] + profile: String, + #[arg(long, env = "RUNNER")] runner: Option, - #[arg(long, action = clap::ArgAction::SetTrue, env = "EXPERIMENTAL", value_parser = clap::builder::FalseyValueParser::new())] - experimental: bool, } struct TargetSpecificInformation { @@ -51,6 +49,7 @@ struct TargetSpecificInformation { is_windows: bool, is_emscripten: bool, is_unix: bool, + is_powerpc64: bool, } impl TargetSpecificInformation { @@ -156,6 +155,7 @@ fn target_specific_information(target: &str) -> Result Result> { let mut command = Command::new("cargo"); command .args(["build", "-p", "vchord", "--lib"]) .args(["--profile", profile]) .args(["--target", target]) - .args(["--features".into(), { - let mut features = vec![pg_version]; - if experimental { - features.push("simd/experimental"); - } - features.join(",") - }]) + .args(["--features", pg_version]) .env("PGRX_PG_CONFIG_PATH", pg_config.as_ref()) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()); @@ -243,26 +236,39 @@ fn generate( profile: &str, target: &str, exports: Vec, - experimental: bool, + postmaster: impl AsRef, ) -> Result> { + let imports = if tsi.is_powerpc64 { + let postmaster = postmaster.as_ref(); + eprintln!("Reading {postmaster:?}"); + let contents = std::fs::read(postmaster)?; + let object = object::File::parse(contents.as_slice())?; + object + .exports()? + .into_iter() + .flat_map(|x| std::str::from_utf8(x.name())) + .filter(|x| !["_start", "_IO_stdin_used", "main"].contains(x)) + .map(str::to_string) + .collect::>() + } else { + Vec::new() + }; let pgrx_embed = std::env::temp_dir().join("VCHORD_PGRX_EMBED"); eprintln!("Writing {pgrx_embed:?}"); std::fs::write( &pgrx_embed, - format!("crate::schema_generation!({});", exports.join(" ")), + format!( + "crate::schema_generation!({}; {});", + exports.join(" "), + imports.join(" ") + ), )?; let mut command = Command::new("cargo"); command .args(["rustc", "-p", "vchord", "--bin", "pgrx_embed_vchord"]) .args(["--profile", profile]) .args(["--target", target]) - .args(["--features".into(), { - let mut features = vec![pg_version]; - if experimental { - features.push("simd/experimental"); - } - features.join(",") - }]) + .args(["--features", pg_version]) .env("PGRX_PG_CONFIG_PATH", pg_config.as_ref()) .args(["--", "--cfg", "pgrx_embed"]) .env("PGRX_EMBED", &pgrx_embed) @@ -341,10 +347,9 @@ fn main() -> Result<(), Box> { match cli.command { Commands::Build(BuildArgs { output, - profile, target, + profile, runner, - experimental, }) => { let runner = runner.and_then(|runner| shlex::split(&runner)); let path = if let Some(value) = var_os("PGRX_PG_CONFIG_PATH") { @@ -367,8 +372,9 @@ fn main() -> Result<(), Box> { return Err("PostgreSQL version is invalid.".into()); } }; + let postmaster = format!("{}/postgres", pg_config["BINDIR"]); let tsi = target_specific_information(&target)?; - let obj = build(&path, &pg_version, &tsi, &profile, &target, experimental)?; + let obj = build(&path, &pg_version, &tsi, &profile, &target)?; let pkglibdir = format!("{output}/pkglibdir"); let sharedir = format!("{output}/sharedir"); let sharedir_extension = format!("{sharedir}/extension"); @@ -413,7 +419,7 @@ fn main() -> Result<(), Box> { &profile, &target, exports, - experimental, + postmaster, )?, format!("{sharedir_extension}/vchord--0.0.0.sql"), false, diff --git a/crates/simd/Cargo.toml b/crates/simd/Cargo.toml index 33774431..5157fed0 100644 --- a/crates/simd/Cargo.toml +++ b/crates/simd/Cargo.toml @@ -6,7 +6,8 @@ publish = false [features] init = [] -experimental = ["zerocopy/float-nightly"] +experimental_f16 = ["zerocopy/float-nightly"] +experimental_math = [] [dependencies] simd_macros = { path = "../simd_macros" } diff --git a/crates/simd/build.rs b/crates/simd/build.rs index 4bbcf142..36e35d29 100644 --- a/crates/simd/build.rs +++ b/crates/simd/build.rs @@ -12,7 +12,7 @@ // // Copyright (c) 2025 TensorChord Inc. -use std::env::{VarError, var}; +use std::env::var; use std::error::Error; use std::ffi::OsString; use std::path::Path; @@ -70,16 +70,8 @@ fn main() -> Result<(), Box> { build.opt_level(3); build.compile("simd_cshim"); } - "powerpc64" => { - if let Err(VarError::NotPresent) = var("CARGO_FEATURE_EXPERIMENTAL") { - println!("cargo::error=`experimental` should be enabled on this platform"); - } - } - "s390x" => { - if let Err(VarError::NotPresent) = var("CARGO_FEATURE_EXPERIMENTAL") { - println!("cargo::error=`experimental` should be enabled on this platform"); - } - } + "powerpc64" => {} + "s390x" => {} "x86_64" => { let mut build = cc::Build::new(); if let Some(compiler) = compiler(&host, &target, 16, 12) { @@ -90,9 +82,7 @@ fn main() -> Result<(), Box> { build.compile("simd_cshim"); } _ => { - if let Err(VarError::NotPresent) = var("CARGO_FEATURE_EXPERIMENTAL") { - println!("cargo::error=`experimental` should be enabled on this platform"); - } + /* let messages = [ "This platform has poor SIMD implementation.", "Please submit a feature request on https://github.com/tensorchord/VectorChord/issues.", @@ -100,6 +90,7 @@ fn main() -> Result<(), Box> { for message in messages { println!("cargo::warning={message}"); } + */ } } Ok(()) diff --git a/crates/simd/src/bit.rs b/crates/simd/src/bit.rs index 7bc4305a..e372f860 100644 --- a/crates/simd/src/bit.rs +++ b/crates/simd/src/bit.rs @@ -183,7 +183,7 @@ mod reduce_sum_of_and { } } - #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_and(lhs: &[u64], rhs: &[u64]) -> u32 { assert_eq!(lhs.len(), rhs.len()); let n = lhs.len(); @@ -366,7 +366,7 @@ mod reduce_sum_of_or { } } - #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_or(lhs: &[u64], rhs: &[u64]) -> u32 { assert_eq!(lhs.len(), rhs.len()); let n = lhs.len(); @@ -549,7 +549,7 @@ mod reduce_sum_of_xor { } } - #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_xor(lhs: &[u64], rhs: &[u64]) -> u32 { assert_eq!(lhs.len(), rhs.len()); let n = lhs.len(); @@ -772,7 +772,7 @@ mod reduce_sum_of_and_or { } } - #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_and_or(lhs: &[u64], rhs: &[u64]) -> (u32, u32) { assert_eq!(lhs.len(), rhs.len()); let n = lhs.len(); @@ -933,7 +933,7 @@ mod reduce_sum_of_x { } } - #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x(this: &[u64]) -> u32 { let n = this.len(); let mut sum = 0; @@ -951,7 +951,7 @@ pub fn vector_and(lhs: &[u64], rhs: &[u64]) -> Vec { mod vector_and { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_and(lhs: &[u64], rhs: &[u64]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -976,7 +976,7 @@ pub fn vector_or(lhs: &[u64], rhs: &[u64]) -> Vec { mod vector_or { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_or(lhs: &[u64], rhs: &[u64]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -1001,7 +1001,7 @@ pub fn vector_xor(lhs: &[u64], rhs: &[u64]) -> Vec { mod vector_xor { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_xor(lhs: &[u64], rhs: &[u64]) -> Vec { assert_eq!(lhs.len(), rhs.len()); diff --git a/crates/simd/src/fast_scan.rs b/crates/simd/src/fast_scan.rs index a2827f98..47a6c451 100644 --- a/crates/simd/src/fast_scan.rs +++ b/crates/simd/src/fast_scan.rs @@ -672,7 +672,7 @@ pub fn scan(code: &[[u8; 16]], lut: &[[u8; 16]]) -> [u16; 32] { mod accu { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn accu(sum: &mut [u32; 32], delta: &[u16; 32]) { for i in 0..32 { diff --git a/crates/simd/src/fht.rs b/crates/simd/src/fht.rs index 84238b48..e7effbab 100644 --- a/crates/simd/src/fht.rs +++ b/crates/simd/src/fht.rs @@ -33,7 +33,7 @@ mod step_1 { seq_macro::seq!( Q in 0..16 { mod dispatch_~Q { - #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn f(x: &mut [f32]) { crate::fht::basic_1::(x); } @@ -48,7 +48,7 @@ mod step_2 { seq_macro::seq!( Q in 0..16 { mod dispatch_~Q { - #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn f(x: &mut [f32]) { crate::fht::basic_2::(x); } @@ -62,7 +62,7 @@ mod step_2 { macro_rules! fht { ($p:literal, 0) => { { - #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] fn walk(x: &mut [f32]) { assert!(x.len() == (1 << $p)); seq_macro::seq!( diff --git a/crates/simd/src/floating_f16.rs b/crates/simd/src/floating_f16.rs index 4d9dff3b..0316394e 100644 --- a/crates/simd/src/floating_f16.rs +++ b/crates/simd/src/floating_f16.rs @@ -150,7 +150,7 @@ mod reduce_or_of_is_zero_x { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_or_of_is_zero_x(this: &[f16]) -> bool { for &x in this { @@ -168,17 +168,17 @@ mod reduce_sum_of_x { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_sum_of_x(this: &[f16]) -> f32 { let n = this.len(); let mut x = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(all(feature = "experimental_math", feature = "experimental_f16")))] { x += this[i]._to_f32(); } - #[cfg(feature = "experimental")] + #[cfg(all(feature = "experimental_math", feature = "experimental_f16"))] { x = x.algebraic_add(this[i]._to_f32()); } @@ -193,17 +193,17 @@ mod reduce_sum_of_abs_x { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_sum_of_abs_x(this: &[f16]) -> f32 { let n = this.len(); let mut x = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(all(feature = "experimental_math", feature = "experimental_f16")))] { x += this[i]._to_f32().abs(); } - #[cfg(feature = "experimental")] + #[cfg(all(feature = "experimental_math", feature = "experimental_f16"))] { x = x.algebraic_add(this[i]._to_f32().abs()); } @@ -218,17 +218,17 @@ mod reduce_sum_of_x2 { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_sum_of_x2(this: &[f16]) -> f32 { let n = this.len(); let mut x2 = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(all(feature = "experimental_math", feature = "experimental_f16")))] { x2 += this[i]._to_f32() * this[i]._to_f32(); } - #[cfg(feature = "experimental")] + #[cfg(all(feature = "experimental_math", feature = "experimental_f16"))] { x2 = x2.algebraic_add(this[i]._to_f32().algebraic_mul(this[i]._to_f32())); } @@ -243,7 +243,7 @@ mod reduce_min_max_of_x { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_min_max_of_x(this: &[f16]) -> (f32, f32) { let mut min = f32::INFINITY; @@ -520,17 +520,17 @@ mod reduce_sum_of_xy { } } - #[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_xy(lhs: &[f16], rhs: &[f16]) -> f32 { assert!(lhs.len() == rhs.len()); let n = lhs.len(); let mut xy = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(all(feature = "experimental_math", feature = "experimental_f16")))] { xy += lhs[i]._to_f32() * rhs[i]._to_f32(); } - #[cfg(feature = "experimental")] + #[cfg(all(feature = "experimental_math", feature = "experimental_f16"))] { xy = xy.algebraic_add(lhs[i]._to_f32().algebraic_mul(rhs[i]._to_f32())); } @@ -810,18 +810,18 @@ mod reduce_sum_of_d2 { } } - #[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_d2(lhs: &[f16], rhs: &[f16]) -> f32 { assert!(lhs.len() == rhs.len()); let n = lhs.len(); let mut d2 = 0.0_f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(all(feature = "experimental_math", feature = "experimental_f16")))] { let d = lhs[i]._to_f32() - rhs[i]._to_f32(); d2 += d * d; } - #[cfg(feature = "experimental")] + #[cfg(all(feature = "experimental_math", feature = "experimental_f16"))] { let d = lhs[i]._to_f32().algebraic_sub(rhs[i]._to_f32()); d2 = d2.algebraic_add(d.algebraic_mul(d)); @@ -838,7 +838,7 @@ mod reduce_sum_of_xy_sparse { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_sum_of_xy_sparse(lidx: &[u32], lval: &[f16], ridx: &[u32], rval: &[f16]) -> f32 { use std::cmp::Ordering; @@ -873,7 +873,7 @@ mod reduce_sum_of_d2_sparse { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_sum_of_d2_sparse(lidx: &[u32], lval: &[f16], ridx: &[u32], rval: &[f16]) -> f32 { use std::cmp::Ordering; @@ -914,7 +914,7 @@ mod vector_add { use crate::f16; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_add(lhs: &[f16], rhs: &[f16]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -936,7 +936,7 @@ mod vector_add_inplace { use crate::f16; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_add_inplace(lhs: &mut [f16], rhs: &[f16]) { assert_eq!(lhs.len(), rhs.len()); @@ -951,7 +951,7 @@ mod vector_sub { use crate::f16; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_sub(lhs: &[f16], rhs: &[f16]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -973,7 +973,7 @@ mod vector_mul { use crate::f16; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul(lhs: &[f16], rhs: &[f16]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -995,7 +995,7 @@ mod vector_mul_scalar { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul_scalar(lhs: &[f16], rhs: f32) -> Vec { let rhs = f16::_from_f32(rhs); @@ -1017,7 +1017,7 @@ mod vector_mul_scalar_inplace { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul_scalar_inplace(lhs: &mut [f16], rhs: f32) { let rhs = f16::_from_f32(rhs); @@ -1032,7 +1032,7 @@ mod vector_abs_inplace { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_abs_inplace(this: &mut [f16]) { let n = this.len(); @@ -1046,7 +1046,7 @@ mod vector_from_f32 { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_from_f32(this: &[f32]) -> Vec { let n = this.len(); @@ -1067,7 +1067,7 @@ mod vector_to_f32 { use crate::{F16, f16}; #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_to_f32(this: &[f16]) -> Vec { let n = this.len(); diff --git a/crates/simd/src/floating_f32.rs b/crates/simd/src/floating_f32.rs index f1ec2e12..4fe48642 100644 --- a/crates/simd/src/floating_f32.rs +++ b/crates/simd/src/floating_f32.rs @@ -148,7 +148,7 @@ impl Floating for f32 { mod reduce_or_of_is_zero_x { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn reduce_or_of_is_zero_x(this: &[f32]) -> bool { for &x in this { @@ -413,16 +413,16 @@ mod reduce_sum_of_x { } } - #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x(this: &[f32]) -> f32 { let n = this.len(); let mut sum = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(feature = "experimental_math"))] { sum += this[i]; } - #[cfg(feature = "experimental")] + #[cfg(feature = "experimental_math")] { sum = sum.algebraic_add(this[i]); } @@ -695,16 +695,16 @@ mod reduce_sum_of_abs_x { } } - #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_abs_x(this: &[f32]) -> f32 { let n = this.len(); let mut sum = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(feature = "experimental_math"))] { sum += this[i].abs(); } - #[cfg(feature = "experimental")] + #[cfg(feature = "experimental_math")] { sum = sum.algebraic_add(this[i].abs()); } @@ -967,16 +967,16 @@ mod reduce_sum_of_x2 { } } - #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x2(this: &[f32]) -> f32 { let n = this.len(); let mut x2 = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(feature = "experimental_math"))] { x2 += this[i] * this[i]; } - #[cfg(feature = "experimental")] + #[cfg(feature = "experimental_math")] { x2 = x2.algebraic_add(this[i].algebraic_mul(this[i])); } @@ -1245,7 +1245,7 @@ mod reduce_min_max_of_x { } } - #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_min_max_of_x(this: &[f32]) -> (f32, f32) { let mut min = f32::INFINITY; let mut max = f32::NEG_INFINITY; @@ -1562,17 +1562,17 @@ mod reduce_sum_of_xy { } } - #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_xy(lhs: &[f32], rhs: &[f32]) -> f32 { assert!(lhs.len() == rhs.len()); let n = lhs.len(); let mut xy = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(feature = "experimental_math"))] { xy += lhs[i] * rhs[i]; } - #[cfg(feature = "experimental")] + #[cfg(feature = "experimental_math")] { xy = xy.algebraic_add(lhs[i].algebraic_mul(rhs[i])); } @@ -1894,18 +1894,18 @@ mod reduce_sum_of_d2 { } } - #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2:fma", #[cfg(target_endian = "little")] @"a3.256", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_d2(lhs: &[f32], rhs: &[f32]) -> f32 { assert!(lhs.len() == rhs.len()); let n = lhs.len(); let mut d2 = 0.0f32; for i in 0..n { - #[cfg(not(feature = "experimental"))] + #[cfg(not(feature = "experimental_math"))] { let d = lhs[i] - rhs[i]; d2 += d * d; } - #[cfg(feature = "experimental")] + #[cfg(feature = "experimental_math")] { let d = lhs[i].algebraic_sub(rhs[i]); d2 = d2.algebraic_add(d.algebraic_mul(d)); @@ -2004,7 +2004,7 @@ mod reduce_sum_of_xy_sparse { } } - #[crate::multiversion(@"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_xy_sparse(lidx: &[u32], lval: &[f32], ridx: &[u32], rval: &[f32]) -> f32 { use std::cmp::Ordering; assert_eq!(lidx.len(), lval.len()); @@ -2154,7 +2154,7 @@ mod reduce_sum_of_d2_sparse { } } - #[crate::multiversion(@"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_d2_sparse(lidx: &[u32], lval: &[f32], ridx: &[u32], rval: &[f32]) -> f32 { use std::cmp::Ordering; assert_eq!(lidx.len(), lval.len()); @@ -2192,7 +2192,7 @@ mod reduce_sum_of_d2_sparse { mod vector_add { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_add(lhs: &[f32], rhs: &[f32]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -2212,7 +2212,7 @@ mod vector_add { mod vector_add_inplace { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_add_inplace(lhs: &mut [f32], rhs: &[f32]) { assert_eq!(lhs.len(), rhs.len()); @@ -2225,7 +2225,7 @@ mod vector_add_inplace { mod vector_sub { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_sub(lhs: &[f32], rhs: &[f32]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -2245,7 +2245,7 @@ mod vector_sub { mod vector_mul { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul(lhs: &[f32], rhs: &[f32]) -> Vec { assert_eq!(lhs.len(), rhs.len()); @@ -2265,7 +2265,7 @@ mod vector_mul { mod vector_mul_scalar { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul_scalar(lhs: &[f32], rhs: f32) -> Vec { let n = lhs.len(); @@ -2284,7 +2284,7 @@ mod vector_mul_scalar { mod vector_mul_scalar_inplace { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_mul_scalar_inplace(lhs: &mut [f32], rhs: f32) { let n = lhs.len(); @@ -2296,7 +2296,7 @@ mod vector_mul_scalar_inplace { mod vector_abs_inplace { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn vector_abs_inplace(this: &mut [f32]) { let n = this.len(); diff --git a/crates/simd/src/lib.rs b/crates/simd/src/lib.rs index 5e2c91a4..7422f795 100644 --- a/crates/simd/src/lib.rs +++ b/crates/simd/src/lib.rs @@ -13,14 +13,17 @@ // Copyright (c) 2025 TensorChord Inc. #![allow(unsafe_code, internal_features)] -#![cfg_attr(feature = "experimental", feature(float_algebraic, f16))] -#![cfg_attr(feature = "experimental", feature(core_intrinsics))] +#![cfg_attr(feature = "experimental_f16", feature(f16))] +#![cfg_attr(feature = "experimental_math", feature(float_algebraic))] #![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] #![cfg_attr(target_arch = "s390x", feature(s390x_target_feature))] #![cfg_attr(target_arch = "s390x", feature(stdarch_s390x))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", feature(powerpc_target_feature))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc))] +#![cfg_attr(target_arch = "powerpc64", feature(core_intrinsics))] +#![cfg_attr(target_arch = "riscv64", feature(stdarch_riscv_feature_detection))] +#![cfg_attr(target_arch = "riscv64", feature(riscv_target_feature))] mod aligned; mod emulate; @@ -34,10 +37,10 @@ pub mod quantize; pub mod rotate; pub mod u8; -#[cfg(not(feature = "experimental"))] +#[cfg(not(feature = "experimental_f16"))] pub use half::f16; -#[cfg(feature = "experimental")] +#[cfg(feature = "experimental_f16")] pub use f16; pub trait F16: Sized { @@ -48,7 +51,7 @@ pub trait F16: Sized { fn _to_f32(self) -> f32; } -#[cfg(not(feature = "experimental"))] +#[cfg(not(feature = "experimental_f16"))] impl F16 for f16 { const _ZERO: Self = f16::ZERO; @@ -61,7 +64,7 @@ impl F16 for f16 { } } -#[cfg(feature = "experimental")] +#[cfg(feature = "experimental_f16")] impl F16 for f16 { const _ZERO: Self = 0.0; @@ -121,6 +124,9 @@ mod internal { #[cfg(target_arch = "powerpc64")] simd_macros::define_is_cpu_detected!("powerpc64"); + #[cfg(target_arch = "riscv64")] + simd_macros::define_is_cpu_detected!("riscv64"); + #[cfg(target_arch = "x86_64")] #[allow(unused_imports)] pub use is_x86_64_cpu_detected; @@ -137,6 +143,10 @@ mod internal { #[allow(unused_imports)] pub use is_powerpc64_cpu_detected; + #[cfg(target_arch = "riscv64")] + #[allow(unused_imports)] + pub use is_riscv64_cpu_detected; + #[cfg(target_arch = "x86_64")] pub fn is_v4_detected() -> bool { std::arch::is_x86_feature_detected!("avx512bw") @@ -291,6 +301,11 @@ mod internal { std::arch::is_powerpc64_feature_detected!("altivec") && std::arch::is_powerpc64_feature_detected!("vsx") } + + #[cfg(target_arch = "riscv64")] + pub fn is_r1_detected() -> bool { + std::arch::is_riscv_feature_detected!("v") + } } pub use simd_macros::{multiversion, target_cpu}; @@ -326,3 +341,7 @@ pub use internal::is_s390x_cpu_detected as is_cpu_detected; #[cfg(target_arch = "powerpc64")] #[allow(unused_imports)] pub use internal::is_powerpc64_cpu_detected as is_cpu_detected; + +#[cfg(target_arch = "riscv64")] +#[allow(unused_imports)] +pub use internal::is_riscv64_cpu_detected as is_cpu_detected; diff --git a/crates/simd/src/quantize.rs b/crates/simd/src/quantize.rs index 24e99830..90c88fcd 100644 --- a/crates/simd/src/quantize.rs +++ b/crates/simd/src/quantize.rs @@ -283,7 +283,7 @@ mod mul_add_round { } } - #[crate::multiversion(@"v4", @"v3", @"v2:fma", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2:fma", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn mul_add_round(this: &[f32], k: f32, b: f32) -> Vec { let n = this.len(); let mut r = Vec::::with_capacity(n); diff --git a/crates/simd/src/rotate.rs b/crates/simd/src/rotate.rs index 795ea28c..15916229 100644 --- a/crates/simd/src/rotate.rs +++ b/crates/simd/src/rotate.rs @@ -14,7 +14,7 @@ pub mod givens { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn givens(lhs: &mut [f32], rhs: &mut [f32]) { assert!(lhs.len() == rhs.len()); @@ -32,7 +32,7 @@ pub fn givens(lhs: &mut [f32], rhs: &mut [f32]) { pub mod flip { #[crate::multiversion( - "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7" + "v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1" )] pub fn flip(bits: &[u64; 1024], result: &mut [f32]) { use std::hint::select_unpredictable; diff --git a/crates/simd/src/u8.rs b/crates/simd/src/u8.rs index 628bedf3..f0cfa839 100644 --- a/crates/simd/src/u8.rs +++ b/crates/simd/src/u8.rs @@ -311,7 +311,7 @@ mod reduce_sum_of_x_as_u32_y_as_u32 { } } - #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x_as_u32_y_as_u32(s: &[u8], t: &[u8]) -> u32 { assert_eq!(s.len(), t.len()); let n = s.len(); @@ -517,7 +517,7 @@ mod reduce_sum_of_x_as_u16 { } } - #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x_as_u16(this: &[u8]) -> u16 { let n = this.len(); let mut sum = 0; @@ -722,7 +722,7 @@ mod reduce_sum_of_x_as_u32 { } } - #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")] + #[crate::multiversion(@"v4", @"v3", @"v2", @"a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7", "r1")] pub fn reduce_sum_of_x_as_u32(this: &[u8]) -> u32 { let n = this.len(); let mut sum = 0; diff --git a/crates/simd_macros/src/target.rs b/crates/simd_macros/src/target.rs index ba94cf60..5903f72a 100644 --- a/crates/simd_macros/src/target.rs +++ b/crates/simd_macros/src/target.rs @@ -153,4 +153,9 @@ pub const TARGET_CPUS: &[TargetCpu] = &[ target_arch: "powerpc64", target_features: &["altivec", "vsx"], }, + TargetCpu { + target_cpu: "r1", + target_arch: "riscv64", + target_features: &["v"], + }, ]; diff --git a/src/bin/pgrx_embed.rs b/src/bin/pgrx_embed.rs index 505d1b69..36ae772e 100644 --- a/src/bin/pgrx_embed.rs +++ b/src/bin/pgrx_embed.rs @@ -19,8 +19,17 @@ #[macro_export] macro_rules! schema_generation { - ($($symbol:ident)*) => { + ($($symbol:ident)*; $($import:ident)*) => { pub fn main() -> Result<(), Box> { + $( + const _: () = { + #[unsafe(no_mangle)] + unsafe extern "C" fn $import() { + panic!("{} is called unexpectedly.", stringify!($import)); + } + }; + )* + extern crate vchord as _; use ::pgrx::pgrx_sql_entity_graph::ControlFile;