diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml deleted file mode 100644 index 0cc80b53a1..0000000000 --- a/.github/composite-actions/download-libraries/action.yml +++ /dev/null @@ -1,102 +0,0 @@ -name: 'Download libraries' -description: 'Download files necessary for compilation and testing' -inputs: - download-pybind: - type: boolean - description: 'Download pybind11' - default: false - - download-googletest: - type: boolean - description: 'Download googletest' - default: true - - install-boost: - type: boolean - description: 'Install boost' - default: true - -runs: - using: 'composite' - steps: - - uses: actions/checkout@v3 - - name: Install build tools - run: | - sudo apt-get update -y - sudo apt-get install gcc-10 g++-10 cmake build-essential -y - shell: bash - - - name: Make lib directory - run: | - mkdir -p lib - shell: bash - - - name: Download googletest - uses: ./.github/composite-actions/download-library - with: - directory: googletest - download-command: git clone https://github.com/google/googletest/ --branch release-1.12.1 --depth 1 - if: inputs.download-googletest != 'false' - - - name: Download easyloggingpp - uses: ./.github/composite-actions/download-library - with: - directory: easyloggingpp - download-command: git clone https://github.com/amrayn/easyloggingpp/ --branch v9.97.0 --depth 1 - - - name: Download better-enums - uses: ./.github/composite-actions/download-library - with: - directory: better-enums - download-command: git clone https://github.com/aantron/better-enums.git --branch 0.11.3 --depth 1 - - - name: Download pybind11 - uses: ./.github/composite-actions/download-library - with: - directory: pybind11 - download-command: git clone https://github.com/pybind/pybind11.git --branch v2.13.4 --depth 1 - if: inputs.download-pybind != 'false' - - name: Download emhash - uses: ./.github/composite-actions/download-library - with: - directory: emhash - 
download-command: git clone https://github.com/ktprime/emhash.git --depth 1 - - name: Download atomicbitvector - uses: ./.github/composite-actions/download-library - with: - directory: atomicbitvector - download-command: git clone https://github.com/ekg/atomicbitvector.git --depth 1 - - name: Download boost - uses: ./.github/composite-actions/download-library - with: - directory: boost - download-command: wget -O boost_1_81_0.tar.gz https://sourceforge.net/projects/boost/files/boost/1.81.0/boost_1_81_0.tar.gz/download && tar xzvf boost_1_81_0.tar.gz && mv boost_1_81_0 boost - - - name: Install Boost - run: | - cd lib/boost - ./bootstrap.sh --prefix=/usr - sudo ./b2 install --prefix=/usr - shell: bash - if: inputs.install-boost != 'false' - - name: Download frozen - uses: ./.github/composite-actions/download-library - with: - directory: frozen - download-command: git clone https://github.com/serge-sans-paille/frozen.git --depth 1 - -# Uncomment this if we set up our own git lfs server -# - name: Install git-lfs -# run: | -# curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash -# git lfs install -# shell: bash -# - name: Generate lfs file list -# run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id -# shell: bash -# - name: Restore lfs cache -# uses: actions/cache@v3 -# id: lfs-cache -# with: -# path: .git/lfs -# key: ${{ runner.os }}-lfs-${{ hashFiles('.lfs-assets-id') }}-v1 diff --git a/.github/composite-actions/download-library/action.yml b/.github/composite-actions/download-library/action.yml index 989b9d9abc..e456714aed 100644 --- a/.github/composite-actions/download-library/action.yml +++ b/.github/composite-actions/download-library/action.yml @@ -6,22 +6,56 @@ inputs: required: true download-command: description: 'Command to download the library' + type: choice + options: + git-clone + wget + default: git-clone + url: + description: 'URL of the library to download' required: true + git-branch: + description: 
'Branch of git repo to download' runs: using: 'composite' steps: + - name: Set library hash as commit hash + id: get-commit-hash + run: | + echo "LIBRARY_HASH=$(git ls-remote ${{inputs.url}} ${{inputs.git-branch}} | awk '{print $1}')" >> $GITHUB_ENV + shell: bash + if: inputs.download-command == 'git-clone' + + - name: Set library hash as url + id: get-url-hash + run: | + echo "LIBRARY_HASH=${{ inputs.url }}" >> $GITHUB_ENV + shell: bash + if: inputs.download-command == 'wget' + - uses: actions/cache/restore@v4 id: cache-library with: path: ${{github.workspace}}/lib/${{inputs.directory}} - key: ${{runner.os}}-${{inputs.directory}} + key: ${{runner.os}}-${{inputs.directory}}-${{env.LIBRARY_HASH}} + - run: | cd lib - ${{inputs.download-command}} + git clone ${{inputs.url}} --branch ${{inputs.git-branch}} --depth 1 shell: bash - if: steps.cache-library.outputs.cache-hit != 'true' + if: steps.cache-library.outputs.cache-hit != 'true' && inputs.download-command == 'git-clone' + + - run: | + cd lib + wget ${{inputs.url}} -O ${{inputs.directory}}.tar.gz + mkdir -p ${{inputs.directory}} + tar xzf ${{inputs.directory}}.tar.gz -C ${{inputs.directory}} --strip-components=1 + rm ${{inputs.directory}}.tar.gz + shell: bash + if: steps.cache-library.outputs.cache-hit != 'true' && inputs.download-command == 'wget' + - uses: actions/cache/save@v4 with: path: ${{github.workspace}}/lib/${{inputs.directory}} - key: ${{runner.os}}-${{inputs.directory}} + key: ${{runner.os}}-${{inputs.directory}}-${{env.LIBRARY_HASH}} if: steps.cache-library.outputs.cache-hit != 'true' diff --git a/.github/composite-actions/install-dependencies/action.yml b/.github/composite-actions/install-dependencies/action.yml new file mode 100644 index 0000000000..d53d99c437 --- /dev/null +++ b/.github/composite-actions/install-dependencies/action.yml @@ -0,0 +1,255 @@ +name: 'Install dependencies' +description: 'Download and install build system and libraries' +inputs: + download-pybind: + type: boolean + description: 'Download pybind11' + default: false + + download-googletest: + type: boolean + 
description: 'Download googletest' + default: true + + os: + type: string + required: true + + toolset: + type: choice + options: + gcc + llvm-clang + apple-clang + default: gcc + + install-boost: + type: boolean + default: true + +runs: + using: 'composite' + steps: + + # --- Set up environment --- + - name: Get short OS name + run: | + function get_short_os_name() { + if [[ $1 == *"ubuntu"* ]]; then + echo "ubuntu" + elif [[ $1 == *"macos"* ]]; then + echo "macos" + elif [[ $1 == *"windows"* ]]; then + echo "windows" + else + echo "ERROR: unknown OS" + exit 1 + fi + } + + echo "OS=$(get_short_os_name ${{ inputs.os }})" >> $GITHUB_ENV + shell: bash + + - uses: msys2/setup-msys2@v2 + with: + msystem: MINGW64 + install: >- + git + if: env.OS == 'windows' + + # --- Install tools --- + + # Install build tools + - name: Install build tools using apt + run: | + sudo apt-get update -y + sudo apt-get install cmake ninja-build -y + shell: bash + if: env.OS == 'ubuntu' + - name: Install build tools using brew + run: | + brew install cmake --formula ninja + shell: bash + if: env.OS == 'macos' + - name: Install build tools using pacman + run: | + pacman -S --noconfirm mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja + shell: msys2 {0} + if: env.OS == 'windows' + + # Install GCC toolset + - name: Install GCC toolset (on Ubuntu) + run: | + sudo apt-get install gcc-10 g++-10 -y + shell: bash + if: inputs.toolset == 'gcc' && env.OS == 'ubuntu' + - name: Install GCC toolset (on macOS) + run: | + brew install gcc@14 + shell: bash + if: inputs.toolset == 'gcc' && env.OS == 'macos' + - name: Install GCC toolset (on Windows) + run: | + pacman -S --noconfirm mingw-w64-x86_64-gcc + shell: msys2 {0} + if: inputs.toolset == 'gcc' && env.OS == 'windows' + + # Install Clang toolset + - name: Install Clang toolset (on Ubuntu) + # "all" option is needed to install libc++ and libc++abi + # apt is hardcoded in llvm.sh, so we can't use it everywhere + run: | + wget https://apt.llvm.org/llvm.sh + 
chmod +x llvm.sh + sudo ./llvm.sh 17 all + shell: bash + if: inputs.toolset == 'llvm-clang' && env.OS == 'ubuntu' + - name: Install LLVM Clang toolset (on macOS) + run: | + brew install llvm@17 + shell: bash + if: inputs.toolset == 'llvm-clang' && env.OS == 'macos' + + # Apple Clang is installed by default on macOS runner + + # --- Install libraries --- + + # Set up git to not convert line endings on Windows + - run: | + git config --global core.autocrlf input + shell: msys2 {0} + if: env.OS == 'windows' + + - uses: actions/checkout@v4 + + - name: Make lib directory + run: | + mkdir -p lib + shell: bash + + - name: Download atomicbitvector + uses: ./.github/composite-actions/download-library + with: + directory: atomicbitvector + download-command: git-clone + url: https://github.com/ekg/atomicbitvector.git + git-branch: master + + - name: Download better-enums + uses: ./.github/composite-actions/download-library + with: + directory: better-enums + download-command: git-clone + url: https://github.com/aantron/better-enums.git + git-branch: "0.11.3" + + - name: Download boost + uses: ./.github/composite-actions/download-library + with: + directory: boost + download-command: wget + url: https://sourceforge.net/projects/boost/files/boost/1.85.0/boost_1_85_0.tar.gz/download + if: inputs.install-boost == 'true' && (env.OS == 'ubuntu' || env.OS == 'macos') + + - name: Download easyloggingpp + uses: ./.github/composite-actions/download-library + with: + directory: easyloggingpp + download-command: git-clone + url: https://github.com/amrayn/easyloggingpp/ + git-branch: v9.97.0 + + - name: Download emhash + uses: ./.github/composite-actions/download-library + with: + directory: emhash + download-command: git-clone + url: https://github.com/Vdaleke/emhash.git + git-branch: master + + - name: Download googletest + uses: ./.github/composite-actions/download-library + with: + directory: googletest + download-command: git-clone + url: https://github.com/google/googletest/ + 
git-branch: v1.14.0 + if: inputs.download-googletest == 'true' + + - name: Download pybind11 + uses: ./.github/composite-actions/download-library + with: + directory: pybind11 + download-command: git-clone + url: https://github.com/pybind/pybind11.git + git-branch: v2.13.4 + if: inputs.download-pybind == 'true' + + # Install Boost built with GCC + - name: Install Boost built with GCC (on Ubuntu) + run: | + cd lib/boost + ./bootstrap.sh --prefix=/usr --with-libraries=container,thread,graph + sudo ./b2 install --prefix=/usr + shell: bash + if: inputs.install-boost == 'true' && inputs.toolset == 'gcc' && env.OS == 'ubuntu' + - name: Install Boost built with GCC (on macOS) + run: | + cd lib/boost + ./bootstrap.sh --with-libraries=container,thread,graph + echo "using darwin : : g++-14 ;" > user-config.jam + sudo ./b2 install -a --user-config=user-config.jam --prefix=/usr/local + shell: bash + if: inputs.install-boost == 'true' && inputs.toolset == 'gcc' && env.OS == 'macos' + - name: Install Boost built with GCC (on Windows) + run: | + pacman -S --noconfirm mingw-w64-x86_64-boost + if: inputs.install-boost == 'true' && inputs.toolset == 'gcc' && env.OS == 'windows' + shell: msys2 {0} + + # Install Boost built with Clang + - name: Install Boost built with Clang (on Ubuntu) + run: | + cd lib/boost + ./bootstrap.sh --with-libraries=container,thread,graph + sudo ./b2 install -a --prefix=/usr toolset=clang cxxflags="-stdlib=libc++" \ + linkflags="-stdlib=libc++" + shell: bash + if: inputs.install-boost == 'true' && inputs.toolset == 'llvm-clang' && env.OS == 'ubuntu' + - name: Install Boost built with LLVM Clang (on macOS) + run: | + cd lib/boost + ./bootstrap.sh --with-libraries=container,thread,graph + echo "using darwin : : $(brew --prefix llvm@17)/bin/clang++ ;" > user-config.jam + sudo ./b2 install -a --user-config=user-config.jam --prefix=/usr/local \ + cxxflags="-std=c++11 -I$(brew --prefix llvm@17)/include" \ + linkflags="-L$(brew --prefix llvm@17)/lib/c++" + shell: 
bash + if: inputs.install-boost == 'true' && inputs.toolset == 'llvm-clang' && env.OS == 'macos' + + # Install Boost built with Apple Clang + - name: Install Boost built with Apple Clang + run: | + brew install boost + shell: bash + if: inputs.install-boost == 'true' && inputs.toolset == 'apple-clang' + + - name: Download frozen + uses: ./.github/composite-actions/download-library + with: + directory: frozen + download-command: git-clone + url: https://github.com/serge-sans-paille/frozen.git + git-branch: master +# Uncomment this if we set up our own git lfs server +# - name: Install git-lfs +# run: | +# curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash && git lfs install +# - name: Generate lfs file list +# run: git lfs ls-files -l | cut -d' ' -f1 | sort > .lfs-assets-id +# - name: Restore lfs cache +# uses: actions/cache@v3 +# id: lfs-cache +# with: +# path: .git/lfs +# key: ${{ runner.os }}-lfs-${{ hashFiles('.lfs-assets-id') }}-v1 diff --git a/.github/workflows/bindings-tests.yml b/.github/workflows/bindings-tests.yml index 7f60254a29..dcbc70827f 100644 --- a/.github/workflows/bindings-tests.yml +++ b/.github/workflows/bindings-tests.yml @@ -28,25 +28,57 @@ on: - build.* - pull_datasets.sh workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true jobs: test-python-bindings: - runs-on: ubuntu-latest + continue-on-error: true + strategy: + matrix: + include: + - os: ubuntu-latest + compiler: gcc + env: CXX=g++-10 + - os: ubuntu-latest + compiler: llvm-clang + env: CXX=clang++-17 CXXFLAGS="-stdlib=libc++" LDFLAGS="-lc++abi" + # Uncomment this to enable macOS gcc and llvm-clang tests: + # - os: macos-latest + # compiler: gcc + # env: CXX=g++-14 BOOST_ROOT=/usr/local + # runtime-env: DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} + # - os: macos-latest + # 
compiler: llvm-clang + # env: CXX=$(brew --prefix llvm@17)/bin/clang++ BOOST_ROOT=/usr/local + # runtime-env: DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} + - os: macos-latest + compiler: apple-clang + env: CXX=clang++ BOOST_ROOT=$(brew --prefix boost) + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - - name: Download libraries - uses: ./.github/composite-actions/download-libraries + - name: Install dependencies + uses: ./.github/composite-actions/install-dependencies with: + os: ${{ matrix.os }} + toolset: ${{ matrix.compiler }} download-pybind: true download-googletest: false - name: Build pip package shell: bash run: | + export ${{ matrix.env }} + python3 -m venv venv source venv/bin/activate python3 -m pip install . - name: Test pip package shell: bash run: | + export ${{ matrix.runtime-env }} + source venv/bin/activate cp test_input_data/WDC_satellites.csv src/python_bindings/ @@ -60,6 +92,8 @@ jobs: working-directory: ${{github.workspace}} shell: bash run: | + export ${{ matrix.runtime-env }} + source venv/bin/activate cp test_input_data/TestDataStats.csv src/python_bindings diff --git a/.github/workflows/check-codestyle.yml b/.github/workflows/check-codestyle.yml index 67d79e43e9..382ad3f7fa 100644 --- a/.github/workflows/check-codestyle.yml +++ b/.github/workflows/check-codestyle.yml @@ -13,6 +13,10 @@ on: - build.* - pull_datasets.sh - pyproject.toml +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true jobs: clang-format-check: runs-on: ubuntu-latest @@ -49,14 +53,15 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Download libraries - uses: ./.github/composite-actions/download-libraries + - name: Install dependencies + uses: ./.github/composite-actions/install-dependencies with: + os: ubuntu + toolset: gcc download-pybind: true - name: Generate 
compile_commands.json run: | - cmake -DCMAKE_C_COMPILER=gcc-10 \ - -DCMAKE_CXX_COMPILER=g++-10 \ + cmake -DCMAKE_CXX_COMPILER=g++-10 \ -DCMAKE_BUILD_TYPE=Debug \ -Dgtest_disable_pthreads=OFF \ -DASAN=OFF \ diff --git a/.github/workflows/core-tests.yml b/.github/workflows/core-tests.yml index 058ca3d2fe..d29da53b74 100644 --- a/.github/workflows/core-tests.yml +++ b/.github/workflows/core-tests.yml @@ -26,30 +26,64 @@ on: #- pyproject.toml #- examples/** workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true jobs: run_tests: - runs-on: ubuntu-latest + name: Run tests on ${{ matrix.system.os }} with ${{ matrix.system.toolset }}, (${{ matrix.cfg.BUILD_TYPE }}, ${{ matrix.cfg.SANITIZER }}) + continue-on-error: true strategy: matrix: + system: + - { os: ubuntu-latest, + toolset: gcc, + env: CXX=g++-10 } + - { os: ubuntu-latest, + toolset: llvm-clang, + env: CXX=clang++-17 CXXFLAGS="-stdlib=libc++" LDFLAGS="-lc++abi" } + - { os: macos-latest, + toolset: apple-clang, + env: CXX=clang++ BOOST_ROOT=$(brew --prefix boost) } + - { os: windows-latest, + toolset: gcc, + env: CXX=g++ } cfg: - { BUILD_TYPE: Release } - { BUILD_TYPE: Debug } - { BUILD_TYPE: Debug, SANITIZER : ADDRESS } - { BUILD_TYPE: Debug, SANITIZER : UB } + + runs-on: ${{ matrix.system.os }} + + defaults: + run: + shell: ${{ matrix.system.os == 'windows-latest' && 'msys2 {0}' || 'bash' }} steps: - - uses: actions/checkout@v3 - - name: Download libraries - uses: ./.github/composite-actions/download-libraries + - uses: actions/checkout@v4 + + - name: Install dependencies + uses: ./.github/composite-actions/install-dependencies + with: + os: ${{ matrix.system.os }} + toolset: ${{ matrix.system.toolset }} - name: Download datasets uses: ./.github/composite-actions/download-datasets - name: Build run: | - if [[ "${{matrix.cfg.BUILD_TYPE}}" == 
"Debug" ]]; then + export ${{ matrix.system.env }} + + if [[ "${{ matrix.cfg.BUILD_TYPE }}" == "Debug" ]]; then ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} else ./build.sh fi - name: Test - working-directory: ${{github.workspace}}/build/target - shell: bash - run: ./Desbordante_test --gtest_filter='*:-*HeavyDatasets*' + working-directory: ${{ github.workspace }}/build/target + run: | + if [[ ${{ matrix.system.os }} == 'macos-latest' ]]; then + export DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} + fi + + ./Desbordante_test --gtest_filter='*:-*HeavyDatasets*' diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 451a6b4293..430a97086e 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -31,61 +31,122 @@ on: release: types: - published +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true jobs: - generate-wheels-matrix: - # https://iscinumpy.dev/post/cibuildwheel-2-10-0/ - name: Generate wheels matrix + generate-linux-wheels-matrix: + name: Generate Linux wheel matrix runs-on: ubuntu-latest outputs: include: ${{ steps.set-matrix.outputs.include }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install cibuildwheel - run: pipx install cibuildwheel==2.16.2 + run: pipx install cibuildwheel==2.22.0 - id: set-matrix run: | MATRIX=$( { cibuildwheel --print-build-identifiers --platform linux \ - | jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' - } | jq -sc - ) + | jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' + } | jq -sc) echo "include=$MATRIX" >> $GITHUB_OUTPUT + + - name: Check matrix + run: echo "${{ steps.set-matrix.outputs.include }}" env: CIBW_ARCHS_LINUX: x86_64 - # Builds wheels for PyPy & CPython on manylinux CIBW_BUILD: "*manylinux*" CIBW_TEST_REQUIRES: pytest CIBW_BUILD_VERBOSITY: 1 
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + generate-macos-wheels-matrix: + name: Generate macOS wheel matrix + runs-on: macos-latest + outputs: + include: ${{ steps.set-matrix.outputs.include }} + steps: + - uses: actions/checkout@v4 + - name: Install cibuildwheel + run: pipx install cibuildwheel==2.22.0 + - id: set-matrix + run: | + MATRIX=$( + cibuildwheel --print-build-identifiers --platform macos \ + | jq -sR 'split("\n") | map(select(length > 0)) | map({"only": ., "os": (if contains("x86_64") then "macos-13" else "macos-latest" end)})' | jq -c + ) + echo "include=$MATRIX" >> $GITHUB_OUTPUT + + - name: Check matrix + run: echo "${{ steps.set-matrix.outputs.include }}" + env: + CIBW_ARCHS_MACOS: arm64 x86_64 + CIBW_BUILD: "*macos*" + CIBW_SKIP: "cp37-*" + CIBW_TEST_REQUIRES: pytest + CIBW_BUILD_VERBOSITY: 1 + + merge-matrices: + name: Merge wheel matrices + needs: [generate-linux-wheels-matrix, generate-macos-wheels-matrix] + runs-on: ubuntu-latest + outputs: + include: ${{ steps.merge.outputs.include }} + steps: + - name: Merge JSON matrices + id: merge + run: | + LINUX_MATRIX='${{ needs.generate-linux-wheels-matrix.outputs.include }}' + MACOS_MATRIX='${{ needs.generate-macos-wheels-matrix.outputs.include }}' + + MERGED_MATRIX=$(jq -c -n --argjson var1 "$LINUX_MATRIX" --argjson var2 "$MACOS_MATRIX" '$var1 + $var2') + echo "include=$MERGED_MATRIX" >> $GITHUB_OUTPUT + - name: Check merged matrix + run: echo "${{ steps.merge.outputs.include }}" + build-wheels: name: Build ${{ matrix.only }} - needs: generate-wheels-matrix + continue-on-error: true + needs: [merge-matrices] strategy: fail-fast: false matrix: - include: ${{ fromJson(needs.generate-wheels-matrix.outputs.include) }} + include: ${{ fromJson(needs.merge-matrices.outputs.include) }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Download libraries - uses: ./.github/composite-actions/download-libraries + - name: Install dependencies + uses: 
./.github/composite-actions/install-dependencies with: + os: ${{ matrix.os }} + toolset: ${{ contains(matrix.os, 'macos') && 'apple-clang' || 'gcc' }} download-pybind: true download-googletest: false install-boost: false - name: Build wheels - uses: pypa/cibuildwheel@v2.16.2 + uses: pypa/cibuildwheel@v2.22.0 with: only: ${{ matrix.only }} env: - CIBW_BEFORE_ALL: > + MACOSX_DEPLOYMENT_TARGET: 11.0 + CIBW_BEFORE_ALL_LINUX: > + cd lib/boost && + ./bootstrap.sh --with-libraries=container,thread,graph && + ./b2 install -j4 --prefix=/usr && + export BOOST_ROOT=/usr && + export CXX=g++-10 + CIBW_BEFORE_ALL_MACOS: > cd lib/boost && - ./bootstrap.sh --prefix=/usr && - ./b2 install -j4 --prefix=/usr + ./bootstrap.sh --with-libraries=container,thread,graph && + sudo ./b2 install -j3 --prefix=/usr/local cxxflags="-mmacosx-version-min=${MACOSX_DEPLOYMENT_TARGET} -std=c++20" linkflags="-mmacosx-version-min=${MACOSX_DEPLOYMENT_TARGET}" && + export BOOST_ROOT=/usr/local && + export CXX=clang++ && + export DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} CIBW_TEST_COMMAND: > cp {project}/test_input_data/WDC_satellites.csv {project}/src/python_bindings && cp {project}/test_input_data/transactional_data/rules-kaggle-rows.csv {project}/src/python_bindings && @@ -93,6 +154,8 @@ cp {project}/test_input_data/TestWide.csv {project}/src/python_bindings && cd {project}/src/python_bindings && python3 {project}/src/python_bindings/test_bindings.py + CIBW_REPAIR_WHEEL_COMMAND_MACOS: > + DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} - name: Upload artifact uses: actions/upload-artifact@v4 @@ -106,7 +169,7 @@ runs-on: ubuntu-latest needs: build-wheels steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download all artifacts from previous step uses: actions/download-artifact@v4 with: @@ -140,7 +203,7 @@ runs-on: ubuntu-latest if: github.event_name == 'release' && 
github.event.action == 'published' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download wheels uses: actions/download-artifact@v4 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 922769cbea..2c3b325613 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ if (POLICY CMP0144) cmake_policy(SET CMP0144 NEW) endif() -project(Desbordante) +project(Desbordante CXX) option(COPY_PYTHON_EXAMPLES "Copy Python examples" OFF) option(COMPILE_TESTS "Build tests" ON) @@ -74,6 +74,19 @@ else() if (ASAN) # Set DEBUG build options specific for build with ASAN set(ASAN_OPTS "-fsanitize=address") + + execute_process(COMMAND grep -q "Ubuntu" /etc/os-release RESULT_VARIABLE IS_UBUNTU) + if (IS_UBUNTU EQUAL 0 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # alloc-dealloc-mismatch generates false positives on boost exceptions + # This applies only to Ubuntu package: + # https://github.com/llvm/llvm-project/issues/59432?ysclid=m4y0iqca2c577414782 + # Disable this check on files listed in address_sanitizer_ignore_list.txt if compiler + # is Clang and host distro is Ubuntu: + message(WARNING "Running on Ubuntu. ASAN is broken in Ubuntu package, therefore alloc-dealloc-mismatch check will be suppressed. + Consider using another distro for full ASAN coverage") + string(JOIN ";" ASAN_OPTS "${ASAN_OPTS}" "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/address_sanitizer_ignore_list.txt") + endif() + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-O1" "-Wno-error" # Use of -Werror is discouraged with sanitizers @@ -89,6 +102,12 @@ else() "-fno-sanitize=signed-integer-overflow" # Remove this when CustomRandom gets fixed "-fno-sanitize=shift" # Remove this when CustomRandom gets fixed "-fno-sanitize-recover=all") # For tests to fail if UBSan finds an error + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_HOST_APPLE) + # Limit some UB sanitizer checks to "src" directory on macOS when building with Clang, + # because libraries (STL, googletest, boost, etc.) 
 are somehow broken + string(JOIN ";" UBSAN_OPTS "${UBSAN_OPTS}" + "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/ub_sanitizer_ignore_list.txt") + endif() string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-O1" "${UBSAN_OPTS}") @@ -103,6 +122,20 @@ else() add_compile_options(-ggdb3) endif() + # Workaround clang-18 bug: + # https://github.com/llvm/llvm-project/issues/76515?ysclid=m406q4it5k674680045 + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + string(FIND "${CMAKE_CXX_COMPILER_VERSION}" "18" IDX) + if (IDX EQUAL 0) # clang major version is 18 + message(WARNING "C++ compiler is Clang++-18. Suppressing deprecated declaration warnings. Consider using another version of Clang") + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-Wno-deprecated-declarations") + endif() + endif() + + if(WIN32 AND (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) + link_libraries(ws2_32) + endif() + add_compile_options("$<$<CONFIG:Debug>:${DEBUG_BUILD_OPTS}>") add_link_options("$<$<CONFIG:Debug>:${DEBUG_LINK_OPTS}>") @@ -137,7 +170,7 @@ include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/" "lib/emha # adding submodules if (COMPILE_TESTS) - add_subdirectory("lib/googletest") + add_subdirectory("lib/googletest" SYSTEM) endif() set( CMAKE_BUILD_TYPE_COPY "${CMAKE_BUILD_TYPE}" ) @@ -147,9 +180,9 @@ if (PYTHON STREQUAL INSTALL) # Relies on undocumented behaviour. EXCLUDE_FROM_ALL is used to prevent install commands # inside the easyloggingpp CMakeLists from executing and subsequently failing with a permission error, # making it impossible to install the Python package as a normal user. 
- add_subdirectory("lib/easyloggingpp" EXCLUDE_FROM_ALL) + add_subdirectory("lib/easyloggingpp" EXCLUDE_FROM_ALL SYSTEM) else () - add_subdirectory("lib/easyloggingpp") + add_subdirectory("lib/easyloggingpp" SYSTEM) endif () set( CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE_COPY} ) @@ -170,7 +203,7 @@ endif() add_subdirectory("cfg") if (PYTHON) - add_subdirectory("lib/pybind11") + add_subdirectory("lib/pybind11" SYSTEM) add_subdirectory("src/python_bindings") endif() diff --git a/README.md b/README.md index 520c630e5e..c9b3118bfd 100644 --- a/README.md +++ b/README.md @@ -236,34 +236,36 @@ To install Desbordante type: $ pip install desbordante ``` -However, as Desbordante core uses C++, additional requirements on the machine are imposed. Therefore this installation option may not work for everyone. Currently, only manylinux2014 (Ubuntu 20.04+, or any other linux distribution with gcc 10+) is supported. If the above does not work for you consider building from sources. +However, as Desbordante core uses C++, additional requirements on the machine are imposed. Therefore this installation option may not work for everyone. Currently, only manylinux2014 (Ubuntu 20.04+, or any other linux distribution with gcc 10+) and macOS 11.0+ (arm64, x86_64) is supported. If the above does not work for you consider building from sources. ## Build instructions ### Ubuntu and macOS -The following instructions were tested on Ubuntu 20.04+ LTS and macOS Sonoma 14.7 (Apple Silicon). +The following instructions were tested on Ubuntu 20.04+ LTS and macOS Sonoma 14.7+ (Apple Silicon). 
### Dependencies Prior to cloning the repository and attempting to build the project, ensure that you have the following software: -- GNU GCC, version 10+ +- GNU GCC, version 10+, LLVM Clang, version 16+, or Apple Clang, version 15+ - CMake, version 3.15+ -- Boost library built with GCC, version 1.81.0+ +- Boost library built with compiler you're going to use (GCC or Clang), version 1.85.0+ To use test datasets you will need: - Git Large File Storage, version 3.0.2+ -#### Ubuntu dependencies installation +Instructions below are given for GCC (on Linux) and Apple Clang (on macOS). +Instructions for other supported compilers can be found in [Desbordante wiki](https://github.com/Desbordante/desbordante-core/wiki/Building). + +#### Ubuntu dependencies installation (GCC) Run the following commands: ```sh -sudo apt install gcc g++ cmake libboost-all-dev git-lfs python3 -export CC=gcc +sudo apt install g++ cmake libboost-all-dev git-lfs python3 export CXX=g++ ``` -The last 2 lines set gcc as CMake compiler in your terminal session. +The last line set g++ as CMake compiler in your terminal session. You can also add them to the end of `~/.profile` to set this by default in all sessions. -#### MacOS dependencies installation +#### macOS dependencies installation (Apple Clang) Install Xcode Command Line Tools if you don't have them. Run: ```sh @@ -271,43 +273,23 @@ xcode-select --install ``` Follow the prompts to continue. -To install GCC, CMake and python on macOS we recommend to use [Homebrew](https://brew.sh/) package manager. With Homebrew +To install CMake and Boost on macOS we recommend to use [Homebrew](https://brew.sh/) package manager. With Homebrew installed, run the following commands: ```sh -brew install gcc@14 cmake python3 +brew install cmake boost ``` After installation, check `cmake --version`. If command is not found, then you need to add to environment path to homebrew installed packages. 
To do this open `~/.zprofile` (for Zsh) or `~/.bash_profile` (for Bash) and add to the end of the file the output of `brew shellenv`. After that, restart the terminal and check the version of CMake again, now it should be displayed. -Then you need to install Boost library built with GCC. Please avoid using Homebrew for this, as the Boost version provided by Homebrew -is built with Clang, which has a different ABI. Instead, download the latest version of Boost from the [official website](https://www.boost.org/users/download/), open terminal and run: -```sh -cd ~/Downloads -curl https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 --output "boost_1_86_0.tar.bz2" -tar xvjf boost_1_86_0.tar.bz2 && rm boost_1_86_0.tar.bz2 -cd boost_1_86_0 -``` -Navigate to the unpacked Boost directory in the terminal and run the following commands: -```sh -./bootstrap.sh -echo "using darwin : : g++-14 ;" > user-config.jam -sudo ./b2 install --user-config=user-config.jam --layout=versioned -export BOOST_ROOT=/usr/local/ # export Boost_ROOT=/usr/local/ for CMake 3.26 and below. -``` -You can also add the last export with current path to `~/.zprofile` or `~/.bash_profile` to set this boost path by default. - -Before building the project you must set locally or in the above-mentioned dotfiles the following CMake environment variables: +Run the following commands: ```sh -export CC=gcc-14 -export CXX=g++-14 -export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/ -export DYLD_LIBRARY_PATH=/usr/local/lib:${DYLD_LIBRARY_PATH} +export CXX=clang++ +export BOOST_ROOT=$(brew --prefix boost) ``` -The first two lines set GCC as the default compiler in CMake. The `SDKROOT` export is also necessary due to issues with GCC 14 and -the last macOS 15 SDK used by CMake by default, you can read more about this [here](https://gist.github.com/scivision/d69faebbc56da9714798087b56de925a) -and [here](https://github.com/iains/gcc-14-branch/issues/11). 
The last export is the solution for dynamic linking with python module. +These commands set Apple Clang and Homebrew Boost as default in CMake in your terminal session. +You can also add them to the end of `~/.profile` to set this by default in all sessions. ### Building the project #### Building the Python module using pip diff --git a/address_sanitizer_ignore_list.txt b/address_sanitizer_ignore_list.txt new file mode 100644 index 0000000000..e14a6175f9 --- /dev/null +++ b/address_sanitizer_ignore_list.txt @@ -0,0 +1,4 @@ +# Disable alloc_dealloc_mismatch ASAN check +[alloc_dealloc_mismatch] +# in file: +src:typed_column_data.h diff --git a/build.sh b/build.sh index c89d854cab..3ae286d531 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Stop on error: +set -e + function print_help() { cat << EOF Usage: ./build.sh [options] @@ -72,7 +75,7 @@ if [[ ! -d "pybind11" ]] ; then git clone https://github.com/pybind/pybind11.git --branch v2.13.4 --depth 1 fi if [[ ! -d "emhash" ]] ; then - git clone https://github.com/ktprime/emhash.git --depth 1 + git clone https://github.com/Vdaleke/emhash.git --depth 1 fi if [[ ! -d "atomicbitvector" ]] ; then git clone https://github.com/ekg/atomicbitvector.git --depth 1 @@ -85,7 +88,7 @@ if [[ $NO_TESTS == true ]]; then PREFIX="$PREFIX -D COMPILE_TESTS=OFF" else if [[ ! -d "googletest" ]] ; then - git clone https://github.com/google/googletest/ --branch v1.13.0 --depth 1 + git clone https://github.com/google/googletest/ --branch v1.14.0 --depth 1 fi fi @@ -116,5 +119,5 @@ fi cd .. mkdir -p build cd build -rm CMakeCache.txt -cmake $PREFIX .. && make $JOBS_OPTION +rm -f CMakeCache.txt +cmake -G Ninja $PREFIX .. && cmake --build . 
$JOBS_OPTION diff --git a/src/core/algorithms/dd/split/model/distance_position_list_index.cpp b/src/core/algorithms/dd/split/model/distance_position_list_index.cpp index 204d11df0d..9558e7df18 100644 --- a/src/core/algorithms/dd/split/model/distance_position_list_index.cpp +++ b/src/core/algorithms/dd/split/model/distance_position_list_index.cpp @@ -12,7 +12,7 @@ void DistancePositionListIndex::AddValue(T&& value) { auto&& [it, is_value_new] = value_mapping_.try_emplace(std::forward(value), next_cluster_index_); if (is_value_new) { - clusters_.emplace_back(cur_tuple_index_, 0); + clusters_.push_back({cur_tuple_index_, 0}); ++next_cluster_index_; } ++clusters_[it->second].size; diff --git a/src/core/algorithms/dd/split/split.cpp b/src/core/algorithms/dd/split/split.cpp index a63ea9df64..d66f6efa88 100644 --- a/src/core/algorithms/dd/split/split.cpp +++ b/src/core/algorithms/dd/split/split.cpp @@ -428,7 +428,7 @@ std::vector Split::IndexSearchSpace(model::ColumnIndex index) { // differential functions should be put in this exact order for further reducing for (int i = num_dfs_per_column_ - 1; i >= 0; i--) { if (min_max_dif_[index].IsWithinExclusive(i)) { - dfs.emplace_back(min_max_dif_[index].lower_bound, i); + dfs.push_back({min_max_dif_[index].lower_bound, static_cast(i)}); } } return dfs; @@ -616,7 +616,7 @@ std::list
Split::NegativePruningReduce(DF const& rhs, std::vector const& auto const [prune, remainder] = NegativeSplit(search, last_df); std::list
dds = NegativePruningReduce(rhs, prune, cnt); - if (dds.empty() && IsFeasible(last_df)) dds.emplace_back(last_df, rhs); + if (!dds.size() && IsFeasible(last_df)) dds.push_back({last_df, rhs}); std::list
const remaining_dds = NegativePruningReduce(rhs, remainder, cnt); std::list
merged_dds = MergeReducedResults(dds, remaining_dds); @@ -635,7 +635,7 @@ std::list
Split::HybridPruningReduce(DF const& rhs, std::vector const& s cnt++; if (VerifyDD(first_df, rhs)) { - if (IsFeasible(first_df)) dds.emplace_back(first_df, rhs); + if (IsFeasible(first_df)) dds.push_back({first_df, rhs}); std::vector remainder = DoPositivePruning(search, first_df); std::list
remaining_dds = HybridPruningReduce(rhs, remainder, cnt); dds.splice(dds.end(), remaining_dds); @@ -685,7 +685,7 @@ std::list
Split::InstanceExclusionReduce(std::vector const& tup } if (no_pairs_left) { - if (IsFeasible(first_df)) dds.emplace_back(first_df, rhs); + if (IsFeasible(first_df)) dds.push_back({first_df, rhs}); std::vector remainder = DoPositivePruning(search, first_df); std::list
remaining_dds = InstanceExclusionReduce(tuple_pair_indices, remainder, rhs, cnt); @@ -749,7 +749,7 @@ model::DDString Split::DDToDDString(DD const& dd) const { std::list Split::GetDDStringList() const { std::list dd_strings; for (auto const& result_dd : dd_collection_) { - dd_strings.emplace_back(DDToDDString(result_dd)); + dd_strings.push_back(DDToDDString(result_dd)); } return dd_strings; } diff --git a/src/core/algorithms/fd/fdep/fd_tree_element.cpp b/src/core/algorithms/fd/fdep/fd_tree_element.cpp index a1cd92678e..60bb130af2 100644 --- a/src/core/algorithms/fd/fdep/fd_tree_element.cpp +++ b/src/core/algorithms/fd/fdep/fd_tree_element.cpp @@ -1,6 +1,7 @@ #include "fd_tree_element.h" #include "boost/dynamic_bitset.hpp" +#include "util/bitset_extensions.h" FDTreeElement::FDTreeElement(size_t max_attribute_number) : max_attribute_number_(max_attribute_number) { @@ -45,7 +46,7 @@ bool FDTreeElement::ContainsGeneralization(std::bitset const& lhs, return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -71,7 +72,7 @@ bool FDTreeElement::GetGeneralizationAndDelete(std::bitset const& l return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -104,7 +105,7 @@ bool FDTreeElement::GetSpecialization(std::bitset const& lhs, size_ bool found = false; size_t attr = (current_attr > 1 ? 
current_attr : 1); - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { while (!found && attr <= this->max_attribute_number_) { @@ -153,7 +154,8 @@ void FDTreeElement::AddFunctionalDependency(std::bitset const& lhs, FDTreeElement* current_node = this; this->AddRhsAttribute(attr_num); - for (size_t i = lhs._Find_first(); i != kMaxAttrNum; i = lhs._Find_next(i)) { + auto iter = util::BitsetIterator(lhs); + for (size_t i = iter.Pos(); i != kMaxAttrNum; iter.Next(), i = iter.Pos()) { if (current_node->children_[i - 1] == nullptr) { current_node->children_[i - 1] = std::make_unique(this->max_attribute_number_); @@ -215,8 +217,8 @@ void FDTreeElement::PrintDependencies(std::bitset& active_path, std if (this->is_fd_[attr - 1]) { out = "{"; - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { + auto iter = util::BitsetIterator(active_path); + for (size_t i = iter.Pos(); i != kMaxAttrNum; iter.Next(), i = iter.Pos()) { if (!column_id.empty()) out += column_id + std::to_string(std::stoi(column_names[i - 1]) + 1) + ","; else @@ -257,11 +259,8 @@ void FDTreeElement::TransformTreeFdCollection(std::bitset& active_p for (size_t attr = 1; attr <= this->max_attribute_number_; ++attr) { if (this->is_fd_[attr - 1]) { - boost::dynamic_bitset<> lhs_bitset(this->max_attribute_number_); - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { - lhs_bitset.set(i - 1); - } + auto lhs_bitset = + util::CreateShiftedDynamicBitset(active_path, this->max_attribute_number_); Vertical lhs(scheme.get(), lhs_bitset); Column rhs(scheme.get(), scheme->GetColumn(attr - 1)->GetName(), attr - 1); fd_collection.emplace_back(FD{lhs, rhs, scheme}); diff --git a/src/core/algorithms/fd/fdep/fdep.cpp b/src/core/algorithms/fd/fdep/fdep.cpp index 293902a270..e53fefcae3 100644 --- a/src/core/algorithms/fd/fdep/fdep.cpp +++ 
b/src/core/algorithms/fd/fdep/fdep.cpp @@ -5,6 +5,7 @@ #include "config/equal_nulls/option.h" #include "config/tabular_data/input_table/option.h" #include "model/table/column_layout_relation_data.h" +#include "util/bitset_extensions.h" // #ifndef PRINT_FDS // #define PRINT_FDS @@ -96,8 +97,9 @@ void FDep::AddViolatedFDs(std::vector const& t1, std::vector con } equal_attr &= (~diff_attr); - for (size_t attr = diff_attr._Find_first(); attr != FDTreeElement::kMaxAttrNum; - attr = diff_attr._Find_next(attr)) { + auto iter = util::BitsetIterator(diff_attr); + for (size_t attr = iter.Pos(); attr != FDTreeElement::kMaxAttrNum; + iter.Next(), attr = iter.Pos()) { this->neg_cover_tree_->AddFunctionalDependency(equal_attr, attr); } } diff --git a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h index d2933337e0..732c9103a9 100644 --- a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h +++ b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h @@ -8,6 +8,7 @@ class ProfilingContext; #include "cache_eviction_method.h" #include "caching_method.h" #include "model/table/column_layout_relation_data.h" +#include "util/maybe_unused_private_field.h" namespace model { @@ -29,20 +30,24 @@ class PLICache { std::unique_ptr> index_; // usageCounter - for parallelism - int saved_intersections_ = 0; + // All these MAYBE_UNUSED_PRIVATE_FIELD variables are required to support Pyro's caching + // strategies from our ADBIS paper: + // https://link.springer.com/chapter/10.1007/978-3-030-30278-8_7 + + MAYBE_UNUSED_PRIVATE_FIELD int saved_intersections_ = 0; mutable std::mutex getting_pli_mutex_; CachingMethod caching_method_; - CacheEvictionMethod eviction_method_; - double caching_method_value_; + MAYBE_UNUSED_PRIVATE_FIELD CacheEvictionMethod eviction_method_; + MAYBE_UNUSED_PRIVATE_FIELD double caching_method_value_; // long long maximumAvailableMemory_ = 0; double maximum_entropy_; - double mean_entropy_; - double min_entropy_; - 
double median_entropy_; - double median_gini_; - double median_inverted_entropy_; + MAYBE_UNUSED_PRIVATE_FIELD double mean_entropy_; + MAYBE_UNUSED_PRIVATE_FIELD double min_entropy_; + MAYBE_UNUSED_PRIVATE_FIELD double median_entropy_; + MAYBE_UNUSED_PRIVATE_FIELD double median_gini_; + MAYBE_UNUSED_PRIVATE_FIELD double median_inverted_entropy_; std::variant> CachingProcess( Vertical const& vertical, std::unique_ptr pli, diff --git a/src/core/algorithms/fd/sfd/cords.cpp b/src/core/algorithms/fd/sfd/cords.cpp index af719871a7..5e5ec75c4f 100644 --- a/src/core/algorithms/fd/sfd/cords.cpp +++ b/src/core/algorithms/fd/sfd/cords.cpp @@ -163,7 +163,7 @@ unsigned long long Cords::ExecuteInternal() { Init(column_count, data); - auto start_time = std::chrono::high_resolution_clock::now(); + auto start_time = std::chrono::system_clock::now(); SetProgress(kTotalProgressPercent); ToNextProgressPhase(); diff --git a/src/core/algorithms/fd/sfd/frequency_handler.cpp b/src/core/algorithms/fd/sfd/frequency_handler.cpp index 0d2d700600..3157d14462 100644 --- a/src/core/algorithms/fd/sfd/frequency_handler.cpp +++ b/src/core/algorithms/fd/sfd/frequency_handler.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -30,7 +31,9 @@ void FrequencyHandler::InitFrequencyHandler(std::vector auto cmp = [](std::pair const &left, std::pair const &right) { - return left.second > right.second; + // Compare frequencies. + // If frequencies are equal, compare values lexicographically. 
+ return std::tie(left.second, left.first) > std::tie(right.second, right.first); }; std::sort(values_ordered_by_frequencies.begin(), values_ordered_by_frequencies.end(), cmp); diff --git a/src/core/algorithms/fd/sfd/sample.h b/src/core/algorithms/fd/sfd/sample.h index db1d50eaff..ca80b39c34 100644 --- a/src/core/algorithms/fd/sfd/sample.h +++ b/src/core/algorithms/fd/sfd/sample.h @@ -25,9 +25,9 @@ class Sample { size_t concat_cardinality_; public: - Sample(bool fixed_sample, unsigned long long sample_size, size_t rows, model::ColumnIndex lhs, - model::ColumnIndex rhs, std::vector const &data, - RelationalSchema const *rel_schema_); + Sample(bool fixed_sample, unsigned long long sample_size, model::TupleIndex rows, + model::ColumnIndex lhs, model::ColumnIndex rhs, + std::vector const &data, RelationalSchema const *rel_schema_); void Filter(FrequencyHandler const &handler, std::vector const &data, model::ColumnIndex col_ind); @@ -37,7 +37,7 @@ class Sample { long double max_false_positive_probability, long double delta); - [[nodiscard]] std::vector const &GetRowIndices() const { + [[nodiscard]] std::vector const &GetRowIndices() const { return row_indices_; } diff --git a/src/core/algorithms/fd/tane/pfdtane.cpp b/src/core/algorithms/fd/tane/pfdtane.cpp index 68c7ccc106..2dfd0a013b 100644 --- a/src/core/algorithms/fd/tane/pfdtane.cpp +++ b/src/core/algorithms/fd/tane/pfdtane.cpp @@ -1,5 +1,7 @@ #include "pfdtane.h" +#include + #include "config/error/option.h" #include "config/error_measure/option.h" #include "enums.h" @@ -48,10 +50,10 @@ config::ErrorType PFDTane::CalculatePFDError(model::PositionListIndex const* x_p std::deque xa_index = xa_pli->GetIndex(); std::shared_ptr probing_table_ptr = x_pli->CalculateAndGetProbingTable(); auto const& probing_table = *probing_table_ptr; - std::sort(xa_index.begin(), xa_index.end(), - [&probing_table](Cluster const& a, Cluster const& b) { - return probing_table[a.front()] < probing_table[b.front()]; - }); + 
std::stable_sort(xa_index.begin(), xa_index.end(), + [&probing_table](Cluster const& a, Cluster const& b) { + return probing_table[a.front()] < probing_table[b.front()]; + }); double sum = 0.0; std::size_t cluster_rows_count = 0; std::deque const& x_index = x_pli->GetIndex(); diff --git a/src/core/algorithms/gfd/egfd_validation.cpp b/src/core/algorithms/gfd/egfd_validation.cpp index 21bf4be6a5..307b2fff42 100644 --- a/src/core/algorithms/gfd/egfd_validation.cpp +++ b/src/core/algorithms/gfd/egfd_validation.cpp @@ -342,7 +342,7 @@ void ReverseConstruction(std::set const& lev, graph_t const& graph, gr std::map>& candidates, std::map& cnts, std::map>& unvisited_neighbours) { - for (std::set::iterator j = --lev.end(); j != std::next(lev.begin(), -1); --j) { + for (auto j = lev.rbegin(); j != lev.rend(); ++j) { vertex_t u = *j; int cnt = 0; if (unvisited_neighbours.find(u) != unvisited_neighbours.end()) { diff --git a/src/core/algorithms/gfd/gfd_validation.cpp b/src/core/algorithms/gfd/gfd_validation.cpp index 1f9bf64dec..39a3e4b0c9 100644 --- a/src/core/algorithms/gfd/gfd_validation.cpp +++ b/src/core/algorithms/gfd/gfd_validation.cpp @@ -25,6 +25,10 @@ std::vector> GetPartition(std::vector const& can config::ThreadNumType const& threads_num) { std::vector> result = {}; + if (candidates.empty()) { + return {}; + } + int musthave = candidates.size() / threads_num; int oversized_num = candidates.size() % threads_num; diff --git a/src/core/algorithms/md/hymd/lattice/cardinality/min_picker_lattice.cpp b/src/core/algorithms/md/hymd/lattice/cardinality/min_picker_lattice.cpp index b2dc2e2abf..455d8a95b4 100644 --- a/src/core/algorithms/md/hymd/lattice/cardinality/min_picker_lattice.cpp +++ b/src/core/algorithms/md/hymd/lattice/cardinality/min_picker_lattice.cpp @@ -92,8 +92,8 @@ void MinPickerLattice::AddGeneralizations(MdLattice::MdVerificationMessenger& me if (considered_indices.none()) return; } RemoveSpecializations(root_, messenger, lhs.begin(), considered_indices); - 
ValidationInfo& added_ref = info_.emplace_back(&messenger, std::move(considered_indices)); - Add(&added_ref); + info_.push_back({&messenger, std::move(considered_indices)}); + Add(&info_.back()); } std::vector MinPickerLattice::GetAll() noexcept(kNeedsEmptyRemoval) { diff --git a/src/core/algorithms/md/hymd/lattice/cardinality/one_by_one_min_picker.cpp b/src/core/algorithms/md/hymd/lattice/cardinality/one_by_one_min_picker.cpp index 90e4bffda6..1c24f60531 100644 --- a/src/core/algorithms/md/hymd/lattice/cardinality/one_by_one_min_picker.cpp +++ b/src/core/algorithms/md/hymd/lattice/cardinality/one_by_one_min_picker.cpp @@ -107,7 +107,7 @@ void OneByOnePicker::AddGeneralizations(MdLattice::MdVerificationMessenger& mess } } assert(!considered_indices.none()); - currently_picked_.emplace_back(&messenger, std::move(considered_indices)); + currently_picked_.push_back({&messenger, std::move(considered_indices)}); } std::vector OneByOnePicker::GetAll() noexcept { diff --git a/src/core/algorithms/md/hymd/lattice/md_lattice.cpp b/src/core/algorithms/md/hymd/lattice/md_lattice.cpp index 0709d2b508..3fa8ee57c2 100644 --- a/src/core/algorithms/md/hymd/lattice/md_lattice.cpp +++ b/src/core/algorithms/md/hymd/lattice/md_lattice.cpp @@ -478,7 +478,8 @@ class Specializer { get_lhs_ccv_id_(std::move(get_lhs_ccv_id)), get_nonlhs_ccv_id_(std::move(get_nonlhs_ccv_id)), prune_nondisjoint_(prune_nondisjoint), - current_specialization_({lhs, {lhs.begin(), {}}}, rhs) {} + current_specialization_( + {LhsSpecialization{lhs, SpecializationData{lhs.begin(), LhsNode{}}}, rhs}) {} void Specialize() { if (GetLhs().Cardinality() == cardinality_limit_) { @@ -674,7 +675,7 @@ void MdLattice::TryDeleteEmptyNode(MdLhs const& lhs) { } else { DESBORDANTE_ASSUME(it != map.end()); } - path_to_node.emplace_back(cur_node_ptr, &map, it); + path_to_node.push_back({cur_node_ptr, &map, it}); cur_node_ptr = &it->second; } @@ -875,7 +876,7 @@ std::vector MdLattice::GetAll() { std::vector collected; MdLhs 
current_lhs(column_matches_size_); GetAll(md_root_, current_lhs, [&collected](MdLhs& cur_node_lhs, MdNode& cur_node) { - collected.emplace_back(cur_node_lhs, &cur_node); + collected.push_back({cur_node_lhs, &cur_node}); }); assert(std::ranges::none_of(collected, [this](MdLatticeNodeInfo const& node_info) { return IsUnsupported(node_info.lhs); diff --git a/src/core/algorithms/md/hymd/md_lhs.h b/src/core/algorithms/md/hymd/md_lhs.h index 3b41b45dc4..b11e304ab7 100644 --- a/src/core/algorithms/md/hymd/md_lhs.h +++ b/src/core/algorithms/md/hymd/md_lhs.h @@ -10,7 +10,7 @@ namespace algos::hymd { struct LhsNode { model::Index offset; - ColumnClassifierValueId ccv_id; + ColumnClassifierValueId ccv_id = 0; friend bool operator==(LhsNode const& l, LhsNode const& r) { return l.offset == r.offset && l.ccv_id == r.ccv_id; @@ -34,7 +34,8 @@ class MdLhs { } ColumnClassifierValueId& AddNext(model::Index offset) { - return values_.emplace_back(offset).ccv_id; + values_.push_back({offset}); + return values_.back().ccv_id; } void RemoveLast() { diff --git a/src/core/algorithms/md/hymd/preprocessing/ccv_id_pickers/index_uniform.h b/src/core/algorithms/md/hymd/preprocessing/ccv_id_pickers/index_uniform.h index 6b8b2bb843..8a70e5383c 100644 --- a/src/core/algorithms/md/hymd/preprocessing/ccv_id_pickers/index_uniform.h +++ b/src/core/algorithms/md/hymd/preprocessing/ccv_id_pickers/index_uniform.h @@ -20,7 +20,7 @@ class IndexUniform final { std::vector lhs_ccv_ids; if (size_limit_ == 0 || ccv_number <= size_limit_) { lhs_ccv_ids.reserve(ccv_number); - auto iota = std::views::iota(0ul, ccv_number); + auto iota = std::views::iota(static_cast(0), ccv_number); lhs_ccv_ids.assign(iota.begin(), iota.end()); } else { lhs_ccv_ids.reserve(size_limit_ + 1); diff --git a/src/core/algorithms/md/hymd/similarity_data.cpp b/src/core/algorithms/md/hymd/similarity_data.cpp index ae3139cc5f..fb75fd5e28 100644 --- a/src/core/algorithms/md/hymd/similarity_data.cpp +++ 
b/src/core/algorithms/md/hymd/similarity_data.cpp @@ -42,7 +42,7 @@ class SimilarityData::Creator { column_match_index); } else { non_trivial_indices.push_back(column_match_index); - column_matches_info.emplace_back(std::move(indexes), left_col_index, right_col_index); + column_matches_info.push_back({std::move(indexes), left_col_index, right_col_index}); all_lhs_ccv_ids_info.push_back(std::move(lhs_ccv_id_info)); short_sampling_enable.push_back(column_match->IsSymmetricalAndEqIsMax()); } diff --git a/src/core/algorithms/md/hymd/utility/md_less.h b/src/core/algorithms/md/hymd/utility/md_less.h index 526a91bff0..2fd17e17b1 100644 --- a/src/core/algorithms/md/hymd/utility/md_less.h +++ b/src/core/algorithms/md/hymd/utility/md_less.h @@ -20,7 +20,19 @@ inline bool MdLessPairs(MdPair const& pair_left, MdPair const& pair_right) { } else if (cardinality_left > cardinality_right) { return false; } + +#if __cpp_lib_three_way_comparison == 201907L auto comp = lhs_left <=> lhs_right; +#else + signed char comp; + if (lhs_left < lhs_right) { + comp = -1; + } else if (lhs_left == lhs_right) { + comp = 0; + } else { + comp = 1; + } +#endif if (comp < 0) { return true; } else if (comp > 0) { diff --git a/src/core/algorithms/md/hymd/validator.cpp b/src/core/algorithms/md/hymd/validator.cpp index 491d7a5963..95c3a74a81 100644 --- a/src/core/algorithms/md/hymd/validator.cpp +++ b/src/core/algorithms/md/hymd/validator.cpp @@ -311,8 +311,8 @@ class BatchValidator::MultiCardPartitionElementProvider { kSLTVPartitionColumn; // LHS has cardinality greater than 1, so is not empty. 
DESBORDANTE_ASSUME(lhs_iter_ != lhs_end_); - rhs_records_matching_criteria_.emplace_back(cur_col_match_index_, kSLTVPartitionColumn, - lhs_iter_->ccv_id); + rhs_records_matching_criteria_.push_back( + {cur_col_match_index_, kSLTVPartitionColumn, lhs_iter_->ccv_id}); ++cur_col_match_index_; ++lhs_iter_; } @@ -333,8 +333,8 @@ class BatchValidator::MultiCardPartitionElementProvider { sltvpe_partition_key_.push_back(left_column_index); } - rhs_records_matching_criteria_.emplace_back(cur_col_match_index_, - partition_key_index, ccv_id); + rhs_records_matching_criteria_.push_back( + {cur_col_match_index_, partition_key_index, ccv_id}); ++cur_col_match_index_; } } diff --git a/src/core/algorithms/md/hymd/validator.h b/src/core/algorithms/md/hymd/validator.h index 54da8ce8b8..fa206a5695 100644 --- a/src/core/algorithms/md/hymd/validator.h +++ b/src/core/algorithms/md/hymd/validator.h @@ -71,7 +71,7 @@ class BatchValidator { void AddRecommendations(RecordCluster const& same_left_value_records, CompressedRecord const& right_record) { for (RecPtr left_record_ptr : same_left_value_records) { - recommendations_->emplace_back(left_record_ptr, &right_record); + recommendations_->push_back({left_record_ptr, &right_record}); } } @@ -79,7 +79,7 @@ class BatchValidator { CompressedRecord const& right_record) { current_ccv_id_ = kLowestCCValueId; for (RecPtr left_record_ptr : same_left_value_records) { - recommendations_->emplace_back(left_record_ptr, &right_record); + recommendations_->push_back({left_record_ptr, &right_record}); } } diff --git a/src/core/algorithms/md/md.cpp b/src/core/algorithms/md/md.cpp index 5aa8664a75..7a77af18eb 100644 --- a/src/core/algorithms/md/md.cpp +++ b/src/core/algorithms/md/md.cpp @@ -115,9 +115,9 @@ MDDescription MD::GetDescription() const { std::vector lhs_description = util::GetPreallocatedVector(lhs_.size()); for (md::LhsColumnSimilarityClassifier const& lhs_classifier : lhs_) { - lhs_description.emplace_back( - 
GetColumnMatchDescription(lhs_classifier.GetColumnMatchIndex()), - lhs_classifier.GetDecisionBoundary(), lhs_classifier.GetMaxDisprovedBound()); + lhs_description.push_back({GetColumnMatchDescription(lhs_classifier.GetColumnMatchIndex()), + lhs_classifier.GetDecisionBoundary(), + lhs_classifier.GetMaxDisprovedBound()}); } return {left_schema_->GetName(), right_schema_->GetName(), diff --git a/src/core/algorithms/nar/des/des.cpp b/src/core/algorithms/nar/des/des.cpp index e77f8cce69..ea700da835 100644 --- a/src/core/algorithms/nar/des/des.cpp +++ b/src/core/algorithms/nar/des/des.cpp @@ -57,7 +57,7 @@ std::vector DES::GetRandomPopulationInDomains(FeatureDomains const& auto compare_by_fitness = [](EncodedNAR const& a, EncodedNAR const& b) { return a.GetQualities().fitness > b.GetQualities().fitness; }; - std::ranges::sort(population, compare_by_fitness); + std::ranges::stable_sort(population, compare_by_fitness); return population; } diff --git a/src/core/algorithms/nar/des/differential_functions.cpp b/src/core/algorithms/nar/des/differential_functions.cpp index 9bd0267c40..ef35d4b272 100644 --- a/src/core/algorithms/nar/des/differential_functions.cpp +++ b/src/core/algorithms/nar/des/differential_functions.cpp @@ -8,8 +8,7 @@ namespace algos::des { std::vector GetRandIndices(size_t except_index, size_t population, size_t number_of_indices, RNG& rng) { assert(number_of_indices <= population - 1); - std::unordered_set indices; - indices.reserve(number_of_indices + 1); + std::set indices; indices.insert(except_index); while (indices.size() < number_of_indices + 1) { size_t random_index = rng.Next() * population; diff --git a/src/core/algorithms/nar/des/rng.h b/src/core/algorithms/nar/des/rng.h index a5eefa52be..792199e69a 100644 --- a/src/core/algorithms/nar/des/rng.h +++ b/src/core/algorithms/nar/des/rng.h @@ -7,7 +7,8 @@ namespace algos::des { class RNG { private: long unsigned const kSeed_ = 2; - std::mt19937 rng_{kSeed_}; + // result_type can differ on different 
STL implementations and data models + std::mt19937 rng_{static_cast(kSeed_)}; std::uniform_real_distribution uni_{0.0, 1.0}; public: diff --git a/src/core/algorithms/od/fastod/model/attribute_set.h b/src/core/algorithms/od/fastod/model/attribute_set.h index 7ac9cec2a6..c0744f5239 100644 --- a/src/core/algorithms/od/fastod/model/attribute_set.h +++ b/src/core/algorithms/od/fastod/model/attribute_set.h @@ -8,6 +8,7 @@ #include #include "model/table/column_index.h" +#include "util/bitset_extensions.h" namespace algos::fastod { @@ -93,11 +94,11 @@ class AttributeSet { } model::ColumnIndex FindFirst() const noexcept { - return bitset_._Find_first(); + return util::FindFirst(bitset_); } model::ColumnIndex FindNext(model::ColumnIndex pos) const noexcept { - return bitset_._Find_next(pos); + return util::FindNext(bitset_, pos); } std::string ToString() const; diff --git a/src/core/algorithms/od/fastod/model/canonical_od.cpp b/src/core/algorithms/od/fastod/model/canonical_od.cpp index 74a6242ad8..a809bb348a 100644 --- a/src/core/algorithms/od/fastod/model/canonical_od.cpp +++ b/src/core/algorithms/od/fastod/model/canonical_od.cpp @@ -2,6 +2,8 @@ #include +#include "algorithms/od/fastod/partitions/complex_stripped_partition.h" + namespace algos::fastod { template @@ -11,7 +13,8 @@ CanonicalOD::CanonicalOD(AttributeSet const& context, model::ColumnIn template bool CanonicalOD::IsValid(std::shared_ptr data, PartitionCache& cache) const { - return !(cache.GetStrippedPartition(context_, data).Swap(ap_.left, ap_.right)); + return !(cache.GetStrippedPartition(context_, data) + .template Swap(ap_.left, ap_.right)); } template diff --git a/src/core/algorithms/od/fastod/util/timer.h b/src/core/algorithms/od/fastod/util/timer.h index 4601f980bc..23ada501b1 100644 --- a/src/core/algorithms/od/fastod/util/timer.h +++ b/src/core/algorithms/od/fastod/util/timer.h @@ -4,7 +4,7 @@ namespace algos::fastod { -using TimePoint = std::chrono::_V2::high_resolution_clock::time_point; +using 
TimePoint = std::chrono::high_resolution_clock::time_point; class Timer { private: diff --git a/src/core/algorithms/od/order/order_utility.cpp b/src/core/algorithms/od/order/order_utility.cpp index 9148dce74b..2aea7aa76d 100644 --- a/src/core/algorithms/od/order/order_utility.cpp +++ b/src/core/algorithms/od/order/order_utility.cpp @@ -83,7 +83,7 @@ std::vector GetIndexedByteData( if (null_rows.find(k) != null_rows.end()) { continue; } - indexed_byte_data.emplace_back(k, byte_data[k]); + indexed_byte_data.push_back({model::TupleIndex(k), byte_data[k]}); } return indexed_byte_data; } diff --git a/src/core/algorithms/statistics/data_stats.cpp b/src/core/algorithms/statistics/data_stats.cpp index 809cf42863..121b044c8a 100644 --- a/src/core/algorithms/statistics/data_stats.cpp +++ b/src/core/algorithms/statistics/data_stats.cpp @@ -464,8 +464,8 @@ Statistic DataStats::GetMedianAD(size_t index) const { return all_stats_[index].median_ad; } mo::TypedColumnData const& col = col_data_[index]; - auto const& type = static_cast(col.GetType()); if (!col.IsNumeric()) return {}; + auto const& type = static_cast(col.GetType()); std::vector data = DeleteNullAndEmpties(index); std::byte* median = MedianOfNumericVector(data, type); diff --git a/src/core/config/exceptions.h b/src/core/config/exceptions.h index adb2641153..49e55296ec 100644 --- a/src/core/config/exceptions.h +++ b/src/core/config/exceptions.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace config { diff --git a/src/core/model/table/agree_set_factory.cpp b/src/core/model/table/agree_set_factory.cpp index f48cccdb78..73a27fc8e5 100644 --- a/src/core/model/table/agree_set_factory.cpp +++ b/src/core/model/table/agree_set_factory.cpp @@ -159,8 +159,8 @@ AgreeSetFactory::SetOfAgreeSets AgreeSetFactory::GenAsUsingMapOfIdSets() const { */ unsigned short const actual_threads_num = std::min(max_representation.size(), (size_t)config_.threads_num); - auto task = [&identifier_sets, &agree_sets, percent_per_cluster, 
actual_threads_num, - &map_init_mutex, this, &threads_agree_sets, &map_init_cv, + auto task = [&identifier_sets, percent_per_cluster, actual_threads_num, &map_init_mutex, + this, &threads_agree_sets, &map_init_cv, &map_initialized](SetOfVectors::value_type const& cluster) { std::thread::id const thread_id = std::this_thread::get_id(); diff --git a/src/core/model/table/column_domain.cpp b/src/core/model/table/column_domain.cpp index 556f2767ec..99cf110687 100644 --- a/src/core/model/table/column_domain.cpp +++ b/src/core/model/table/column_domain.cpp @@ -174,7 +174,7 @@ class DomainManager { if (processed_block_count_ == 0) { auto const approx_block_count = static_cast(Partition::kMaximumBytesPerChar * block_capacity_); - return std::max(1UL, mem_limit_ / approx_block_count); + return std::max(static_cast(1), mem_limit_ / approx_block_count); } /* otherwise, use the average amount of memory spent per processed block */ size_t const per_block_mem_usage = mem_usage_ / processed_block_count_; diff --git a/src/core/model/table/vertical_map.cpp b/src/core/model/table/vertical_map.cpp index 1e57a1ac6a..47ba0e14ea 100644 --- a/src/core/model/table/vertical_map.cpp +++ b/src/core/model/table/vertical_map.cpp @@ -1,6 +1,7 @@ #include "vertical_map.h" #include +#include #include #include @@ -425,7 +426,7 @@ void VerticalMap::Shrink(double factor, std::function key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; unsigned int target_size = size_ * factor; while (!key_queue.empty() && size_ > target_size) { auto key = key_queue.top().first; @@ -433,7 +434,7 @@ void VerticalMap::Shrink(double factor, std::function // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); } shrink_invocations_++; @@ -467,14 +468,14 @@ void VerticalMap::Shrink(std::unordered_map& usag key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; while (!key_queue.empty()) { auto key 
= key_queue.front().first; key_queue.pop(); // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); RemoveFromUsageCounter(usage_counter, key); } diff --git a/src/core/util/auto_join_thread.h b/src/core/util/auto_join_thread.h new file mode 100644 index 0000000000..c217e564a0 --- /dev/null +++ b/src/core/util/auto_join_thread.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +#include + +namespace util::jthread { + +/// @brief Simple RAII wrapper for std::thread. Joins on destruction. +/// @remark The class is inspired by Scott Meyers' ThreadRAII (from Effective Modern C++) +class AutoJoinThread { +public: + explicit AutoJoinThread(std::thread&& t) : t_(std::move(t)) {} + + AutoJoinThread(AutoJoinThread&&) = default; + AutoJoinThread& operator=(AutoJoinThread&&) = default; + // std::thread is not copyable: + AutoJoinThread(AutoJoinThread&) = delete; + AutoJoinThread& operator=(AutoJoinThread&) = delete; + + template + explicit AutoJoinThread(F&& f, Args&&... 
args) + : AutoJoinThread(std::thread{std::forward(f), std::forward(args)...}) {} + + ~AutoJoinThread() try { + if (t_.joinable()) { + t_.join(); + } + } catch (std::system_error const& e) { + LOG(ERROR) << e.what(); + return; // Don't pass exception on + } + + std::thread& Get() { + return t_; + } + +private: + std::thread t_; +}; + +} // namespace util::jthread + +namespace util { + +#ifdef __cpp_lib_jthread +using JThread = std::jthread; +#else +using JThread = jthread::AutoJoinThread; +#endif + +} // namespace util diff --git a/src/core/util/bitset_extensions.cpp b/src/core/util/bitset_extensions.cpp new file mode 100644 index 0000000000..a555d7061e --- /dev/null +++ b/src/core/util/bitset_extensions.cpp @@ -0,0 +1,45 @@ +#include "bitset_extensions.h" + +#include +#include + +namespace util::bitset_extensions { + +CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num) { + return (val & kBytes[byte_num]) >> (byte_num * 8ul); +} + +size_t FindFirstFixedWidth(std::bitset const& bs) { + if (bs.none()) { + return kWidth; + } + unsigned long long val = bs.to_ullong(); + for (size_t byte_idx{0ul}; byte_idx < kNumBytes; ++byte_idx) { + auto byte = GetByte(val, byte_idx); + if (byte > 0ul) { + return byte_idx * 8ul + std::countr_zero(byte); + } + } + __builtin_unreachable(); +} + +size_t FindNextFixedWidth(std::bitset const& bs, size_t pos) { + if (bs.none()) { + return kWidth; + } + unsigned long long val = bs.to_ullong(); + size_t start_byte = pos / 8ul; + size_t bit_pos = pos % 8ul; + for (size_t byte_idx{start_byte}; byte_idx < kNumBytes; ++byte_idx) { + auto byte = GetByte(val, byte_idx); + if (byte_idx == start_byte) { + byte &= kFirstBits[bit_pos]; + } + if (byte > 0ul) { + return byte_idx * 8ul + std::countr_zero(byte); + } + } + return kWidth; +} + +} // namespace util::bitset_extensions diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h new file mode 100644 index 0000000000..f30fa331a1 
--- /dev/null +++ b/src/core/util/bitset_extensions.h @@ -0,0 +1,187 @@ +/* This file contains custom implementation of _Find_first and _Find_next gcc-specific methods +(which come from SGI extensions) of std::bitset for 64-bit bitsets. +These implementations are close to what is in SGI (and are competitive in terms of efficiency). +It shouldn't be so hard to adapt them for bitsets of any width -- see, for example, +https://cocode.se/c++/unsigned_split.html. +If you need _Find_first or _Find_next methods, consider using FindFirst and FindNext from this file. +FindFirst and FindNext are wrappers that use custom implementations if (and only if) gcc intrinsiscs +aren't availible. */ + +#pragma once + +#include +#include +#include + +#include + +namespace util { + +namespace bitset_extensions { + +static std::vector const kBytes{ + 0x00'00'00'00'00'00'00'ff, 0x00'00'00'00'00'00'ff'00, 0x00'00'00'00'00'ff'00'00, + 0x00'00'00'00'ff'00'00'00, 0x00'00'00'ff'00'00'00'00, 0x00'00'ff'00'00'00'00'00, + 0x00'ff'00'00'00'00'00'00, 0xff'00'00'00'00'00'00'00}; +static std::vector const kFirstBits{0b11111110, 0b11111100, 0b11111000, 0b11110000, + 0b11100000, 0b11000000, 0b10000000, 0b00000000}; +constexpr static size_t kNumBytes = 8; +constexpr static size_t kWidth = 64; + +#if (__cpp_lib_constexpr_vector == 201907L) +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR constexpr +#else +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR /* Ignore */ +#endif + +CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num); + +size_t FindFirstFixedWidth(std::bitset const&); + +size_t FindNextFixedWidth(std::bitset const&, size_t pos); + +template +concept HasFindFirst = requires(Bitset bs) { bs._Find_first(); }; + +template +concept HasFindNext = requires(Bitset bs) { bs._Find_next(0); }; + +} // namespace bitset_extensions + +/// @brief Call bs._Find_first if it's availible, use custom implementation otherwise +template + requires bitset_extensions::HasFindFirst> 
+inline size_t FindFirst(std::bitset const& bs) noexcept { + return bs._Find_first(); +} + +/// @brief Call bs._Find_first if it's availible, use custom implementation otherwise +template +inline size_t FindFirst(std::bitset const& bs) noexcept { + if constexpr (S == 64) { + return bitset_extensions::FindFirstFixedWidth(bs); + } else { + // TODO(senichenkov): implement custom FindFirst for 256-bit (or custom width) bitsets + boost::dynamic_bitset<> dbs(bs.to_string()); + auto result = dbs.find_first(); + return result <= S ? result : S; + } +} + +/// @brief Call bs._Find_next if it's availible, use custom implementation otherwise +template + requires bitset_extensions::HasFindNext> +inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { + return bs._Find_next(pos); +} + +/// @brief Call bs._Find_next if it's availible, use custom implementation otherwise +template +inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { + if constexpr (S == 64) { + return bitset_extensions::FindNextFixedWidth(bs, pos); + } else { + // TODO(senichenkov): implement custom FindNext for 256-bit (or custom width) bitsets + boost::dynamic_bitset<> dbs(bs.to_string()); + auto result = dbs.find_next(pos); + return result <= S ? result : S; + } +} + +/// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset +/// through string representation. Bitset is shifted 1 bit left. +template + requires bitset_extensions::HasFindFirst> && + bitset_extensions::HasFindNext> +inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, + std::size_t size = S) noexcept { + boost::dynamic_bitset<> dyn_bitset(size); + for (size_t i = bs._Find_first(); i != S; i = bs._Find_next(i)) { + if (i > 0) { + dyn_bitset.set(i - 1); + } + } + return dyn_bitset; +} + +/// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset +/// through string representation. Bitset is shifted 1 bit left. 
+template +inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, + std::size_t size = S) noexcept { + size_t start = S - size - 1; + return boost::dynamic_bitset(bs.to_string(), start, size); +} + +/// @brief Wrapper for std::bitset to iterate through set bits using temporary +/// boost::dynamic_bitset. +template +class BitsetIterator { +private: + boost::dynamic_bitset<> bs_; + size_t pos_; + +public: + BitsetIterator(std::bitset const& bs) : bs_(bs.to_string()), pos_(bs_.find_first()) { + if (pos_ == boost::dynamic_bitset<>::npos) { + pos_ = bs_.size(); + } + } + + size_t Pos() const noexcept { + return pos_; + } + + void Next() noexcept { + pos_ = bs_.find_next(pos_); + if (pos_ == boost::dynamic_bitset<>::npos) { + pos_ = bs_.size(); + } + } +}; + +/// @brief Wrapper for std::bitset to iterate through set bits using GCC intrinsics. +/// If reference to bitset is invalidated, behaviour is undefined! +template + requires bitset_extensions::HasFindFirst> && + bitset_extensions::HasFindNext> +class BitsetIterator { +private: + std::bitset const& bs_; + size_t pos_; + +public: + BitsetIterator(std::bitset const& bs) : bs_(bs), pos_(bs_._Find_first()) {} + + size_t Pos() const noexcept { + return pos_; + } + + void Next() noexcept { + pos_ = bs_._Find_next(pos_); + } +}; + +/// @brief Wrapper for 64-bit std::bitset to iterate through set bits using custom implementations. +/// If reference to bitset is invalidated, behaviour is undefined! 
+template + requires(S == 64) && (!bitset_extensions::HasFindFirst>) && + (!bitset_extensions::HasFindNext>) +class BitsetIterator { +private: + std::bitset<64> const& bs_; + size_t pos_; + +public: + BitsetIterator(std::bitset<64> const& bs) : bs_(bs), pos_(FindFirst(bs_)) {} + + size_t Pos() const noexcept { + return pos_; + } + + void Next() noexcept { + pos_ = FindNext(bs_, pos_); + } +}; + +} // namespace util diff --git a/src/core/util/kdtree.h b/src/core/util/kdtree.h index 5e47d1b7de..761f3b394a 100644 --- a/src/core/util/kdtree.h +++ b/src/core/util/kdtree.h @@ -191,12 +191,12 @@ size_t KDTree::Size() const { template KDTree::KDTree(std::vector const& points) : KDTree() { - std::for_each(points.begin(), points.end(), this->Insert); + std::for_each(points.begin(), points.end(), &Insert); } template KDTree::KDTree(std::initializer_list const& points) : KDTree() { - std::for_each(points.begin(), points.end(), this->Insert); + std::for_each(points.begin(), points.end(), &Insert); } template diff --git a/src/core/util/maybe_unused_private_field.h b/src/core/util/maybe_unused_private_field.h new file mode 100644 index 0000000000..3d4842a286 --- /dev/null +++ b/src/core/util/maybe_unused_private_field.h @@ -0,0 +1,13 @@ +#pragma once + +// clang produces warning on unused private fields, so they need to be marked as [[maybe_unused]], +// but g++ doesn't recognize [[maybe_unused]] on class fields and produces warning. 
+// This macro expands to [[maybe_unused]], when compiler is clang, nop otherwise +// (see +// https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is +// and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72789) +#ifdef __clang__ +#define MAYBE_UNUSED_PRIVATE_FIELD [[maybe_unused]] +#else +#define MAYBE_UNUSED_PRIVATE_FIELD /* Ignore */ +#endif diff --git a/src/core/util/worker_thread_pool.h b/src/core/util/worker_thread_pool.h index 9d4449245a..37e2cb9e9b 100644 --- a/src/core/util/worker_thread_pool.h +++ b/src/core/util/worker_thread_pool.h @@ -7,11 +7,11 @@ #include #include #include -#include #include #include #include "model/index.h" +#include "util/auto_join_thread.h" #include "util/barrier.h" #include "util/desbordante_assume.h" @@ -44,7 +44,7 @@ class WorkerThreadPool { }; Worker work_; - std::vector worker_threads_; + std::vector worker_threads_; std::vector> tasks_; util::Barrier barrier_; std::condition_variable working_var_; diff --git a/src/python_bindings/od/bind_od.cpp b/src/python_bindings/od/bind_od.cpp index 33ca161e5e..2d1f787c18 100644 --- a/src/python_bindings/od/bind_od.cpp +++ b/src/python_bindings/od/bind_od.cpp @@ -65,7 +65,7 @@ void BindOd(py::module_& main_module) { std::vector res; for (auto const& [lhs, rhs_list] : map_res) { for (AttributeList const& rhs : rhs_list) { - res.emplace_back(lhs, rhs); + res.push_back({lhs, rhs}); } } return res; diff --git a/src/python_bindings/py_util/bind_primitive.h b/src/python_bindings/py_util/bind_primitive.h index 76f933868b..3a4e741685 100644 --- a/src/python_bindings/py_util/bind_primitive.h +++ b/src/python_bindings/py_util/bind_primitive.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include diff --git a/src/tests/test_algo_interfaces.cpp b/src/tests/test_algo_interfaces.cpp index 83a309d850..1e28dff3ae 100644 --- a/src/tests/test_algo_interfaces.cpp +++ b/src/tests/test_algo_interfaces.cpp @@ -15,6 +15,9 @@ 
namespace tests { struct KeysTestParams { std::vector const expected; CSVConfig const& csv_config; + + KeysTestParams(std::vector&& expected, CSVConfig const& csv_config) + : expected(expected), csv_config(csv_config) {} }; class KeysTest : public ::testing::TestWithParam {}; diff --git a/src/tests/test_dc_verifier.cpp b/src/tests/test_dc_verifier.cpp index 614d107da7..236c489f03 100644 --- a/src/tests/test_dc_verifier.cpp +++ b/src/tests/test_dc_verifier.cpp @@ -25,6 +25,9 @@ struct DCTestParams { std::string dc_string; CSVConfig csv_config; bool expected; + + DCTestParams(std::string&& dc_string, CSVConfig const& csv_config, bool expected) + : dc_string(dc_string), csv_config(csv_config), expected(expected) {} }; class TestDCVerifier : public ::testing::TestWithParam {}; diff --git a/src/tests/test_des.cpp b/src/tests/test_des.cpp index 2b58482245..e7bb4561e3 100644 --- a/src/tests/test_des.cpp +++ b/src/tests/test_des.cpp @@ -42,10 +42,11 @@ TEST_F(DESTest, LaunchTest1) { algos::des::DifferentialStrategy::rand1Bin); algorithm->Execute(); auto result = ExtractFitnessValues(algorithm->GetNARVector()); - std::vector expected = {"0.634851", "0.566887", "0.549872", "0.520035", - "0.518598", "0.481561", "0.460124", "0.407567", - "0.333972", "0.313191", "0.274753", "0.190558", - "0.187335", "0.161869", "0.113770", "0.111297"}; + std::vector expected = {"0.609963", "0.518598", "0.514174", "0.497616", "0.483237", + "0.444604", "0.418879", "0.341235", "0.321052", "0.310425", + "0.299355", "0.296755", "0.296456", "0.259754", "0.235430", + "0.213631", "0.192706", "0.186377", "0.168266", "0.164281", + "0.098144", "0.085561", "0.076805", "0.076043"}; ASSERT_EQ(result, expected); } @@ -54,7 +55,8 @@ TEST_F(DESTest, LaunchTest2) { algos::des::DifferentialStrategy::rand1Bin); algorithm->Execute(); auto result = ExtractFitnessValues(algorithm->GetNARVector()); - std::vector expected = {"0.735697", "0.622020", "0.606939", "0.564184"}; + std::vector expected = {"0.598929", 
"0.587854", "0.566134", "0.524618", + "0.522095", "0.489909", "0.376434"}; ASSERT_EQ(result, expected); } diff --git a/src/tests/test_ind_verifier.cpp b/src/tests/test_ind_verifier.cpp index 743322a304..c77e5f4258 100644 --- a/src/tests/test_ind_verifier.cpp +++ b/src/tests/test_ind_verifier.cpp @@ -23,6 +23,10 @@ struct INDVerifierTestConfig { algos::INDVerifier::RawIND ind; /* `std::nullopt` iff IND holds */ std::optional error_opt{}; + + INDVerifierTestConfig(CSVConfigs const& csv_configs, algos::INDVerifier::RawIND&& ind, + std::optional&& error_opt = {}) + : csv_configs(csv_configs), ind(std::move(ind)), error_opt(error_opt) {} }; namespace { diff --git a/src/tests/test_nd_verifier.cpp b/src/tests/test_nd_verifier.cpp index 9a7884d366..de9e2e3f87 100644 --- a/src/tests/test_nd_verifier.cpp +++ b/src/tests/test_nd_verifier.cpp @@ -51,14 +51,14 @@ INSTANTIATE_TEST_SUITE_P( NDVerifyingParams({1, 2, 3}, {6}, 2) )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierHeavyDatasets, TestNDVerifying, ::testing::Values( NDVerifyingParams({5}, {6}, 1000000, kIowa1kk), // I just want to see execution time. 
Real weight doesn't matter (but it shouldn't be very big) NDVerifyingParams({16, 17, 18}, {20, 23}, 1000000, kIowa1kk) // Also, I want to see how execution time depends on number of columns )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierTestNullEqualNull, TestNDVerifying, ::testing::Values( // 6-th column contains 2 values and 7 empty cells NDVerifyingParams({0}, {6}, 3, kTestND, true), diff --git a/src/tests/test_pfdtane.cpp b/src/tests/test_pfdtane.cpp index 1721ec7cf7..34b4cfc05d 100644 --- a/src/tests/test_pfdtane.cpp +++ b/src/tests/test_pfdtane.cpp @@ -32,6 +32,10 @@ struct PFDTaneValidationParams { std::vector fds; algos::PfdErrorMeasure error_measure; CSVConfig csv_config; + + PFDTaneValidationParams(std::vector&& fds, algos::PfdErrorMeasure const& error_measure, + CSVConfig const& csv_config) + : fds(fds), error_measure(error_measure), csv_config(csv_config) {} }; class TestPFDTaneMining : public ::testing::TestWithParam {}; @@ -65,9 +69,9 @@ INSTANTIATE_TEST_SUITE_P( PFDTaneTestMiningSuite, TestPFDTaneMining, ::testing::Values( PFDTaneMiningParams(44381, 0.3, +algos::PfdErrorMeasure::per_value, kTestFD), - PFDTaneMiningParams(39491, 0.1, +algos::PfdErrorMeasure::per_value, kIris), + PFDTaneMiningParams(19266, 0.1, +algos::PfdErrorMeasure::per_value, kIris), PFDTaneMiningParams(10695, 0.01, +algos::PfdErrorMeasure::per_value, kIris), - PFDTaneMiningParams(7893, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), + PFDTaneMiningParams(44088, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), PFDTaneMiningParams(41837, 0.01, +algos::PfdErrorMeasure::per_value, kNeighbors10k) )); diff --git a/src/tests/test_sfd.cpp b/src/tests/test_sfd.cpp index f754af2966..0392b6bda8 100644 --- a/src/tests/test_sfd.cpp +++ b/src/tests/test_sfd.cpp @@ -48,41 +48,41 @@ namespace tests { TEST(TestCordsUtils, FrequenciesOfIris) { std::vector>> expected = { - {{"7.400000", 34}, {"7.600000", 33}, {"4.300000", 32}, {"7.100000", 31}, - {"6.100000", 
11}, {"5.400000", 8}, {"6.600000", 25}, {"6.400000", 7}, - {"5.800000", 5}, {"4.800000", 14}, {"5.200000", 18}, {"4.900000", 9}, - {"5.500000", 6}, {"5.700000", 4}, {"4.600000", 16}, {"5.100000", 2}, - {"6.000000", 10}, {"5.600000", 12}, {"4.500000", 27}, {"6.700000", 3}, - {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 15}, {"7.300000", 30}, - {"7.900000", 29}, {"6.900000", 17}, {"6.800000", 20}, {"7.000000", 28}, - {"5.900000", 21}, {"4.700000", 24}, {"5.000000", 0}, {"4.400000", 22}, - {"7.700000", 19}, {"7.200000", 23}, {"5.300000", 26}}, - - {{"4.200000", 21}, {"4.400000", 19}, {"4.000000", 18}, {"2.400000", 16}, - {"3.700000", 15}, {"4.100000", 20}, {"3.600000", 14}, {"2.800000", 1}, + {{"4.300000", 34}, {"4.500000", 33}, {"5.300000", 32}, {"7.100000", 30}, + {"5.400000", 11}, {"6.600000", 24}, {"6.100000", 8}, {"7.600000", 27}, + {"5.800000", 6}, {"4.800000", 14}, {"5.200000", 18}, {"6.400000", 5}, + {"4.900000", 12}, {"5.500000", 7}, {"5.700000", 4}, {"4.600000", 19}, + {"5.100000", 2}, {"6.000000", 9}, {"5.600000", 10}, {"6.700000", 3}, + {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 17}, {"7.700000", 15}, + {"7.200000", 20}, {"7.300000", 29}, {"6.900000", 16}, {"7.900000", 26}, + {"6.800000", 21}, {"7.000000", 31}, {"5.900000", 22}, {"4.700000", 25}, + {"5.000000", 0}, {"4.400000", 23}, {"7.400000", 28}}, + + {{"4.000000", 21}, {"4.200000", 19}, {"4.400000", 18}, {"2.400000", 15}, + {"4.100000", 20}, {"3.600000", 14}, {"3.700000", 13}, {"2.800000", 1}, {"3.800000", 8}, {"3.200000", 2}, {"3.900000", 17}, {"3.100000", 4}, {"3.300000", 10}, {"2.900000", 5}, {"2.000000", 22}, {"2.500000", 7}, - {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 13}, + {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 16}, {"3.000000", 0}, {"2.700000", 6}, {"2.600000", 11}}, - {{"6.900000", 42}, {"6.300000", 41}, {"6.600000", 40}, {"1.000000", 39}, - {"1.100000", 38}, {"3.000000", 37}, {"3.600000", 34}, {"5.400000", 32}, - {"6.700000", 31}, 
{"5.300000", 30}, {"5.900000", 29}, {"4.900000", 7}, - {"5.800000", 15}, {"3.700000", 36}, {"5.200000", 27}, {"4.800000", 10}, - {"1.900000", 28}, {"4.500000", 3}, {"1.300000", 4}, {"5.100000", 2}, - {"6.000000", 23}, {"1.600000", 5}, {"4.400000", 12}, {"5.000000", 11}, - {"6.400000", 35}, {"3.500000", 24}, {"1.500000", 0}, {"4.200000", 13}, - {"6.100000", 17}, {"3.800000", 33}, {"1.400000", 1}, {"5.700000", 16}, - {"4.600000", 19}, {"5.500000", 18}, {"1.200000", 22}, {"4.100000", 20}, - {"4.000000", 8}, {"5.600000", 6}, {"3.900000", 21}, {"4.700000", 9}, - {"1.700000", 14}, {"3.300000", 25}, {"4.300000", 26}}, - - {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.900000", 13}, - {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 10}, - {"1.800000", 2}, {"2.200000", 15}, {"0.400000", 6}, {"1.500000", 3}, - {"0.100000", 9}, {"1.400000", 4}, {"2.300000", 5}, {"0.300000", 7}, - {"0.500000", 21}, {"2.500000", 17}, {"1.600000", 14}, {"2.000000", 11}, - {"1.200000", 12}, {"1.000000", 8}}, + {{"1.000000", 42}, {"1.100000", 41}, {"3.000000", 40}, {"3.600000", 39}, + {"6.300000", 36}, {"6.600000", 34}, {"6.900000", 33}, {"5.900000", 24}, + {"4.900000", 7}, {"4.500000", 3}, {"1.300000", 5}, {"1.900000", 31}, + {"4.800000", 11}, {"5.800000", 16}, {"3.700000", 38}, {"5.200000", 27}, + {"5.400000", 25}, {"5.100000", 2}, {"6.000000", 23}, {"1.600000", 4}, + {"4.400000", 12}, {"5.000000", 10}, {"6.700000", 22}, {"6.400000", 35}, + {"3.500000", 29}, {"1.500000", 0}, {"4.200000", 13}, {"6.100000", 15}, + {"3.300000", 30}, {"4.700000", 8}, {"1.700000", 14}, {"3.800000", 37}, + {"1.400000", 1}, {"5.700000", 17}, {"4.600000", 19}, {"5.500000", 18}, + {"1.200000", 32}, {"4.100000", 20}, {"4.000000", 9}, {"5.600000", 6}, + {"3.900000", 21}, {"5.300000", 26}, {"4.300000", 28}}, + + {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.200000", 13}, + {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 9}, + {"1.800000", 2}, {"2.200000", 17}, 
{"0.400000", 7}, {"1.500000", 3}, + {"0.100000", 11}, {"2.300000", 4}, {"1.400000", 5}, {"1.000000", 6}, + {"0.300000", 8}, {"0.500000", 21}, {"2.500000", 15}, {"1.600000", 14}, + {"2.000000", 10}, {"1.900000", 12}}, {{"Iris-setosa", 2}, {"Iris-versicolor", 1}, {"Iris-virginica", 0}}}; diff --git a/src/tests/test_tane_afd_measures.cpp b/src/tests/test_tane_afd_measures.cpp index d98da1d030..8baf14925c 100644 --- a/src/tests/test_tane_afd_measures.cpp +++ b/src/tests/test_tane_afd_measures.cpp @@ -37,6 +37,10 @@ struct TaneValidationParams { algos::AfdErrorMeasure error_measure; std::vector afds; CSVConfig csv_config; + + TaneValidationParams(algos::AfdErrorMeasure error_measure, std::vector&& afds, + CSVConfig const& csv_config) + : error_measure(error_measure), afds(afds), csv_config(csv_config) {} }; struct ColumnErr { @@ -47,6 +51,9 @@ struct ColumnErr { struct PdepSelfValidationParams { std::vector errors; CSVConfig csv_config; + + PdepSelfValidationParams(std::vector&& errors, CSVConfig const& csv_config) + : errors(errors), csv_config(csv_config) {} }; class TestTanePdepSelfValidation : public ::testing::TestWithParam {}; diff --git a/src/tests/test_typed_column_data.cpp b/src/tests/test_typed_column_data.cpp index 25f86c01ee..85a6847db5 100644 --- a/src/tests/test_typed_column_data.cpp +++ b/src/tests/test_typed_column_data.cpp @@ -17,6 +17,9 @@ using mo::TypeId; struct TypeParsingParams { std::vector expected; CSVConfig const& csv_config; + + TypeParsingParams(std::vector&& expected, CSVConfig const& csv_config) + : expected(std::move(expected)), csv_config(csv_config) {} }; class TestTypeParsing : public ::testing::TestWithParam {}; diff --git a/src/tests/test_types.cpp b/src/tests/test_types.cpp index c0ccf2917f..d11a79deb2 100644 --- a/src/tests/test_types.cpp +++ b/src/tests/test_types.cpp @@ -97,8 +97,8 @@ TYPED_TEST(TestNumeric, Negate) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename 
TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Abs) { @@ -108,8 +108,8 @@ TYPED_TEST(TestNumeric, Abs) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Add) { @@ -135,7 +135,7 @@ TYPED_TEST(TestNumeric, Div) { test(0, 100); test(22, 1); test(123, 321); - test(11.4, 3.14); + test(Type(11.4), Type(3.14)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -150,7 +150,7 @@ TYPED_TEST(TestNumeric, Sub) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -166,7 +166,7 @@ TYPED_TEST(TestNumeric, Mul) { test(100, 0); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -182,10 +182,13 @@ TYPED_TEST(TestNumeric, Pow) { test(0, 100); test(22, 12); - test(123, 321); - test(2.72, 1.3123141); - test(-102, 11); - test(-123, 123); + test(Type(2.72), 1.3123141); + // 123^321, -102^11 and -123^123 won't fit into long (i. e. 
IntType) -- it's UB + if constexpr (!std::is_base_of_v) { + test(123, 321); + test(-102, 11); + test(-123, 123); + } test(-21, -7); } @@ -200,7 +203,7 @@ TYPED_TEST(TestNumeric, Dist) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -214,8 +217,8 @@ TYPED_TEST(TestNumeric, ValueToString) { test(0); test(123); - test(3.14123123182387); - test(-1231.123456678987654321); + test(typename TypeParam::UnderlyingType(3.14123123182387)); + test(typename TypeParam::UnderlyingType(-1231.123456678987654321)); } struct TestStringParam { diff --git a/src/tests/test_util.cpp b/src/tests/test_util.cpp index db5e5ff08a..999ef14917 100644 --- a/src/tests/test_util.cpp +++ b/src/tests/test_util.cpp @@ -79,7 +79,7 @@ TEST(pliIntersectChecker, first) { } TEST(testingBitsetToLonglong, first) { - size_t encoded_num = 1254; + unsigned long encoded_num = 1254; boost::dynamic_bitset<> simple_bitset{20, encoded_num}; auto res_vector = *model::ListAgreeSetSample::BitSetToLongLongVector(simple_bitset); diff --git a/ub_sanitizer_ignore_list.txt b/ub_sanitizer_ignore_list.txt new file mode 100644 index 0000000000..b75b0bd24c --- /dev/null +++ b/ub_sanitizer_ignore_list.txt @@ -0,0 +1,6 @@ +# Disable UB sanitizer, "Indirect call of function through a pointer of the wrong type" check: +[function] +# in all files: +src:* +# enable again only in our code: +src:src/*=sanitize