Skip to content

Commit

Permalink
Reorder Jacobian calculation (#707)
Browse files Browse the repository at this point in the history
* calculate jacobian by column

* account for multiple of the same reactant in Jacobian calculations

* relax tolerance for terminator test slightly

* update jitted jacobian function

* update cuda jacobian calcs

* debug cuda code

* simplify the singleton call

* fix jacobian algorithm

* use iterators in jacobian function

---------

Co-authored-by: Jian Sun <[email protected]>
  • Loading branch information
mattldawson and sjsprecious authored Feb 11, 2025
1 parent b342305 commit cbdc7bf
Show file tree
Hide file tree
Showing 12 changed files with 507 additions and 347 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/docker_and_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,14 @@ jobs:

- name: Run tests in container
# only run this if we are not running coverage tests or have built the cuda files
if: matrix.dockerfile != 'Dockerfile.coverage' && matrix.dockerfile != 'Dockerfile.nvhpc'
if: matrix.dockerfile != 'Dockerfile.coverage' && matrix.dockerfile != 'Dockerfile.nvhpc' && matrix.dockerfile != 'Dockerfile.llvm'
run: docker run --name test-container -t micm bash -c 'make test ARGS="--rerun-failed --output-on-failure -j8"'

- name: Run tests using LLVM in container
# excludes some very expensive memcheck tests
if: matrix.dockerfile == 'Dockerfile.llvm'
run: docker run --name test-container -t micm bash -c 'make test ARGS="--rerun-failed --output-on-failure -j8 -E \"memcheck_jit_linear_solver|memcheck_analytical_rosenbrock_integration\""'

- name: Run coverage tests in container
if: matrix.dockerfile == 'Dockerfile.coverage'
run: docker run --name test-container -t micm bash -c 'make coverage ARGS="--rerun-failed --output-on-failure -j8"'
Expand Down
21 changes: 12 additions & 9 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
runs-on: windows-2019
strategy:
matrix:
build_type: [Release]
architecture: [x64]

steps:
Expand All @@ -27,15 +28,15 @@ jobs:
version: 12.2.0 # https://github.com/egor-tensin/setup-mingw/issues/14

- name: Run Cmake
run: cmake -S . -B build -G "MinGW Makefiles"
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -G "MinGW Makefiles"

- name: Build
run: cmake --build build --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10

- name: Run tests
run: |
cd build
ctest -C Debug --rerun-failed --output-on-failure . --verbose
ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure . --verbose
msvc2022:
runs-on: windows-2022
Expand All @@ -59,31 +60,33 @@ jobs:
continue-on-error: true
strategy:
matrix:
build_type: [Release]
version: [11, 12, 13, 14, 15]

steps:
- uses: actions/checkout@v4
- name: Install Clang
run: curl -fsSL -o LLVM${{ matrix.version }}.exe https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ matrix.version }}.0.0/LLVM-${{ matrix.version }}.0.0-win64.exe ; 7z x LLVM${{ matrix.version }}.exe -y -o"C:/Program Files/LLVM"
- name: Run CMake
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe" -G"MinGW Makefiles"
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang++.exe" -G"MinGW Makefiles"
- name: Build
run: cmake --build build --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10
- name: Test
run: cd build ; ctest -j 10 -C Debug --exclude-regex "test-unicode" --output-on-failure
run: cd build ; ctest -j 10 -C ${{ matrix.build_type }} --exclude-regex "test-unicode" --output-on-failure

clang-cl-11:
runs-on: windows-2019
continue-on-error: true
strategy:
matrix:
build_type: [Release]
architecture: [x64]

steps:
- uses: actions/checkout@v4
- name: Run CMake
run: cmake -S . -B build -G "Visual Studio 16 2019" -A ${{ matrix.architecture }} -T ClangCL
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -G "Visual Studio 16 2019" -A ${{ matrix.architecture }} -T ClangCL
- name: Build
run: cmake --build build --config Debug --parallel 10
run: cmake --build build --config ${{ matrix.build_type }} --parallel 10
- name: Test
run: cd build ; ctest -j 10 -C Debug --exclude-regex "test-unicode" --output-on-failure
run: cd build ; ctest -j 10 -C ${{ matrix.build_type }} --exclude-regex "test-unicode" --output-on-failure
2 changes: 1 addition & 1 deletion docker/Dockerfile.nvhpc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ RUN mkdir /build \
-D MICM_ENABLE_CUDA=ON \
-D MICM_GPU_TYPE="a100" \
../micm \
&& make
&& make -j 8

WORKDIR /build
7 changes: 3 additions & 4 deletions include/micm/cuda/process/cuda_process_set.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ namespace micm
/// members of class "CudaProcessSet" on the device
void FreeConstData(ProcessSetParam& devstruct);

/// This is the function that will copy the "jacobian_flat_id"
/// of class "ProcessSet" to the device, after the matrix
/// structure is known
void CopyJacobiFlatId(ProcessSetParam& hoststruct, ProcessSetParam& devstruct);
/// This is the function that will copy the information needed
/// to calculated Jacobian terms to the device
void CopyJacobianParams(ProcessSetParam& hoststruct, ProcessSetParam& devstruct);

/// This is the host function that will call the CUDA kernel
/// to calculate the forcing terms
Expand Down
20 changes: 19 additions & 1 deletion include/micm/cuda/process/cuda_process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,29 @@ namespace micm
micm::ProcessSet::SetJacobianFlatIds(matrix);

ProcessSetParam hoststruct;
std::vector<ProcessInfoParam> jacobian_process_info(this->jacobian_process_info_.size());
std::size_t i_process = 0;
for (const auto & process_info : this->jacobian_process_info_)
{
jacobian_process_info[i_process].process_id_ = process_info.process_id_;
jacobian_process_info[i_process].independent_id_ = process_info.independent_id_;
jacobian_process_info[i_process].number_of_dependent_reactants_ = process_info.number_of_dependent_reactants_;
jacobian_process_info[i_process].number_of_products_ = process_info.number_of_products_;
++i_process;
}
hoststruct.jacobian_process_info_ = jacobian_process_info.data();
hoststruct.jacobian_process_info_size_ = jacobian_process_info.size();
hoststruct.jacobian_reactant_ids_ = this->jacobian_reactant_ids_.data();
hoststruct.jacobian_reactant_ids_size_ = this->jacobian_reactant_ids_.size();
hoststruct.jacobian_product_ids_ = this->jacobian_product_ids_.data();
hoststruct.jacobian_product_ids_size_ = this->jacobian_product_ids_.size();
hoststruct.jacobian_yields_ = this->jacobian_yields_.data();
hoststruct.jacobian_yields_size_ = this->jacobian_yields_.size();
hoststruct.jacobian_flat_ids_ = this->jacobian_flat_ids_.data();
hoststruct.jacobian_flat_ids_size_ = this->jacobian_flat_ids_.size();

// Copy the data from host struct to device struct
micm::cuda::CopyJacobiFlatId(hoststruct, this->devstruct_);
micm::cuda::CopyJacobianParams(hoststruct, this->devstruct_);
}

template<class MatrixPolicy>
Expand Down
17 changes: 17 additions & 0 deletions include/micm/cuda/util/cuda_param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
// make sure to choose the BLOCK_SIZE from [32, 64, 128, 256, 512, 1024]
const std::size_t BLOCK_SIZE = 512;

/// This struct holds information about a process for the Jacobian calculation
struct ProcessInfoParam
{
std::size_t process_id_;
std::size_t independent_id_;
std::size_t number_of_dependent_reactants_;
std::size_t number_of_products_;
};

/// This struct holds the (1) pointer to, and (2) size of
/// each constatnt data member from the class "ProcessSet";
/// This struct could be allocated on the host or device;
Expand All @@ -19,12 +28,20 @@ struct ProcessSetParam
std::size_t* number_of_products_ = nullptr;
std::size_t* product_ids_ = nullptr;
double* yields_ = nullptr;
ProcessInfoParam* jacobian_process_info_ = nullptr;
std::size_t* jacobian_reactant_ids_ = nullptr;
std::size_t* jacobian_product_ids_ = nullptr;
double* jacobian_yields_ = nullptr;
std::size_t* jacobian_flat_ids_ = nullptr;
std::size_t number_of_reactants_size_;
std::size_t reactant_ids_size_;
std::size_t number_of_products_size_;
std::size_t product_ids_size_;
std::size_t yields_size_;
std::size_t jacobian_process_info_size_;
std::size_t jacobian_reactant_ids_size_;
std::size_t jacobian_product_ids_size_;
std::size_t jacobian_yields_size_;
std::size_t jacobian_flat_ids_size_;
};

Expand Down
127 changes: 60 additions & 67 deletions include/micm/jit/process/jit_process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,81 +222,74 @@ namespace micm
llvm::AllocaInst *rate_array = func.builder_->CreateAlloca(
rate_array_type, llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, 1)), "d_rate_d_ind_array");

auto react_ids = reactant_ids_.begin();
auto yields = yields_.begin();
auto react_ids = jacobian_reactant_ids_.begin();
auto yields = jacobian_yields_.begin();
auto flat_id = jacobian_flat_ids_.begin();
for (std::size_t i_rxn = 0; i_rxn < number_of_reactants_.size(); ++i_rxn)
for (const auto& process_info : jacobian_process_info_)
{
llvm::Value *rc_start = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, i_rxn * L));
for (std::size_t i_ind = 0; i_ind < number_of_reactants_[i_rxn]; ++i_ind)
llvm::Value *rc_start = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, process_info.process_id_ * L));
// save rate constant in d_rate_d_ind for each grid cell
auto loop = func.StartLoop("rate constant", 0, L);
llvm::Value *ptr_index[1];
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, rc_start);
llvm::Value *rate_const = func.GetArrayElement(func.arguments_[0], ptr_index, JitType::Double);
llvm::Value *array_index[2];
array_index[0] = zero;
array_index[1] = loop.index_;
llvm::Value *rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
func.builder_->CreateStore(rate_const, rate_ptr);
func.EndLoop(loop);

// d_rate_d_ind[i_cell] *= reactant_concentration for each reactant except ind
for (std::size_t i_react = 0; i_react < process_info.number_of_dependent_reactants_; ++i_react)
{
loop = func.StartLoop("d_rate_d_ind calc", 0, L);
llvm::Value *react_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, react_ids[i_react] * L));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, react_id);
llvm::Value *react_conc = func.GetArrayElement(func.arguments_[1], ptr_index, JitType::Double);
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
rate = func.builder_->CreateFMul(rate, react_conc, "d_rate_d_ind");
func.builder_->CreateStore(rate, rate_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each reactant jac[i_react][i_ind][i_cell] += d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < process_info.number_of_dependent_reactants_+1; ++i_dep)
{
// save rate constant in d_rate_d_ind for each grid cell
auto loop = func.StartLoop("rate constant", 0, L);
llvm::Value *ptr_index[1];
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, rc_start);
llvm::Value *rate_const = func.GetArrayElement(func.arguments_[0], ptr_index, JitType::Double);
llvm::Value *array_index[2];
array_index[0] = zero;
loop = func.StartLoop("reactant term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "reactant jacobian term");
array_index[1] = loop.index_;
llvm::Value *rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
func.builder_->CreateStore(rate_const, rate_ptr);
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
dep_jac = func.builder_->CreateFAdd(dep_jac, rate, "reactant jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}

// d_rate_d_ind[i_cell] *= reactant_concentration for each reactant except ind
for (std::size_t i_react = 0; i_react < number_of_reactants_[i_rxn]; ++i_react)
{
if (i_react == i_ind)
{
continue;
}
loop = func.StartLoop("d_rate_d_ind calc", 0, L);
llvm::Value *react_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, react_ids[i_react] * L));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, react_id);
llvm::Value *react_conc = func.GetArrayElement(func.arguments_[1], ptr_index, JitType::Double);
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
rate = func.builder_->CreateFMul(rate, react_conc, "d_rate_d_ind");
func.builder_->CreateStore(rate, rate_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each reactant jac[i_react][i_ind][i_cell] += d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < number_of_reactants_[i_rxn]; ++i_dep)
// set jacobian terms for each product jac[i_prod][i_ind][i_cell] -= yield * d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < process_info.number_of_products_; ++i_dep)
{
loop = func.StartLoop("reactant term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "reactant jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
dep_jac = func.builder_->CreateFAdd(dep_jac, rate, "reactant jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}

// set jacobian terms for each product jac[i_prod][i_ind][i_cell] -= yield * d_rate_d_ind[i_cell]
for (std::size_t i_dep = 0; i_dep < number_of_products_[i_rxn]; ++i_dep)
{
loop = func.StartLoop("product term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "product jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
llvm::Value *yield = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(yields[i_dep]));
rate = func.builder_->CreateFMul(rate, yield, "product yield");
dep_jac = func.builder_->CreateFSub(dep_jac, rate, "product jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}
loop = func.StartLoop("product term", 0, L);
llvm::Value *dep_id = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, *(flat_id++)));
ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, dep_id);
llvm::Value *dep_jac_ptr = func.builder_->CreateGEP(double_type, func.arguments_[2].ptr_, ptr_index);
llvm::Value *dep_jac = func.builder_->CreateLoad(double_type, dep_jac_ptr, "product jacobian term");
array_index[1] = loop.index_;
rate_ptr = func.builder_->CreateInBoundsGEP(rate_array_type, rate_array, array_index);
llvm::Value *rate = func.builder_->CreateLoad(double_type, rate_ptr, "d_rate_d_ind");
llvm::Value *yield = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(yields[i_dep]));
rate = func.builder_->CreateFMul(rate, yield, "product yield");
dep_jac = func.builder_->CreateFSub(dep_jac, rate, "product jacobian term");
func.builder_->CreateStore(dep_jac, dep_jac_ptr);
func.EndLoop(loop);
}
react_ids += number_of_reactants_[i_rxn];
yields += number_of_products_[i_rxn];
react_ids += process_info.number_of_dependent_reactants_;
yields += process_info.number_of_products_;
}
func.builder_->CreateRetVoid();

Expand Down
Loading

0 comments on commit cbdc7bf

Please sign in to comment.