Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove omp as a dependency #42

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/build
/.cache
/.vscode
19 changes: 0 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,6 @@ option(PIBENCH_BUILD_LEVELDB "Build LevelDB wrapper" OFF)

include(CTest)

find_package(OpenMP REQUIRED)

if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(OpenMP_C "${CMAKE_C_COMPILER}")
set(OpenMP_C_FLAGS "-fopenmp=libomp")
set(OpenMP_C_LIB_NAMES "libomp" "libgomp" "libiomp5")
set(OpenMP_libomp_LIBRARY ${OpenMP_C_LIB_NAMES})
set(OpenMP_libgomp_LIBRARY ${OpenMP_C_LIB_NAMES})
set(OpenMP_libiomp5_LIBRARY ${OpenMP_C_LIB_NAMES})
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
set(OpenMP_CXX_FLAGS "-fopenmp=libomp")
set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5")
set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES})
endif()

######################## Intel PCM ########################
add_custom_command(OUTPUT libPCM.a
COMMAND make lib
Expand Down
14 changes: 0 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,6 @@ If so, you can comment the following line in `pcm/Makefile`:
CXXFLAGS += -DPCM_USE_PERF
```

# OpenMP
PiBench uses OpenMP internally for multithreading.
The environment variable `OMP_NESTED=true` must be set to guarantee correctness.
Check [here](https://docs.microsoft.com/en-us/cpp/parallel/openmp/reference/openmp-environment-variables?view=vs-2019#omp-nested) for details.

Other environment variables such as [`OMP_PLACES`](https://gnu.huihoo.org/gcc/gcc-4.9.4/libgomp/OMP_005fPLACES.html#OMP_005fPLACES) and [`OMP_PROC_BIND`](https://gnu.huihoo.org/gcc/gcc-4.9.4/libgomp/OMP_005fPROC_005fBIND.html) can be set to control the multithreaded behavior.

For example:

`$ OMP_PLACES=cores OMP_PROC_BIND=true OMP_NESTED=true ./PiBench [...]`

Note for Clang users: you may need to additionally install the OpenMP runtime. On Arch Linux, this can be done by installing the package `extra/openmp`.


# Running
The `PiBench` executable is generated and supports the following arguments:
```
Expand Down
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ jobs:
Clang-6.0 Debug:
CC: clang-6.0
CXX: clang++-6.0
Packages: clang-6.0 libomp5 libomp-dev
Packages: clang-6.0
BuildType: Debug

Clang-6.0 Release:
CC: clang-6.0
CXX: clang++-6.0
Packages: clang-6.0 libomp5 libomp-dev
Packages: clang-6.0
BuildType: Release

steps:
Expand Down
166 changes: 166 additions & 0 deletions include/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#ifndef __UTILS_HPP__
#define __UTILS_HPP__

#include <atomic>
#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iterator>

namespace PiBench
{
Expand Down Expand Up @@ -150,6 +161,161 @@ namespace utils
asm volatile("" : : "g"(vptr) : "memory");
}
}

/**
 * @brief reusable rendezvous point for a fixed number of threads
 *
 * Every call to arriveAndWait() counts as one arrival. Once as many threads
 * as the threshold have arrived, all of them are released together and the
 * barrier resets itself for the next round.
 *
 * A threshold of 0 is not supported: the arrival counter is decremented
 * before it is tested, so it would wrap around instead of reaching zero.
 */
class barrier {
public:
    /**
     * @brief Construct a new barrier object
     *
     * @param Threshold number of arrivals required to release the waiters
     */
    explicit barrier(std::uint64_t Threshold)
        : partySize(Threshold), remaining(Threshold) {}

    /**
     * @brief registers the calling thread at the barrier and blocks it until
     * the configured number of threads has arrived; the last arrival wakes
     * every waiting thread and re-arms the barrier.
     */
    void arriveAndWait() {
        std::unique_lock<std::mutex> guard(stateMutex);
        const std::uint64_t arrivalRound = round;

        if (--remaining == 0) {
            // last arrival: open a new round, re-arm the counter, wake everyone
            ++round;
            remaining = partySize;
            released.notify_all();
            return;
        }

        // a wakeup only counts once the round has advanced past the one this
        // thread arrived in; anything else is a spurious wakeup
        while (round == arrivalRound) {
            released.wait(guard);
        }
    }

private:
    //! number of arrivals that completes one round
    std::uint64_t partySize;
    //! arrivals still missing in the current round
    std::uint64_t remaining;
    //! round counter; lets waiters tell a real release from a spurious wakeup
    std::uint64_t round = 0;
    std::mutex stateMutex;
    std::condition_variable released;
};

/**
 * @brief integer division that yields quotient and remainder in one call
 *
 * @param dividend value to be divided
 * @param divisor value to divide by; must not be 0 (integer division by
 * zero is undefined behavior)
 * @return std::pair<uint64_t, uint64_t> `first` is the quotient, `second`
 * is the remainder
 */
inline std::pair<uint64_t, uint64_t> divide(const uint64_t dividend,
                                            const uint64_t divisor) {
    const uint64_t quotient = dividend / divisor;
    const uint64_t remainder = dividend % divisor;
    return {quotient, remainder};
}

/**
 * @brief numeric id for the calling thread
 *
 * The id is the std::hash of std::this_thread::get_id(), truncated to
 * 32 bits.
 *
 * @return uint32_t id derived from the calling thread's std::thread::id
 */
inline uint32_t getThreadId() {
    const std::hash<std::thread::id> hasher{};
    const auto hashed = hasher(std::this_thread::get_id());
    return static_cast<uint32_t>(hashed);
}

//! ids of the CPU cores that threads are pinned to. `setAffinity` picks its
//! target core from this list and does nothing while the list is empty.
//! Kept in `utils` rather than in `opt` to make it easier to call
//! `setAffinity` from outside of PiBench.
inline std::vector<uint32_t> cores;

/**
 * @brief pin the calling thread to one of the configured cores
 *
 * The target core is `cores[threadId % cores.size()]`. The affinity is
 * applied through `sched_setaffinity` with a pid of 0, i.e. to the calling
 * thread; `threadId` only selects which core is used.
 *
 * @param threadId value used to select the core; defaults to getThreadId()
 * @return true if the affinity was set successfully
 * @return false if `cores` is empty or `sched_setaffinity` reported an error
 */
inline bool setAffinity(uint32_t threadId = getThreadId()) {
    if (cores.empty()) {
        return false;
    }

    const uint32_t cpuId = cores[threadId % cores.size()];
    cpu_set_t cpuSet;
    CPU_ZERO(&cpuSet);
    CPU_SET(cpuId, &cpuSet);

    // sched_setaffinity returns 0 on success and -1 on failure (e.g. when the
    // selected core does not exist); report that instead of assuming success
    return sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet) == 0;
}

/**
 * @brief runs a for loop in parallel on freshly spawned threads
 *
 * `iterations` is split across `threadNum` threads: every thread performs
 * `iterations / threadNum` calls of `task`, and the thread with index 0
 * additionally performs the `iterations % threadNum` leftover calls. Each
 * thread first calls `setAffinity()`, then waits at a barrier until all
 * threads have been started, then runs `preLoopTask` once followed by its
 * share of `task` calls. The function returns after every thread has been
 * joined.
 *
 * Exceptions derived from std::exception that escape `preLoopTask` or `task`
 * are reported on std::cerr and otherwise ignored; the loop carries on with
 * the next iteration.
 *
 * @param threadNum number of threads to spawn for the workload; when 0,
 * nothing is executed
 * @param cores not read by this function; pinning is done by `setAffinity()`,
 * which is called without arguments
 * @param preLoopTask called once per thread, right before that thread's loop,
 * with the thread's index in `[0, threadNum)`
 * @param task called once per iteration with the index of the executing thread
 * @param iterations total number of `task` calls across all threads
 */
inline void parallelForLoop(
    const uint64_t threadNum, const std::vector<uint32_t> &cores,
    const std::function<void(uint64_t)> &preLoopTask,
    const std::function<void(uint64_t)> &task, const uint64_t iterations) {
    static_cast<void>(cores);

    // without threads there is nobody to run the work; bailing out here also
    // keeps divide() from dividing by zero
    if (threadNum == 0) {
        return;
    }

    std::vector<std::thread> threads;
    threads.reserve(threadNum);
    barrier barr(threadNum);
    const auto partitionedIterations = divide(iterations, threadNum);

    for (uint64_t j = 0; j < threadNum; j++) {
        // shared state is captured by reference; this is safe because every
        // thread is joined before this function returns
        threads.emplace_back([&, partitionedIterations, j]() {
            setAffinity();
            // hold every thread back until all of them have been started
            barr.arriveAndWait();

            // thread 0 picks up the iterations that do not divide evenly
            const uint64_t threadLoad =
                j == 0 ? partitionedIterations.first +
                             partitionedIterations.second
                       : partitionedIterations.first;
            try {
                preLoopTask(j);
            } catch (const std::exception &ex) {
                std::cerr << "exception thrown in the pre-loop task: "
                          << ex.what() << '\n';
            }
            for (uint64_t i = 0; i < threadLoad; i++) {
                try {
                    task(j);
                } catch (const std::exception &ex) {
                    std::cerr << "exception thrown in the task: " << ex.what()
                              << '\n';
                }
            }
        });
    }

    for (auto &worker : threads) {
        worker.join();
    }
}


/**
 * @brief joins the elements of a vector into a single string
 *
 * Elements are written with `operator<<` and separated by `,`; there is no
 * leading or trailing delimiter.
 *
 * @tparam T element type of the vector
 * @param vec vector to stringify
 * @return std::string comma-separated elements, or an empty string when
 * `vec` is empty
 */
template <class T>
std::string stringify(const std::vector<T>& vec){
    std::ostringstream joined;
    const char* separator = "";
    for (const T& element : vec) {
        joined << separator << element;
        // every element after the first is preceded by the delimiter
        separator = ",";
    }
    return joined.str();
}
} // namespace utils
} // namespace PiBench
#endif
Loading