Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding missing openmp compile guards #1

Open
wants to merge 12 commits into
base: 20230628-riscv-enabling
Choose a base branch
from
Open
7 changes: 6 additions & 1 deletion src/tools/info/sys_info.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) Shanghai Zhaoxin Semiconductor Co., Ltd. 2020. ALL RIGHTS RESERVED.
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -37,15 +38,19 @@ static const char *cpu_model_names[] = {
[UCS_CPU_MODEL_AMD_GENOA] = "Genoa",
[UCS_CPU_MODEL_ZHAOXIN_ZHANGJIANG] = "Zhangjiang",
[UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU] = "Wudaokou",
[UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI] = "Lujiazui"
[UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI] = "Lujiazui",
[UCS_CPU_MODEL_RV64G] = "RV64G",
};



/* Printable CPU vendor names, indexed by the UCS_CPU_VENDOR_* constants */
static const char *cpu_vendor_names[] = {
    [UCS_CPU_VENDOR_UNKNOWN]       = "unknown",
    [UCS_CPU_VENDOR_INTEL]         = "Intel",
    [UCS_CPU_VENDOR_AMD]           = "AMD",
    [UCS_CPU_VENDOR_GENERIC_ARM]   = "Generic ARM",
    [UCS_CPU_VENDOR_GENERIC_PPC]   = "Generic PPC",
    [UCS_CPU_VENDOR_GENERIC_RV64G] = "Generic RV64G",
    [UCS_CPU_VENDOR_FUJITSU_ARM]   = "Fujitsu ARM",
    [UCS_CPU_VENDOR_ZHAOXIN]       = "Zhaoxin",
};
Expand Down
4 changes: 4 additions & 0 deletions src/tools/perf/lib/libperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1353,11 +1353,15 @@ ucx_perf_do_warmup(ucx_perf_context_t *perf, const ucx_perf_params_t *params)
if (params->thread_count == 1) {
status = ucx_perf_test_exchange_status(perf, stop_status);
} else {
#if _OPENMP
#pragma omp barrier
#pragma omp single copyprivate(status)
#endif
/* Synchronize on whether to continue or stop the warmup phase */
status = ucx_perf_test_exchange_status(perf, stop_status);
#if _OPENMP
#pragma omp barrier
#endif
}

if (status != UCS_INPROGRESS) {
Expand Down
8 changes: 8 additions & 0 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,11 @@ static unsigned sock_rte_group_index(void *rte_group)
static void sock_rte_barrier(void *rte_group, void (*progress)(void *arg),
void *arg)
{
#if _OPENMP
#pragma omp barrier

#pragma omp master
#endif
{
sock_rte_group_t *group = rte_group;

Expand All @@ -247,7 +249,9 @@ static void sock_rte_barrier(void *rte_group, void (*progress)(void *arg),
}
}
}
#if _OPENMP
#pragma omp barrier
#endif
}

static void sock_rte_post_vec(void *rte_group, const struct iovec *iovec,
Expand Down Expand Up @@ -567,9 +571,11 @@ static void mpi_rte_barrier(void *rte_group, void (*progress)(void *arg),
int dummy;
int flag;

#if _OPENMP
#pragma omp barrier

#pragma omp master
#endif
{
/*
* Naive non-blocking barrier implementation over send/recv, to call user
Expand Down Expand Up @@ -621,7 +627,9 @@ static void mpi_rte_barrier(void *rte_group, void (*progress)(void *arg),
}
}
}
#if _OPENMP
#pragma omp barrier
#endif
}

static void mpi_rte_post_vec(void *rte_group, const struct iovec *iovec,
Expand Down
6 changes: 4 additions & 2 deletions src/ucm/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ noinst_HEADERS = \
bistro/bistro.h \
bistro/bistro_x86_64.h \
bistro/bistro_aarch64.h \
bistro/bistro_ppc64.h
bistro/bistro_ppc64.h \
bistro/bistro_rv64.h

libucm_la_SOURCES = \
event/event.c \
Expand All @@ -44,7 +45,8 @@ libucm_la_SOURCES = \
bistro/bistro.c \
bistro/bistro_x86_64.c \
bistro/bistro_aarch64.c \
bistro/bistro_ppc64.c
bistro/bistro_ppc64.c \
bistro/bistro_rv64.c

if HAVE_UCM_PTMALLOC286
libucm_la_CPPFLAGS += \
Expand Down
19 changes: 12 additions & 7 deletions src/ucm/bistro/bistro.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018. ALL RIGHTS RESERVED.
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -50,12 +51,16 @@ static ucs_status_t ucm_bistro_protect(void *addr, size_t len, int prot)

void ucm_bistro_modify_code(void *dst, const ucm_bistro_lock_t *bytes)
{
uint32_t value;

UCS_STATIC_ASSERT(sizeof(*bytes) <= sizeof(value));
memcpy(&value, dst, sizeof(value));
memcpy(&value, bytes, sizeof(*bytes));
(void)ucs_atomic_swap32(dst, value);
UCS_STATIC_ASSERT(sizeof(*bytes) == 2 || sizeof(*bytes) == 4);
if (sizeof(*bytes) == 2) {
uint16_t value;
memcpy(&value, bytes, sizeof(*bytes));
(void)ucs_atomic_swap16(dst, value);
} else {
uint32_t value;
memcpy(&value, bytes, sizeof(*bytes));
(void)ucs_atomic_swap32(dst, value);
}
}

ucs_status_t
Expand Down Expand Up @@ -112,7 +117,7 @@ ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len)
return status;
}

#if defined(__x86_64__) || defined (__aarch64__)
#if defined(__x86_64__) || defined (__aarch64__) || defined (__riscv)
struct ucm_bistro_restore_point {
void *addr; /* address of function to restore */
size_t patch_len; /* patch length */
Expand Down
3 changes: 3 additions & 0 deletions src/ucm/bistro/bistro.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018. ALL RIGHTS RESERVED.
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -20,6 +21,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t;
# include "bistro_aarch64.h"
#elif defined(__x86_64__)
# include "bistro_x86_64.h"
#elif defined(__riscv)
# include "bistro_rv64.h"
#else
# error "Unsupported architecture"
#endif
Expand Down
146 changes: 146 additions & 0 deletions src/ucm/bistro/bistro_rv64.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/**
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#if defined(__riscv)

#include <ucs/arch/cpu.h>
#include <ucm/bistro/bistro.h>
#include <ucm/bistro/bistro_int.h>
#include <ucs/debug/assert.h>
#include <ucs/sys/math.h>
#include <ucm/util/sys.h>

#include <assert.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

/* Register numbers used by the instructions of the generated patch.
* X31 is the destination register: the hook address is assembled in it and
* it is the base register of the final jump.
* X30 is a scratch register that receives the LUI-loaded part of the lower
* 32 bits of the hook address.
* X0 is always zero; it is passed as the destination of the final jump.
*/
#define X31 31
#define X30 30
#define X0 0

/**
 * @brief JALR - encode a "jump and link register" instruction: jump to the
 *        address in the source register plus a 12 bit immediate, writing the
 *        return address to the destination register.
 *
 * The fields are OR-ed together without masking, so every argument must
 * already be within its documented range.
 *
 * @param[in] _regs source (base) register number (0-31)
 * @param[in] _regd destination (link) register number (0-31)
 * @param[in] _imm  12 bit immediate value
 */
#define JALR(_regs, _regd, _imm) \
    ((0x67) | ((_regd) << 7) | (0x0 << 12) | ((_regs) << 15) | ((_imm) << 20))

/**
 * @brief C_J - encode a 16 bit compressed unconditional jump.
 *
 * The offset is halved and placed linearly starting at bit 2 of the
 * instruction.
 * NOTE(review): the RISC-V compressed CJ format stores the offset bits in a
 * permuted order, so this encoding is only known to be right for _imm == 0
 * (jump to self), the single value used in this file - confirm before using
 * any other offset.
 *
 * @param[in] _imm jump offset in bytes; the argument is fully parenthesized,
 *                 so any expression may be passed
 */
#define C_J(_imm) \
    (((0b101) << 13) | (((_imm) >> 1) << 2) | (0b01))

/**
 * @brief ADDI - encode "add 12 bit immediate to the source register and store
 *        the result in the destination register".
 *
 * The fields are OR-ed together without masking, so every argument must
 * already be within its documented range.
 *
 * @param[in] _regs source register number (0-31)
 * @param[in] _regd destination register number (0-31)
 * @param[in] _imm  12 bit immediate value
 */
#define ADDI(_regs, _regd, _imm) \
    ((0x13) | ((_regd) << 7) | (0x0 << 12) | ((_regs) << 15) | ((_imm) << 20))

/**
 * @brief ADD - encode "add two registers and store the sum in the destination
 *        register".
 *
 * All arguments are parenthesized inside the expansion, so expressions may be
 * passed safely. The fields are OR-ed together without masking, so every
 * argument must already be within its documented range.
 *
 * @param[in] _regs_a first source register number (0-31)
 * @param[in] _regs_b second source register number (0-31)
 * @param[in] _regd   destination register number (0-31)
 */
#define ADD(_regs_a, _regs_b, _regd) \
    (((_regs_b) << 20) | ((_regs_a) << 15) | (0b000 << 12) | \
     ((_regd) << 7) | (0x33))

/**
 * @brief LUI - encode "load upper immediate": place a 20 bit immediate into
 *        bits 31-12 of the destination register.
 *
 * The fields are OR-ed together without masking, so every argument must
 * already be within its documented range (or the result must be truncated to
 * 32 bits by the caller).
 *
 * @param[in] _regd destination register number (0-31)
 * @param[in] _imm  20 bit immediate value
 */
#define LUI(_regd, _imm) ((0x37) | ((_regd) << 7) | ((_imm) << 12))

/**
 * @brief SLLI - encode "shift the source register left by an immediate number
 *        of bits and store the result in the destination register".
 *
 * The fields are OR-ed together without masking, so every argument must
 * already be within its documented range.
 *
 * @param[in] _regs source register number (0-31)
 * @param[in] _regd destination register number (0-31)
 * @param[in] _imm  shift amount in bits
 */
#define SLLI(_regs, _regd, _imm) \
    ((0x13) | ((_regd) << 7) | (0x1 << 12) | ((_regs) << 15) | ((_imm) << 20))

/**
 * Replace the code at @a dst with a compressed "jump to self" instruction
 * (a C_J with zero offset), written through ucm_bistro_modify_code().
 *
 * @param dst Address of the code to overwrite.
 */
void ucm_bistro_patch_lock(void *dst)
{
    static const ucm_bistro_lock_t jump_to_self = {.j = C_J(0)};

    ucm_bistro_modify_code(dst, &jump_to_self);
}

/**
 * Redirect @a func_ptr to @a hook by overwriting its first instructions with
 * a sequence that builds the 64 bit hook address in X31 (using X30 as
 * scratch) and jumps to it.
 *
 * @param func_ptr    Function to patch.
 * @param hook        Function to jump to instead.
 * @param symbol      Not used by this implementation.
 * @param orig_func_p Must be NULL; otherwise UCS_ERR_UNSUPPORTED is returned
 *                    and nothing is modified.
 * @param rp          Passed to ucm_bistro_create_restore_point().
 *
 * @return UCS_ERR_UNSUPPORTED if @a orig_func_p is not NULL, the error from
 *         creating the restore point if that fails, otherwise the result of
 *         applying the patch.
 */
ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
                              void **orig_func_p,
                              ucm_bistro_restore_point_t **rp)
{
    uintptr_t target = (uintptr_t)hook;
    ucm_bistro_patch_t code;
    ucs_status_t rc;

    /* Handing back a pointer to the original function is not supported */
    if (orig_func_p != NULL) {
        return UCS_ERR_UNSUPPORTED;
    }

    /*
     * JALR, ADD and ADDI operate on sign-extended values, so a set top bit in
     * a lower chunk effectively subtracts from the chunk above it. Add one to
     * the next chunk in each such case to compensate. The three steps must
     * stay in this order because each one may carry into the next.
     */
    target += ((target >> 11) & 0x1) << 12;
    target += ((target >> 31) & 0x1) << 32;
    target += ((target >> (32 + 11)) & 0x1) << (32 + 12);

    /* Upper 32 bits of the address go to X31 */
    code.rega = LUI(X31, target >> (32 + 12));
    code.regb = ADDI(X31, X31, ((target >> 32) & 0xFFF));
    /* Bits 31-12 go to X30 */
    code.regc = LUI(X30, target >> 12);
    /* Move the upper half into place and merge both halves in X31 */
    code.regd = SLLI(X31, X31, 32);
    code.rege = ADD(X30, X31, X31);
    /* The lowest 12 bits are supplied as the jump immediate */
    code.regf = JALR(X31, X0, (target & 0xFFF));

    rc = ucm_bistro_create_restore_point(func_ptr, sizeof(code), rp);
    if (UCS_STATUS_IS_ERR(rc)) {
        return rc;
    }

    return ucm_bistro_apply_patch_atomic(func_ptr, &code, sizeof(code));
}

/*
* Instruction relocation is not implemented in this file: the context is
* ignored and UCS_ERR_UNSUPPORTED is always returned.
*/
ucs_status_t ucm_bistro_relocate_one(ucm_bistro_relocate_context_t *ctx)
{
return UCS_ERR_UNSUPPORTED;
}

#endif
59 changes: 59 additions & 0 deletions src/ucm/bistro/bistro_rv64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/


#ifndef UCM_BISTRO_BISTRO_RV64_H_
#define UCM_BISTRO_BISTRO_RV64_H_

#include <ucs/type/status.h>
#include <ucs/sys/compiler_def.h>

#include <stddef.h>
#include <stdint.h>

#define UCM_BISTRO_PROLOGUE
#define UCM_BISTRO_EPILOGUE

/*
* Code written over the start of a patched function: six 32-bit instruction
* words that build the hook address in X31 and jump to it. The bit ranges
* below refer to the hook address after ucm_bistro_patch() has adjusted it
* for sign extension.
*/
typedef struct ucm_bistro_patch {
uint32_t rega; /* LUI X31: load address bits 63-44 */
uint32_t regb; /* ADDI X31: add address bits 43-32 */
uint32_t regc; /* LUI X30: load address bits 31-12 */
uint32_t regd; /* SLLI X31: shift upper 32 bits left */
uint32_t rege; /* ADD: X31 = X30 + X31, merge both halves */
uint32_t regf; /* JALR via X31: add address bits 11-0 and jump */
} UCS_S_PACKED ucm_bistro_patch_t;


/**
* Set a library function call hook using the Binary Instrumentation
* method (BISTRO): overwrite the beginning of the function with a jump to a
* user-defined replacement.
*
* @param func_ptr Pointer to function to patch.
* @param hook User-defined function-replacer.
* @param symbol Function name to replace. Not used by the RV64
* implementation.
* @param orig_func_p Unsupported on this architecture and must be NULL.
* If set to a non-NULL value, this function returns
* @ref UCS_ERR_UNSUPPORTED.
* @param rp Restore point used to restore original function.
* Optional, may be NULL.
*
* @return Error code as defined by @ref ucs_status_t
*/
ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
void **orig_func_p,
ucm_bistro_restore_point_t **rp);

/*
* Lock implementation: a single 16-bit (compressed) instruction that
* ucm_bistro_patch_lock() writes over the code being patched.
*/
typedef struct {
uint16_t j; /* jump to self */
} UCS_S_PACKED ucm_bistro_lock_t;

/**
* Helper function to improve atomicity of function patching: overwrites the
* code at the given address with the "jump to self" lock instruction.
*
* @param dst Address of the code to overwrite.
*/
void ucm_bistro_patch_lock(void *dst);

#endif
Loading