diff --git a/src/main/cpp/jni/CMakeLists.txt b/src/main/cpp/jni/CMakeLists.txt
new file mode 100644
index 00000000000..268e124b6bd
--- /dev/null
+++ b/src/main/cpp/jni/CMakeLists.txt
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+project(cujava_jni LANGUAGES CXX)
+
+# Build the subprojects: the shared static helper library (common), then runtime, driver, cusparse and cublas
+add_subdirectory(common)
+add_subdirectory(runtime)
+add_subdirectory(driver)
+add_subdirectory(cusparse)
+add_subdirectory(cublas)
diff --git a/src/main/cpp/jni/build_cujava_libs.sh b/src/main/cpp/jni/build_cujava_libs.sh
new file mode 100755
index 00000000000..4ceaab2373f
--- /dev/null
+++ b/src/main/cpp/jni/build_cujava_libs.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+set -euo pipefail
+
+# Usage (from src/main/cpp/jni; paths below are relative to that directory):
+#   chmod +x build_cujava_libs.sh
+#   [BUILD_TYPE=Debug] ./build_cujava_libs.sh [build-dir]   # defaults: build dir ./build, type Release
+
+BUILD_DIR="${1:-build}"
+BUILD_TYPE="${BUILD_TYPE:-Release}"
+
+echo "==> Configuring (BUILD_DIR=$BUILD_DIR, BUILD_TYPE=$BUILD_TYPE)"
+cmake -S . -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE="$BUILD_TYPE"
+
+echo "==> Building"
+cmake --build "$BUILD_DIR" --config "$BUILD_TYPE" -j
+
+echo "==> Done. Artifacts should be in ../lib"
+ls -l ../lib/libcujava_runtime.so || true
diff --git a/src/main/cpp/jni/common/CMakeLists.txt b/src/main/cpp/jni/common/CMakeLists.txt
new file mode 100644
index 00000000000..1450c394802
--- /dev/null
+++ b/src/main/cpp/jni/common/CMakeLists.txt
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaCommonJNI LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+
+add_library(CuJavaCommonJNI STATIC
+    cujava_logger.cpp
+    cujava_jni_utils.cpp
+    cujava_pointer_utils.cpp
+)
+
+# PIC because this static lib is linked into shared libs; outputs go two levels above common/ (src/main/cpp/lib)
+set_target_properties(CuJavaCommonJNI PROPERTIES
+    CXX_STANDARD 11
+    POSITION_INDEPENDENT_CODE ON
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+)
+
+target_include_directories(CuJavaCommonJNI
+    PUBLIC
+    ${JNI_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR} # headers in common/
+)
+
+# Propagate the JNI libraries to every target that links CuJavaCommonJNI (PUBLIC usage requirement)
+target_link_libraries(CuJavaCommonJNI
+    PUBLIC
+    ${JNI_LIBRARIES}
+)
+
diff --git a/src/main/cpp/jni/common/cujava_jni_utils.cpp b/src/main/cpp/jni/common/cujava_jni_utils.cpp
new file mode 100644
index 00000000000..e6e64c632fb
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_jni_utils.cpp
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "cujava_jni_utils.hpp"
+#include "cujava_logger.hpp"
+
+// Cached method ID of java.lang.String#getBytes(); resolved by initJNIUtils and used by toNativeCString
+jmethodID String_getBytes = nullptr;
+
+/** Resolve the cached String#getBytes method ID; returns JNI_VERSION_1_4 on success, JNI_ERR on failure. */
+int initJNIUtils(JNIEnv *env) {
+    jclass cls = nullptr;
+
+    // java.lang.String#getBytes()[B
+    if (!init(env, cls, "java/lang/String")) return JNI_ERR;
+    if (!init(env, cls, String_getBytes, "getBytes", "()[B")) return JNI_ERR;
+
+    return JNI_VERSION_1_4;
+}
+
+/** Look up a class by its JNI name via FindClass; logs and returns false if it cannot be found. */
+bool init(JNIEnv *env, jclass& cls, const char *name) {
+    cls = env->FindClass(name);
+    if (cls == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to access class '%s'\n", name);
+        return false;
+    }
+    return true;
+}
+
+/** Look up a class and store a global reference to it in globalCls; false if lookup or NewGlobalRef fails. */
+bool initGlobal(JNIEnv *env, jclass &globalCls, const char *className) {
+    jclass cls = nullptr;
+    if (!init(env, cls, className)) return false;
+    globalCls = (jclass)env->NewGlobalRef(cls);
+    if (globalCls == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to create reference to class %s\n", className);
+        return false;
+    }
+    return true;
+}
+
+/** Resolve an instance field ID via GetFieldID; logs and returns false if it cannot be found. */
+bool init(JNIEnv *env, jclass cls, jfieldID& field, const char *name, const char *signature) {
+    field = env->GetFieldID(cls, name, signature);
+    if (field == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to access field '%s' with signature '%s'\n", name, signature);
+        return false;
+    }
+    return true;
+}
+
+/** Resolve a method ID.
*/
+bool init(JNIEnv *env, jclass cls, jmethodID& method, const char *name, const char *signature) {
+    method = env->GetMethodID(cls, name, signature);
+    if (method == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to access method '%s' with signature '%s'\n", name, signature);
+        return false;
+    }
+    return true;
+}
+
+/** Resolve a class, its no-args constructor (JNI name "<init>", signature "()V") and a global ref to the class. */
+bool init(JNIEnv *env, jclass &globalCls, jmethodID &constructor, const char *className) {
+    jclass cls = nullptr;
+    if (!init(env, cls, className)) return false;
+    if (!init(env, cls, constructor, "<init>", "()V")) return false; // constructors are looked up as "<init>"
+
+    globalCls = (jclass)env->NewGlobalRef(cls);
+    if (globalCls == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to create reference to class %s\n", className);
+        return false;
+    }
+    return true;
+}
+
+/** Resolve the standard 'long nativePointer' field for a class; logs and returns false on failure. */
+bool initNativePointer(JNIEnv *env, jfieldID& field, const char *className) {
+    jclass cls = env->FindClass(className);
+    if (cls == nullptr) {
+        Logger::log(LOG_ERROR, "Failed to access class %s\n", className);
+        return false;
+    }
+    return init(env, cls, field, "nativePointer", "J");
+}
+
+/** Throw a Java exception by FQN (JNI slash form); a null msg becomes "". Does nothing if the class is not found. */
+void ThrowByName(JNIEnv *env, const char *name, const char *msg) {
+    jclass cls = env->FindClass(name);
+    if (cls != nullptr) {
+        env->ThrowNew(cls, msg ? msg : "");
+        env->DeleteLocalRef(cls);
+    }
+}
+
+/** Set one element of a long[] array; a null array is a no-op (true), a bad index throws AIOOBE (false). */
+bool set(JNIEnv *env, jlongArray ja, int index, jlong value) {
+    if (ja == nullptr) return true;
+
+    jsize len = env->GetArrayLength(ja);
+    if (index < 0 || index >= len) {
+        ThrowByName(env, "java/lang/ArrayIndexOutOfBoundsException",
+            "Array index out of bounds");
+        return false;
+    }
+
+    jlong *a = (jlong*)env->GetPrimitiveArrayCritical(ja, nullptr);
+    if (a == nullptr) return false;
+
+    a[index] = value;
+    env->ReleasePrimitiveArrayCritical(ja, a, 0); // mode 0: copy back and release
+    return true;
+}
+
+/** Utility to set one element of an int[] array.
*/
+bool set(JNIEnv *env, jintArray ja, int index, jint value) {
+    if (ja == nullptr) {
+        return true;
+    }
+    jsize len = env->GetArrayLength(ja);
+    if (index < 0 || index >= len) {
+        ThrowByName(env, "java/lang/ArrayIndexOutOfBoundsException",
+            "Array index out of bounds");
+        return false;
+    }
+    jint *a = (jint*)env->GetPrimitiveArrayCritical(ja, NULL);
+    if (a == nullptr) {
+        return false;
+    }
+    a[index] = value;
+    env->ReleasePrimitiveArrayCritical(ja, a, 0);
+    return true;
+}
+
+/** Helpers for setting cudaDeviceProperties: copy n elements into an array field, replacing it if null or too short. */
+bool setFieldBytes(JNIEnv* env, jobject obj, jfieldID fid, const jbyte* src, jsize n) {
+    jbyteArray arr = (jbyteArray)env->GetObjectField(obj, fid);
+    if (arr == nullptr || env->GetArrayLength(arr) < n) {
+        jbyteArray tmp = env->NewByteArray(n);
+        if (tmp == nullptr) return false;
+        env->SetObjectField(obj, fid, tmp);
+        arr = tmp;
+    }
+    env->SetByteArrayRegion(arr, 0, n, src);
+    return !env->ExceptionCheck();
+}
+
+bool setFieldInts(JNIEnv* env, jobject obj, jfieldID fid, const jint* src, jsize n) {
+    jintArray arr = (jintArray)env->GetObjectField(obj, fid);
+    if (arr == nullptr || env->GetArrayLength(arr) < n) {
+        jintArray tmp = env->NewIntArray(n);
+        if (tmp == nullptr) return false;
+        env->SetObjectField(obj, fid, tmp);
+        arr = tmp;
+    }
+    env->SetIntArrayRegion(arr, 0, n, src);
+    return !env->ExceptionCheck();
+}
+
+bool zeroFieldInts(JNIEnv* env, jobject obj, jfieldID fid) {
+    jintArray arr = (jintArray)env->GetObjectField(obj, fid);
+    if (arr == nullptr) return true;
+    jsize n = env->GetArrayLength(arr);
+    if (n <= 0) return true;
+    jint* zeros = new (std::nothrow) jint[n]();
+    if (!zeros) {
+        ThrowByName(env, "java/lang/OutOfMemoryError", "Out of memory zeroing int array");
+        return false;
+    }
+    env->SetIntArrayRegion(arr, 0, n, zeros);
+    delete[] zeros;
+    return !env->ExceptionCheck();
+}
+
+/** Copy String#getBytes() into a new[]-allocated, NUL-terminated buffer (caller must delete[]); nullptr on null input or failure. */
+char* toNativeCString(JNIEnv* env, jstring js, int* length) {
+    if (js == nullptr) return nullptr;
+
+    if (env->EnsureLocalCapacity(2) < 0) {
+        ThrowByName(env, "java/lang/OutOfMemoryError",
+            "Out of memory during string reference creation");
+        return nullptr;
+    }
+
+    jbyteArray bytes = (jbyteArray)env->CallObjectMethod(js, String_getBytes);
+    if (env->ExceptionCheck() || bytes == nullptr) {
+        return nullptr;
+    }
+
+    jint len = env->GetArrayLength(bytes);
+    if (length) *length = (int)len;
+
+    char* out = new (std::nothrow) char[len + 1]; // nothrow: plain new would throw instead of returning nullptr
+    if (out == nullptr) {
+        ThrowByName(env, "java/lang/OutOfMemoryError",
+            "Out of memory during string creation");
+        env->DeleteLocalRef(bytes);
+        return nullptr;
+    }
+
+    env->GetByteArrayRegion(bytes, 0, len, (jbyte*)out);
+    out[len] = '\0';
+    env->DeleteLocalRef(bytes);
+    return out;
+}
+
+/** Allocate a size_t[] as long as javaArr (optionally filled from it); a null javaArr yields nativeArr == nullptr and true. */
+bool allocNativeArrayFromJLongs(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool copyFromJava) {
+    if (javaArr == nullptr) {
+        nativeArr = nullptr;
+        return true;
+    }
+    jsize n = env->GetArrayLength(javaArr);
+
+    size_t* tmp = new (std::nothrow) size_t[(size_t)n];
+    if (!tmp) {
+        ThrowByName(env, "java/lang/OutOfMemoryError", "Out of memory during array creation");
+        nativeArr = nullptr;
+        return false;
+    }
+
+    if (copyFromJava) {
+        jlong* jptr = (jlong*)env->GetPrimitiveArrayCritical(javaArr, nullptr);
+        if (!jptr) {
+            delete[] tmp; nativeArr = nullptr; return false;
+        }
+        for (jsize i = 0; i < n; ++i) tmp[i] = (size_t)jptr[i];
+        env->ReleasePrimitiveArrayCritical(javaArr, jptr, JNI_ABORT); // input-only
+    }
+
+    nativeArr = tmp;
+    return true;
+}
+
+bool commitAndFreeNativeArrayToJLongs(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool copyToJava) {
+    if (javaArr == nullptr) {
+        delete[] nativeArr; nativeArr = nullptr; return true;
+    }
+    if (copyToJava && nativeArr) {
+        jsize n = env->GetArrayLength(javaArr); // assumes nativeArr holds at least n elements (as allocated above)
+        jlong* jptr = (jlong*)env->GetPrimitiveArrayCritical(javaArr, nullptr);
+        if (!jptr) {
+            delete[] nativeArr; nativeArr = nullptr;
+            return false;
+        }
+        for (jsize i = 0; i < n; ++i) jptr[i] = (jlong)nativeArr[i];
+        env->ReleasePrimitiveArrayCritical(javaArr, jptr, 0); // commit
+    }
+    delete[] nativeArr;
+    nativeArr = nullptr;
+    return true;
+}
+
+// Back-compat wrappers
+bool initNative(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool fill) {
+    return allocNativeArrayFromJLongs(env, javaArr, nativeArr, fill);
+}
+bool releaseNative(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool writeBack) {
+    return commitAndFreeNativeArrayToJLongs(env, nativeArr, javaArr, writeBack);
+}
diff --git a/src/main/cpp/jni/common/cujava_jni_utils.hpp b/src/main/cpp/jni/common/cujava_jni_utils.hpp
new file mode 100644
index 00000000000..179ba706a0c
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_jni_utils.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_JNI_UTILS_HPP
+#define CUJAVA_JNI_UTILS_HPP
+
+#include <jni.h> // JNIEnv, jclass, jfieldID, jmethodID and the j* array types used below
+#include <new>   // std::nothrow, used by the implementation. NOTE(review): header name was missing in this copy - confirm
+
+bool init(JNIEnv* env, jclass& cls, const char* name);
+bool initGlobal(JNIEnv* env, jclass& globalCls, const char* className);
+bool init(JNIEnv* env, jclass cls, jfieldID& field, const char* name, const char* signature);
+bool init(JNIEnv* env, jclass cls, jmethodID& method, const char* name, const char* signature);
+bool init(JNIEnv* env, jclass& globalCls, jmethodID& constructor, const char* className);
+bool initNativePointer(JNIEnv* env, jfieldID& field, const char* className);
+bool set(JNIEnv *env, jlongArray ja, int index, jlong value);
+bool set(JNIEnv *env, jintArray ja, int index, jint value);
+bool setFieldBytes(JNIEnv* env, jobject obj, jfieldID fid, const jbyte* src, jsize n);
+bool setFieldInts (JNIEnv* env, jobject obj, jfieldID fid, const jint* src, jsize n);
+bool zeroFieldInts(JNIEnv* env, jobject obj, jfieldID fid);
+char* toNativeCString(JNIEnv* env, jstring js, int* length = nullptr); // result is new[]-allocated; caller frees with delete[]
+bool allocNativeArrayFromJLongs(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool copyFromJava);
+bool commitAndFreeNativeArrayToJLongs(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool copyToJava);
+bool initNative(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool fill); // wrapper for allocNativeArrayFromJLongs
+bool releaseNative(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool writeBack); // wrapper for commitAndFreeNativeArrayToJLongs
+
+
+// ---- Exceptions ----
+void ThrowByName(JNIEnv* env, const char* name, const char* msg);
+
+// ---- Module init (optional; keep if called from JNI_OnLoad) ----
+int initJNIUtils(JNIEnv* env);
+
+// ---- Cached IDs (minimal) ----
+extern jmethodID String_getBytes; // ()[B
+
+#endif // CUJAVA_JNI_UTILS_HPP
+
diff --git a/src/main/cpp/jni/common/cujava_logger.cpp b/src/main/cpp/jni/common/cujava_logger.cpp
new file mode 100644
index 00000000000..367f68df62d
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_logger.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation
(ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "cujava_logger.hpp"
+#include <cstdarg> // va_list, va_start, va_end
+#include <cstdio>  // vfprintf, fflush, stdout
+
+LogLevel Logger::currentLogLevel = LOG_ERROR; // default: only LOG_ERROR messages (and nothing at LOG_QUIET) are printed
+
+void Logger::log(LogLevel level, const char *message, ...) // printf-style; prints to stdout when level <= current level
+{
+    if (level <= Logger::currentLogLevel)
+    {
+        va_list argp;
+        va_start(argp, message);
+        vfprintf(stdout, message, argp);
+        va_end(argp);
+        fflush(stdout); // flush immediately so native output is not held back in the stdio buffer
+    }
+}
+
+void Logger::setLogLevel(LogLevel level)
+{
+    Logger::currentLogLevel = level;
+}
diff --git a/src/main/cpp/jni/common/cujava_logger.hpp b/src/main/cpp/jni/common/cujava_logger.hpp
new file mode 100644
index 00000000000..58a3d8ed859
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_logger.hpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#ifndef CUJAVA_LOGGER_HPP
+#define CUJAVA_LOGGER_HPP
+
+#include <cstdarg> // NOTE(review): header name was missing in this copy; chosen because Logger::log is variadic - confirm
+#include <cstdio>  // NOTE(review): header name was missing in this copy; chosen because the logger prints via stdio - confirm
+
+enum LogLevel {LOG_QUIET, LOG_ERROR, LOG_WARNING, LOG_INFO, LOG_DEBUG, LOG_TRACE, LOG_DEBUGTRACE}; // ordered least to most verbose
+
+class Logger {
+public:
+    static void log(LogLevel level, const char* message, ...); // printf-style; emitted when level <= current log level
+    static void setLogLevel(LogLevel level);
+private:
+    static LogLevel currentLogLevel;
+};
+
+#endif
diff --git a/src/main/cpp/jni/common/cujava_pointer_utils.cpp b/src/main/cpp/jni/common/cujava_pointer_utils.cpp
new file mode 100644
index 00000000000..3a8480406f2
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_pointer_utils.cpp
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include <jni.h>   // JNI types and JNI_ERR / JNI_VERSION_1_4
+#include <cstdint> // uintptr_t for the jlong -> void* conversions. NOTE(review): header names were missing in this copy - confirm
+#include "cujava_logger.hpp"
+#include "cujava_jni_utils.hpp"
+#include "cujava_pointer_utils.hpp"
+
+// ---- cached IDs / classes (definitions; headers should declare them 'extern') ----
+jmethodID Object_getClass = nullptr; // ()Ljava/lang/Class;
+jmethodID Class_getComponentType = nullptr; // ()Ljava/lang/Class;
+jmethodID Class_newInstance = nullptr; // ()Ljava/lang/Object;
+
+jmethodID Buffer_isDirect = nullptr; // ()Z
+jmethodID Buffer_hasArray = nullptr; // ()Z
+jmethodID Buffer_array = nullptr; // ()Ljava/lang/Object;
+
+jfieldID NativePointerObject_nativePointer = nullptr; // long
+
+jclass Pointer_class = nullptr; // org.apache.sysds.cujava.Pointer (global ref)
+jfieldID Pointer_buffer = nullptr; // Ljava/nio/Buffer;
+jfieldID Pointer_pointers = nullptr; // [Lorg/apache/sysds/cujava/NativePointerObject;
+jfieldID Pointer_byteOffset = nullptr; // long
+
+jmethodID Pointer_constructor = nullptr; // ()V
+
+// -----------------------------------------------------------------------------
+// Initialize field- and method IDs for Pointer/Buffer plumbing; returns JNI_VERSION_1_4 on success, JNI_ERR otherwise
+// -----------------------------------------------------------------------------
+int initPointerUtils(JNIEnv *env) {
+    jclass cls = nullptr;
+
+    // java.lang.Object#getClass()
+    if (!init(env, cls, "java/lang/Object")) return JNI_ERR;
+    if (!init(env, cls, Object_getClass, "getClass", "()Ljava/lang/Class;")) return JNI_ERR;
+
+    // java.lang.Class methods we may need later (kept to match JCuda shape)
+    if (!init(env, cls, "java/lang/Class")) return JNI_ERR;
+    if (!init(env, cls, Class_getComponentType, "getComponentType", "()Ljava/lang/Class;")) return JNI_ERR;
+    if (!init(env, cls, Class_newInstance, "newInstance", "()Ljava/lang/Object;")) return JNI_ERR;
+
+    // java.nio.Buffer: isDirect/hasArray/array
+    if (!init(env, cls, "java/nio/Buffer")) return JNI_ERR;
+    if (!init(env, cls, Buffer_isDirect, "isDirect", "()Z")) return JNI_ERR;
+    if (!init(env, cls, Buffer_hasArray, "hasArray", "()Z")) return JNI_ERR;
+    if (!init(env, cls, Buffer_array, "array", "()Ljava/lang/Object;")) return JNI_ERR;
+
+    // org.apache.sysds.cujava.NativePointerObject.nativePointer (long)
+    if (!init(env, cls, "org/apache/sysds/cujava/NativePointerObject")) return JNI_ERR;
+    if (!init(env, cls, NativePointerObject_nativePointer, "nativePointer", "J")) return JNI_ERR;
+
+    // org.apache.sysds.cujava.Pointer (kept as a global ref because it is used for IsInstanceOf checks later)
+    if (!init(env, cls, "org/apache/sysds/cujava/Pointer")) return JNI_ERR;
+    Pointer_class = (jclass)env->NewGlobalRef(cls);
+    if (Pointer_class == nullptr) return JNI_ERR;
+
+    if (!init(env, cls, Pointer_buffer, "buffer", "Ljava/nio/Buffer;")) return JNI_ERR;
+    if (!init(env, cls, Pointer_pointers, "pointers", "[Lorg/apache/sysds/cujava/NativePointerObject;")) return JNI_ERR;
+    if (!init(env, cls, Pointer_byteOffset, "byteOffset", "J")) return JNI_ERR;
+    if (!init(env, cls, Pointer_constructor, "<init>", "()V")) return JNI_ERR; // constructors are looked up as "<init>"
+
+    return JNI_VERSION_1_4;
+}
+
+// -----------------------------------------------------------------------------
+// Helper: validate newly created PointerData (runs its init(); deletes it and returns nullptr if init() fails)
+// -----------------------------------------------------------------------------
+static PointerData* validatePointerData(JNIEnv *env, jobject nativePointerObject, PointerData *pointerData) {
+    if (pointerData == nullptr) {
+        ThrowByName(env, "java/lang/OutOfMemoryError",
+            "Out of memory while creating pointer data");
+        return nullptr;
+    }
+    if (!pointerData->init(env, nativePointerObject)) {
+        delete pointerData;
+        return nullptr;
+    }
+    return pointerData;
+}
+
+// -----------------------------------------------------------------------------
+// Factory: create a PointerData matching the Java-side object
+// (mirrors JCuda: Pointer array -> PointersArrayPointerData,
+// Buffer direct -> DirectBufferPointerData,
+// Buffer with array -> ArrayBufferPointerData,
+// else Pointer(nativePointer+byteOffset) -> NativePointerData,
+// else non-Pointer/NULL ->
NativePointerObjectPointerData)
+// -----------------------------------------------------------------------------
+PointerData* initPointerData(JNIEnv *env, jobject nativePointerObject) {
+    Logger::log(LOG_DEBUGTRACE, "Initializing pointer data for Java NativePointerObject %p\n", nativePointerObject);
+
+    // NULL -> NativePointerObjectPointerData
+    if (nativePointerObject == nullptr) {
+        Logger::log(LOG_DEBUGTRACE, "Initializing NativePointerObjectPointerData\n");
+        auto *pd = new NativePointerObjectPointerData();
+        return validatePointerData(env, nativePointerObject, pd);
+    }
+
+    // If not an instance of Pointer -> NativePointerObjectPointerData
+    jboolean isPointer = env->IsInstanceOf(nativePointerObject, Pointer_class);
+    if (!isPointer) {
+        Logger::log(LOG_DEBUGTRACE, "Initializing NativePointerObjectPointerData\n");
+        auto *pd = new NativePointerObjectPointerData();
+        return validatePointerData(env, nativePointerObject, pd);
+    }
+
+    // If Pointer.pointers != null -> PointersArrayPointerData
+    jobjectArray pointersArray = (jobjectArray)env->GetObjectField(nativePointerObject, Pointer_pointers);
+    if (pointersArray != nullptr) {
+        Logger::log(LOG_DEBUGTRACE, "Initializing PointersArrayPointerData\n");
+        auto *pd = new PointersArrayPointerData();
+        return validatePointerData(env, nativePointerObject, pd);
+    }
+
+    // If Pointer.buffer != null -> Buffer paths
+    jobject buffer = env->GetObjectField(nativePointerObject, Pointer_buffer);
+    if (buffer != nullptr) {
+        // Direct buffer?
+        jboolean isDirect = env->CallBooleanMethod(buffer, Buffer_isDirect);
+        if (env->ExceptionCheck()) return nullptr;
+        if (isDirect == JNI_TRUE) {
+            Logger::log(LOG_DEBUGTRACE, "Initializing DirectBufferPointerData\n");
+            auto *pd = new DirectBufferPointerData();
+            return validatePointerData(env, nativePointerObject, pd);
+        }
+
+        // Backed by primitive array?
+        jboolean hasArray = env->CallBooleanMethod(buffer, Buffer_hasArray);
+        if (env->ExceptionCheck()) return nullptr;
+        if (hasArray == JNI_TRUE) {
+            Logger::log(LOG_DEBUGTRACE, "Initializing ArrayBufferPointerData\n");
+            auto *pd = new ArrayBufferPointerData();
+            return validatePointerData(env, nativePointerObject, pd);
+        }
+
+        // Neither direct nor array-backed -> error (should have been checked in Java)
+        Logger::log(LOG_ERROR, "Buffer is neither direct nor has an array\n");
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+            "Buffer is neither direct nor has an array");
+        return nullptr;
+    }
+
+    // Plain Pointer: nativePointer + byteOffset
+    Logger::log(LOG_DEBUGTRACE, "Initializing NativePointerData\n");
+    auto *pd = new NativePointerData();
+    return validatePointerData(env, nativePointerObject, pd);
+}
+
+// -----------------------------------------------------------------------------
+// Release helper: calls PointerData::release, then always deletes the object and nulls the caller's pointer
+// -----------------------------------------------------------------------------
+bool releasePointerData(JNIEnv *env, PointerData* &pointerData, jint mode) {
+    if (pointerData == nullptr) return true;
+    const bool released = pointerData->release(env, mode); // result is reported to the caller below
+    delete pointerData; // also on failure, so the object is not leaked. NOTE(review): assumes subclass destructors are safe after a failed release()
+    pointerData = nullptr;
+    return released;
+}
+
+// -----------------------------------------------------------------------------
+// Misc helpers
+// -----------------------------------------------------------------------------
+bool isDirectByteBuffer(JNIEnv *env, jobject buffer) {
+    if (buffer == nullptr) return false;
+    jboolean isDirect = env->CallBooleanMethod(buffer, Buffer_isDirect);
+    if (env->ExceptionCheck()) return false;
+    return (isDirect == JNI_TRUE);
+}
+
+bool isPointerBackedByNativeMemory(JNIEnv *env, jobject object) {
+    if (object == nullptr) return false;
+
+    jlong np = env->GetLongField(object, NativePointerObject_nativePointer);
+    if (np != 0) return true;
+
+    jboolean isPtr = env->IsInstanceOf(object, Pointer_class);
+    if (isPtr) {
+        jobject buffer = env->GetObjectField(object, Pointer_buffer);
+        return isDirectByteBuffer(env, buffer);
+    }
+    return false;
+}
+
+void setNativePointerValue(JNIEnv *env, jobject nativePointerObject, jlong pointer) {
+    if (nativePointerObject == nullptr) return;
+    env->SetLongField(nativePointerObject, NativePointerObject_nativePointer, pointer);
+}
+
+void* getNativePointerValue(JNIEnv *env, jobject nativePointerObject) {
+    if (nativePointerObject == nullptr) return nullptr;
+    jlong p = env->GetLongField(nativePointerObject, NativePointerObject_nativePointer);
+    return (void*)(uintptr_t)p;
+}
+
+void setPointer(JNIEnv *env, jobject pointerObject, jlong pointer) {
+    if (pointerObject == nullptr) return;
+    env->SetLongField(pointerObject, NativePointerObject_nativePointer, pointer);
+    env->SetLongField(pointerObject, Pointer_byteOffset, 0); // the new address is absolute, so any previous offset is cleared
+}
+
+void* getPointer(JNIEnv *env, jobject pointerObject) {
+    if (pointerObject == nullptr) return nullptr;
+    jlong start = env->GetLongField(pointerObject, NativePointerObject_nativePointer);
+    jlong off = env->GetLongField(pointerObject, Pointer_byteOffset);
+    jlong p = start + off;
+    return (void*)(uintptr_t)p;
+}
diff --git a/src/main/cpp/jni/common/cujava_pointer_utils.hpp b/src/main/cpp/jni/common/cujava_pointer_utils.hpp
new file mode 100644
index 00000000000..9e23b84ffb0
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_pointer_utils.hpp
@@ -0,0 +1,499 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_POINTER_UTILS_HPP
+#define CUJAVA_POINTER_UTILS_HPP
+
+#include <jni.h> // JNIEnv, jobject, jlong, jint, jclass, jfieldID, jmethodID
+#include "cujava_jni_utils.hpp"
+#include "cujava_logger.hpp"
+
+// -----------------------------------------------------------------------------
+// Init + helpers
+// -----------------------------------------------------------------------------
+int initPointerUtils(JNIEnv* env);
+
+class PointerData;
+
+PointerData* initPointerData(JNIEnv* env, jobject nativePointerObject);
+bool releasePointerData(JNIEnv* env, PointerData*& pointerData, jint mode = 0);
+
+void setNativePointerValue(JNIEnv* env, jobject nativePointerObject, jlong pointer);
+void* getNativePointerValue(JNIEnv* env, jobject nativePointerObject);
+
+void setPointer(JNIEnv* env, jobject pointerObject, jlong pointer);
+void* getPointer(JNIEnv* env, jobject pointerObject);
+
+bool isDirectByteBuffer(JNIEnv* env, jobject buffer);
+bool isPointerBackedByNativeMemory(JNIEnv* env, jobject object);
+
+// -----------------------------------------------------------------------------
+// Cached JNI IDs / classes (initialized in initPointerUtils)
+// -----------------------------------------------------------------------------
+extern jmethodID Buffer_isDirect; // ()Z
+extern jmethodID Buffer_hasArray; // ()Z
+extern jmethodID Buffer_array; // ()Ljava/lang/Object;
+
+extern jfieldID NativePointerObject_nativePointer; // long
+
+extern jclass Pointer_class; // Global ref: org.apache.sysds.cujava.Pointer
+extern jfieldID Pointer_buffer; // Ljava/nio/Buffer;
+extern jfieldID Pointer_pointers; //
[Lorg/apache/sysds/cujava/NativePointerObject; +extern jfieldID Pointer_byteOffset;// long + +extern jmethodID Pointer_constructor; // ()V + +extern jmethodID Object_getClass; // ()Ljava/lang/Class; +extern jmethodID Class_getComponentType; // ()Ljava/lang/Class; +extern jmethodID Class_newInstance; // ()Ljava/lang/Object; + +// ----------------------------------------------------------------------------- +// PointerData hierarchy +// ----------------------------------------------------------------------------- + +/** + * Virtual base class for all possible representations of pointers. + */ +class PointerData +{ +public: + virtual ~PointerData() {} + + virtual bool init(JNIEnv* env, jobject object) = 0; + virtual bool release(JNIEnv* env, jint mode = 0) = 0; + + virtual void* getPointer(JNIEnv* env) = 0; + virtual void releasePointer(JNIEnv* env, jint mode = 0) = 0; + + /** + * For pointers inside pointer arrays that may be updated by native code: + * write the new native address back into the Java object, if supported. + */ + virtual bool setNewNativePointerValue(JNIEnv* env, jlong nativePointerValue) = 0; +}; + + +/** + * Backed by a Java NativePointerObject that is NOT a Pointer instance. + * Stores only the nativePointer value. 
+ */ +class NativePointerObjectPointerData : public PointerData +{ +private: + jobject nativePointerObject; // global ref (may be null) + jlong nativePointer; + +public: + NativePointerObjectPointerData() : nativePointerObject(NULL), nativePointer(0) {} + ~NativePointerObjectPointerData() {} + + bool init(JNIEnv* env, jobject object) + { + if (object != NULL) + { + nativePointerObject = env->NewGlobalRef(object); + if (nativePointerObject == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while creating global reference for pointer data"); + return false; + } + nativePointer = env->GetLongField(object, NativePointerObject_nativePointer); + if (env->ExceptionCheck()) return false; + } + Logger::log(LOG_DEBUGTRACE, "Initialized NativePointerObjectPointerData %p\n", (void*)nativePointer); + return true; + } + + bool release(JNIEnv* env, jint = 0) + { + Logger::log(LOG_DEBUGTRACE, "Releasing NativePointerObjectPointerData %p\n", (void*)nativePointer); + if (nativePointerObject != NULL) + { + env->SetLongField(nativePointerObject, NativePointerObject_nativePointer, nativePointer); + env->DeleteGlobalRef(nativePointerObject); + } + return true; + } + + void* getPointer(JNIEnv*) { return (void*)nativePointer; } + void releasePointer(JNIEnv*, jint = 0) {} + + bool setNewNativePointerValue(JNIEnv*, jlong nativePointerValue) + { + nativePointer = nativePointerValue; + return true; + } +}; + + +/** + * Backed by a Java Pointer (nativePointer + byteOffset). 
+ */ +class NativePointerData : public PointerData +{ +private: + jobject pointer; // global ref + jlong nativePointer; + jlong byteOffset; + +public: + NativePointerData() : pointer(NULL), nativePointer(0), byteOffset(0) {} + ~NativePointerData() {} + + bool init(JNIEnv* env, jobject object) + { + pointer = env->NewGlobalRef(object); + if (pointer == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while creating global reference for pointer data"); + return false; + } + + nativePointer = env->GetLongField(object, NativePointerObject_nativePointer); + if (env->ExceptionCheck()) return false; + + byteOffset = env->GetLongField(object, Pointer_byteOffset); + if (env->ExceptionCheck()) return false; + + Logger::log(LOG_DEBUGTRACE, "Initialized NativePointerData %p\n", (void*)nativePointer); + return true; + } + + bool release(JNIEnv* env, jint = 0) + { + Logger::log(LOG_DEBUGTRACE, "Releasing NativePointerData %p\n", (void*)nativePointer); + env->SetLongField(pointer, NativePointerObject_nativePointer, nativePointer); + env->SetLongField(pointer, Pointer_byteOffset, byteOffset); + env->DeleteGlobalRef(pointer); + return true; + } + + void* getPointer(JNIEnv*) { return (void*)(((char*)nativePointer) + byteOffset); } + void releasePointer(JNIEnv*, jint = 0) {} + + bool setNewNativePointerValue(JNIEnv*, jlong nativePointerValue) + { + nativePointer = nativePointerValue; + byteOffset = 0; + return true; + } +}; + + +/** + * Backed by a Java Pointer that points to an array of NativePointerObjects. 
+ */ +class PointersArrayPointerData : public PointerData +{ +private: + jobject nativePointerObject; // global ref to the Java Pointer + PointerData** arrayPointerDatas; // parallel to Java array + void* startPointer; // native array of void* (one per element) + jlong byteOffset; + bool localPointersInitialized; + + void initLocalPointers(JNIEnv* env) + { + Logger::log(LOG_DEBUGTRACE, "Initializing PointersArrayPointerData local pointers\n"); + jobjectArray pointersArray = (jobjectArray)env->GetObjectField( + nativePointerObject, Pointer_pointers); + long size = (long)env->GetArrayLength(pointersArray); + void** localPointer = (void**)startPointer; + for (int i = 0; i < size; i++) + { + if (arrayPointerDatas[i] != NULL) + localPointer[i] = arrayPointerDatas[i]->getPointer(env); + else + localPointer[i] = NULL; + } + localPointersInitialized = true; + Logger::log(LOG_DEBUGTRACE, "Initialized PointersArrayPointerData local pointers\n"); + } + +public: + PointersArrayPointerData() + : nativePointerObject(NULL), + arrayPointerDatas(NULL), + startPointer(NULL), + byteOffset(0), + localPointersInitialized(false) {} + + ~PointersArrayPointerData() {} + + bool init(JNIEnv* env, jobject object) + { + nativePointerObject = env->NewGlobalRef(object); + if (nativePointerObject == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while creating global reference for pointer data"); + return false; + } + + jobjectArray pointersArray = (jobjectArray)env->GetObjectField(object, Pointer_pointers); + long size = (long)env->GetArrayLength(pointersArray); + + void** localPointer = new void*[size]; + if (localPointer == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while initializing pointer array"); + return false; + } + startPointer = (void*)localPointer; + + arrayPointerDatas = new PointerData*[size]; + if (arrayPointerDatas == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while initializing 
pointer data array"); + return false; + } + + for (int i = 0; i < size; i++) + { + jobject p = env->GetObjectArrayElement(pointersArray, i); + if (env->ExceptionCheck()) return false; + + if (p != NULL) + { + PointerData* apd = initPointerData(env, p); + if (apd == NULL) return false; + arrayPointerDatas[i] = apd; + } + else + { + arrayPointerDatas[i] = NULL; + } + } + + byteOffset = env->GetLongField(object, Pointer_byteOffset); + if (env->ExceptionCheck()) return false; + + Logger::log(LOG_DEBUGTRACE, "Initialized PointersArrayPointerData %p\n", startPointer); + return true; + } + + bool release(JNIEnv* env, jint mode = 0) + { + Logger::log(LOG_DEBUGTRACE, "Releasing PointersArrayPointerData %p\n", startPointer); + + if (!localPointersInitialized) initLocalPointers(env); + + jobjectArray pointersArray = (jobjectArray)env->GetObjectField( + nativePointerObject, Pointer_pointers); + long size = (long)env->GetArrayLength(pointersArray); + + void** localPointer = (void**)startPointer; + if (mode != JNI_ABORT) + { + for (int i = 0; i < size; i++) + { + jobject p = env->GetObjectArrayElement(pointersArray, i); + if (env->ExceptionCheck()) return false; + + if (p != NULL) + { + void* oldLocalPointer = arrayPointerDatas[i]->getPointer(env); + + Logger::log(LOG_DEBUGTRACE, "About to write back pointer %d in PointersArrayPointerData\n", i); + Logger::log(LOG_DEBUGTRACE, "Old local pointer was %p\n", oldLocalPointer); + Logger::log(LOG_DEBUGTRACE, "New local pointer is %p\n", localPointer[i]); + + if (localPointer[i] != oldLocalPointer) + { + Logger::log(LOG_DEBUGTRACE, "In pointer %d setting value %p\n", i, localPointer[i]); + bool updated = arrayPointerDatas[i]->setNewNativePointerValue(env, (jlong)localPointer[i]); + if (!updated) return false; // pending IllegalArgumentException + } + } + else if (localPointer[i] != NULL) + { + ThrowByName(env, "java/lang/NullPointerException", + "Pointer points to an array containing a 'null' entry"); + return false; + } + } + } + + if 
(arrayPointerDatas != NULL) + { + for (int i = 0; i < size; i++) + { + if (arrayPointerDatas[i] != NULL) + { + if (!releasePointerData(env, arrayPointerDatas[i], mode)) return false; + } + } + delete[] arrayPointerDatas; + } + delete[] localPointer; + + env->DeleteGlobalRef(nativePointerObject); + return true; + } + + void* getPointer(JNIEnv* env) + { + if (!localPointersInitialized) initLocalPointers(env); + return (void*)(((char*)startPointer) + byteOffset); + } + + void releasePointer(JNIEnv*, jint = 0) {} + + bool setNewNativePointerValue(JNIEnv* env, jlong) + { + ThrowByName(env, "java/lang/IllegalArgumentException", + "Pointer to an array of pointers may not be overwritten"); + return false; + } +}; + + +/** + * Backed by a direct java.nio.Buffer. + */ +class DirectBufferPointerData : public PointerData +{ +private: + void* startPointer; + jlong byteOffset; + +public: + DirectBufferPointerData() : startPointer(NULL), byteOffset(0) {} + ~DirectBufferPointerData() {} + + bool init(JNIEnv* env, jobject object) + { + jobject buffer = env->GetObjectField(object, Pointer_buffer); + startPointer = env->GetDirectBufferAddress(buffer); + if (startPointer == 0) + { + ThrowByName(env, "java/lang/IllegalArgumentException", + "Failed to obtain direct buffer address"); + return false; + } + + byteOffset = env->GetLongField(object, Pointer_byteOffset); + if (env->ExceptionCheck()) return false; + + Logger::log(LOG_DEBUGTRACE, "Initialized DirectBufferPointerData %p\n", startPointer); + return true; + } + + bool release(JNIEnv*, jint = 0) + { + Logger::log(LOG_DEBUGTRACE, "Releasing DirectBufferPointerData %p\n", startPointer); + return true; + } + + void* getPointer(JNIEnv*) { return (void*)(((char*)startPointer) + byteOffset); } + void releasePointer(JNIEnv*, jint = 0) {} + + bool setNewNativePointerValue(JNIEnv* env, jlong) + { + ThrowByName(env, "java/lang/IllegalArgumentException", + "Pointer to a direct buffer may not be overwritten"); + return false; + } +}; + + +/** 
+ * Backed by a primitive-array-backed Buffer (e.g., ByteBuffer.wrap(...)). + */ +class ArrayBufferPointerData : public PointerData +{ +private: + jarray array; // global ref to the primitive array + void* startPointer; // set on first getPointer() + jboolean isCopy; + jlong byteOffset; + +public: + ArrayBufferPointerData() + : array(NULL), startPointer(NULL), isCopy(JNI_FALSE), byteOffset(0) {} + ~ArrayBufferPointerData() {} + + bool init(JNIEnv* env, jobject object) + { + jobject buffer = env->GetObjectField(object, Pointer_buffer); + jobject localArray = env->CallObjectMethod(buffer, Buffer_array); + if (env->ExceptionCheck()) return false; + + array = (jarray)env->NewGlobalRef(localArray); + if (array == NULL) + { + ThrowByName(env, "java/lang/OutOfMemoryError", + "Out of memory while creating array reference"); + return false; + } + + byteOffset = env->GetLongField(object, Pointer_byteOffset); + if (env->ExceptionCheck()) return false; + + Logger::log(LOG_DEBUGTRACE, "Initialized ArrayBufferPointerData %p (deferred)\n", startPointer); + return true; + } + + bool release(JNIEnv* env, jint mode = 0) + { + Logger::log(LOG_DEBUGTRACE, "Releasing ArrayBufferPointerData %p\n", startPointer); + releasePointer(env, mode); + env->DeleteGlobalRef(array); + return true; + } + + void* getPointer(JNIEnv* env) + { + if (startPointer == NULL) + { + Logger::log(LOG_DEBUGTRACE, "Initializing ArrayBufferPointerData critical\n"); + isCopy = JNI_FALSE; + startPointer = env->GetPrimitiveArrayCritical(array, &isCopy); + if (startPointer == NULL) return NULL; + Logger::log(LOG_DEBUGTRACE, "Initialized ArrayBufferPointerData %p (isCopy %d)\n", startPointer, (int)isCopy); + } + return (void*)(((char*)startPointer) + byteOffset); + } + + void releasePointer(JNIEnv* env, jint mode = 0) + { + if (startPointer != NULL) + { + Logger::log(LOG_DEBUGTRACE, "Releasing ArrayBufferPointerData critical\n"); + if (!isCopy) + env->ReleasePrimitiveArrayCritical(array, startPointer, JNI_ABORT); + 
else + env->ReleasePrimitiveArrayCritical(array, startPointer, mode); + startPointer = NULL; + } + } + + bool setNewNativePointerValue(JNIEnv* env, jlong) + { + ThrowByName(env, "java/lang/IllegalArgumentException", + "Pointer to an array may not be overwritten"); + return false; + } +}; + +#endif // CUJAVA_POINTER_UTILS_HPP diff --git a/src/main/cpp/jni/cublas/CMakeLists.txt b/src/main/cpp/jni/cublas/CMakeLists.txt new file mode 100644 index 00000000000..02ca2e5dd37 --- /dev/null +++ b/src/main/cpp/jni/cublas/CMakeLists.txt @@ -0,0 +1,60 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+#-------------------------------------------------------------
+
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaCublas LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED) # provides CUDA::cublas (and CUDA::cudart)
+
+# JNI bindings for cuBLAS, loaded from Java as "cujava_cublas".
+add_library(CuJavaCublas SHARED
+  cujava_cublas.cpp
+)
+
+# The sources rely on C++11 (e.g. nullptr); CXX_STANDARD_REQUIRED turns a
+# compiler without C++11 support into a configure error instead of a silent
+# fallback to an older standard.
+set_target_properties(CuJavaCublas PROPERTIES
+  CXX_STANDARD 11
+  CXX_STANDARD_REQUIRED ON
+  OUTPUT_NAME cujava_cublas # -> libcujava_cublas.so
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+)
+
+target_include_directories(CuJavaCublas
+  PRIVATE
+  ${JNI_INCLUDE_DIRS}
+  ${CUDAToolkit_INCLUDE_DIRS}
+  "${CMAKE_CURRENT_SOURCE_DIR}" # headers in cublas/
+  "${CMAKE_CURRENT_SOURCE_DIR}/../common" # shared JNI helper headers
+)
+
+# Link cuBLAS v2 plus the CUDA runtime; the latter is required because
+# cujava_cublas.cpp calls cudaDeviceSynchronize().
+target_link_libraries(CuJavaCublas
+  PRIVATE
+  CuJavaCommonJNI
+  CUDA::cublas
+  CUDA::cudart # needed for cudaDeviceSynchronize()
+  ${JNI_LIBRARIES}
+)
+
+
diff --git a/src/main/cpp/jni/cublas/cujava_cublas.cpp b/src/main/cpp/jni/cublas/cujava_cublas.cpp
new file mode 100644
index 00000000000..a2e448af56e
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas.cpp
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +#include "cujava_cublas.hpp" +#include "cujava_cublas_common.hpp" + +#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method) \ + do { \ + if ((obj) == nullptr) { \ + ThrowByName((env), "java/lang/NullPointerException", \ + "Parameter '" name "' is null for " method); \ + return CUJAVA_CUBLAS_INTERNAL_ERROR; \ + } \ + } while (0) + + + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) { + JNIEnv *env = nullptr; + if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4)) { + return JNI_ERR; + } + + // Only what we need so far + if (initJNIUtils(env) == JNI_ERR) return JNI_ERR; + if (initPointerUtils(env) == JNI_ERR) return JNI_ERR; + + return JNI_VERSION_1_4; +} + + + +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) { +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasCreateNative(JNIEnv *env, jclass cls, jobject handle) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasCreate"); + + Logger::log(LOG_TRACE, "Executing cublasCreate(handle=%p)\n", handle); + + // Declare native variables + cublasHandle_t handle_native; + + // Cublas API call + cublasStatus_t jniResult_native = cublasCreate(&handle_native); + setNativePointerValue(env, handle, (jlong)handle_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDestroyNative(JNIEnv *env, jclass cls, jobject handle) { + + // Validate: all jobject parameters must be 
non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDestroy"); + + Logger::log(LOG_TRACE, "Executing cublasDestroy(handle=%p)\n", handle); + + // Declare native variables + cublasHandle_t handle_native; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + + // Cublas API call + cublasStatus_t jniResult_native = cublasDestroy(handle_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgeamNative + (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jobject alpha, jobject A, + jint lda, jobject beta, jobject B, jint ldb, jobject C, jint ldc) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgeam"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgeam"); + CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgeam"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgeam"); + CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDgeam"); + CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDgeam"); + + Logger::log(LOG_TRACE, "Executing cublasDgeam(handle=%p, transa=%d, transb=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, beta=%p, B=%p, ldb=%d, C=%p, ldc=%d)\n", + handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); + + // Declare native variables + cublasHandle_t handle_native; + cublasOperation_t transa_native; + cublasOperation_t transb_native; + int m_native = 0; + int n_native = 0; + double * alpha_native = nullptr; + double * A_native = nullptr; + int lda_native = 0; + double * beta_native = nullptr; + double * B_native = nullptr; + int ldb_native = 0; + double * C_native = nullptr; + int ldc_native = 0; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + transa_native = (cublasOperation_t)transa; + transb_native = 
(cublasOperation_t)transb; + m_native = (int)m; + n_native = (int)n; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + A_native = (double *)getPointer(env, A); + lda_native = (int)lda; + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + B_native = (double *)getPointer(env, B); + ldb_native = (int)ldb; + C_native = (double *)getPointer(env, C); + ldc_native = (int)ldc; + + // Cublas API call + cublasStatus_t jniResult_native = cublasDgeam(handle_native, transa_native, transb_native, m_native, n_native, alpha_native, + A_native, lda_native, beta_native, B_native, ldb_native, C_native, ldc_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDdotNative + (JNIEnv *env, jclass cls, jobject handle, jint n, jobject x, jint incx, jobject y, jint incy, jobject result) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDdot"); + CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDdot"); + CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDdot"); + CUJAVA_REQUIRE_NONNULL(env, result, "result", "cublasDdot"); + + Logger::log(LOG_TRACE, "Executing cublasDdot(handle=%p, n=%d, x=%p, incx=%d, y=%p, incy=%d, result=%p)\n", + handle, n, x, incx, y, incy, result); + + // Declare native variables + cublasHandle_t handle_native; + int n_native = 0; + double * x_native = nullptr; + int incx_native = 0; + double * y_native = 
nullptr; + int incy_native = 0; + double * result_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + n_native = (int)n; + x_native = (double *)getPointer(env, x); + incx_native = (int)incx; + y_native = (double *)getPointer(env, y); + incy_native = (int)incy; + PointerData *result_pointerData = initPointerData(env, result); + if (result_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + result_native = (double *)result_pointerData->getPointer(env); + + // Cublas API call + cublasStatus_t jniResult_native = cublasDdot(handle_native, n_native, x_native, incx_native, y_native, incy_native, result_native); + + if (!isPointerBackedByNativeMemory(env, result)) { + cudaDeviceSynchronize(); // add cudart to CMake to cover runtime call + } + if (!releasePointerData(env, result_pointerData, 0)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemvNative + (JNIEnv *env, jclass cls, jobject handle, jint trans, jint m, jint n, jobject alpha, jobject A, jint lda, + jobject x, jint incx, jobject beta, jobject y, jint incy) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgemv"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgemv"); + CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgemv"); + CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDgemv"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgemv"); + CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDgemv"); + + Logger::log(LOG_TRACE, "Executing cublasDgemv(handle=%p, trans=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, x=%p, incx=%d, beta=%p, y=%p, incy=%d)\n", + handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); + + // Declare native variables + cublasHandle_t handle_native; + cublasOperation_t trans_native; + int 
m_native = 0; + int n_native = 0; + double * alpha_native = nullptr; + double * A_native = nullptr; + int lda_native = 0; + double * x_native = nullptr; + int incx_native = 0; + double * beta_native = nullptr; + double * y_native = nullptr; + int incy_native = 0; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + trans_native = (cublasOperation_t)trans; + m_native = (int)m; + n_native = (int)n; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + A_native = (double *)getPointer(env, A); + lda_native = (int)lda; + x_native = (double *)getPointer(env, x); + incx_native = (int)incx; + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + y_native = (double *)getPointer(env, y); + incy_native = (int)incy; + + // Cublas API call + cublasStatus_t jniResult_native = cublasDgemv(handle_native, trans_native, m_native, n_native, alpha_native, A_native, + lda_native, x_native, incx_native, beta_native, y_native, incy_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemmNative + (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jint k, jobject alpha, + jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgemm"); + 
CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgemm"); + CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgemm"); + CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDgemm"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgemm"); + CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDgemm"); + + Logger::log(LOG_TRACE, "Executing cublasDgemm(handle=%p, transa=%d, transb=%d, m=%d, n=%d, k=%d, alpha=%p, A=%p, lda=%d, B=%p, ldb=%d, beta=%p, C=%p, ldc=%d)\n", + handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + + // Declare native variables + cublasHandle_t handle_native; + cublasOperation_t transa_native; + cublasOperation_t transb_native; + int m_native = 0; + int n_native = 0; + int k_native = 0; + double * alpha_native = nullptr; + double * A_native = nullptr; + int lda_native = 0; + double * B_native = nullptr; + int ldb_native = 0; + double * beta_native = nullptr; + double * C_native = nullptr; + int ldc_native = 0; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + transa_native = (cublasOperation_t)transa; + transb_native = (cublasOperation_t)transb; + m_native = (int)m; + n_native = (int)n; + k_native = (int)k; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == NULL) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + A_native = (double *)getPointer(env, A); + lda_native = (int)lda; + B_native = (double *)getPointer(env, B); + ldb_native = (int)ldb; + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + C_native = (double *)getPointer(env, C); + ldc_native = (int)ldc; + + // Cublas API call + cublasStatus_t jniResult_native = cublasDgemm(handle_native, transa_native, transb_native, m_native, n_native, k_native, + alpha_native, A_native, 
lda_native, B_native, ldb_native, beta_native, C_native, ldc_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDsyrkNative + (JNIEnv *env, jclass cls, jobject handle, jint uplo, jint trans, jint n, jint k, jobject alpha, + jobject A, jint lda, jobject beta, jobject C, jint ldc) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDsyrk"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDsyrk"); + CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDsyrk"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDsyrk"); + CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDsyrk"); + + Logger::log(LOG_TRACE, "Executing cublasDsyrk(handle=%p, uplo=%d, trans=%d, n=%d, k=%d, alpha=%p, A=%p, lda=%d, beta=%p, C=%p, ldc=%d)\n", + handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); + + // Declare native variables + cublasHandle_t handle_native; + cublasFillMode_t uplo_native; + cublasOperation_t trans_native; + int n_native = 0; + int k_native = 0; + double * alpha_native = nullptr; + double * A_native = nullptr; + int lda_native = 0; + double * beta_native = nullptr; + double * C_native = nullptr; + int ldc_native = 0; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + uplo_native = (cublasFillMode_t)uplo; + trans_native = (cublasOperation_t)trans; + n_native = (int)n; + k_native = (int)k; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + A_native = (double *)getPointer(env, A); + lda_native = 
(int)lda; + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + C_native = (double *)getPointer(env, C); + ldc_native = (int)ldc; + + // Cublas API call + cublasStatus_t jniResult_native = cublasDsyrk(handle_native, uplo_native, trans_native, n_native, k_native, + alpha_native, A_native, lda_native, beta_native, C_native, ldc_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDaxpyNative + (JNIEnv *env, jclass cls, jobject handle, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDaxpy"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDaxpy"); + CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDaxpy"); + CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDaxpy"); + + Logger::log(LOG_TRACE, "Executing cublasDaxpy(handle=%p, n=%d, alpha=%p, x=%p, incx=%d, y=%p, incy=%d)\n", + handle, n, alpha, x, incx, y, incy); + + // Declare native variables + cublasHandle_t handle_native; + int n_native = 0; + double * alpha_native = nullptr; + double * x_native = nullptr; + int incx_native = 0; + double * y_native = nullptr; + int incy_native = 0; + + // Copy Java inputs into native locals + handle_native = (cublasHandle_t)getNativePointerValue(env, handle); + n_native = (int)n; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUBLAS_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + x_native = (double 
*)getPointer(env, x);
+    incx_native = (int)incx;
+    y_native = (double *)getPointer(env, y);
+    incy_native = (int)incy;
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDaxpy(handle_native, n_native, alpha_native, x_native, incx_native, y_native, incy_native);
+
+    if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/*
+ * JNI entry point wrapping cublasDtrsm.
+ *
+ * handle, alpha, A and B are checked with CUJAVA_REQUIRE_NONNULL first. All
+ * other arguments are cast to the matching cuBLAS enum or int type and are
+ * forwarded unchanged. The function returns the cublasStatus_t of the call
+ * as a jint, or CUJAVA_CUBLAS_INTERNAL_ERROR when the PointerData for alpha
+ * cannot be created or released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDtrsmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint side, jint uplo, jint trans, jint diag, jint m,
+    jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb) {
+
+    // Guard the object arguments before any of them is unwrapped
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDtrsm");
+
+    Logger::log(LOG_TRACE, "Executing cublasDtrsm(handle=%p, side=%d, uplo=%d, trans=%d, diag=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, B=%p, ldb=%d)\n",
+        handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+
+    // Unwrap the handle and convert the enum and size arguments
+    cublasHandle_t cublasHandle = (cublasHandle_t)getNativePointerValue(env, handle);
+    cublasSideMode_t sideMode = (cublasSideMode_t)side;
+    cublasFillMode_t fillMode = (cublasFillMode_t)uplo;
+    cublasOperation_t operation = (cublasOperation_t)trans;
+    cublasDiagType_t diagType = (cublasDiagType_t)diag;
+    int mValue = (int)m;
+    int nValue = (int)n;
+
+    // alpha is accessed through a PointerData that has to be released after the call
+    PointerData *alphaData = initPointerData(env, alpha);
+    if (alphaData == nullptr) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    double *alphaPtr = (double *)alphaData->getPointer(env);
+
+    // A and B are resolved directly with getPointer
+    double *aPtr = (double *)getPointer(env, A);
+    int ldaValue = (int)lda;
+    double *bPtr = (double *)getPointer(env, B);
+    int ldbValue = (int)ldb;
+
+    cublasStatus_t status = cublasDtrsm(cublasHandle, sideMode, fillMode, operation, diagType,
+        mValue, nValue, alphaPtr, aPtr, ldaValue, bPtr, ldbValue);
+
+    // The release mode is JNI_ABORT; a failed release is reported as an internal error
+    if (!releasePointerData(env, alphaData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    return (jint)status;
+}
diff --git a/src/main/cpp/jni/cublas/cujava_cublas.hpp b/src/main/cpp/jni/cublas/cujava_cublas.hpp
new file mode 100644
index 00000000000..39523d78f6f
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas.hpp
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// JNI types and macros (JNIEnv, jint, jobject, JNIEXPORT, JNICALL) used by every declaration below
+#include <jni.h>
+
+#ifndef _Included_org_apache_sysds_cujava_cublas_CuJavaCublas
+#define _Included_org_apache_sysds_cujava_cublas_CuJavaCublas
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Class: org.apache.sysds.cujava.cublas.CuJavaCublas
+ * Methods:
+ * - cublasCreate
+ * - cublasDestroy
+ * - cublasDgeam
+ * - cublasDdot
+ * - cublasDgemv
+ * - cublasDgemm
+ * - cublasDsyrk
+ * - cublasDaxpy
+ * - cublasDtrsm
+ *
+ * Each entry point is declared with C linkage when compiled as C++, returns a
+ * jint, and takes the JNI environment and the declaring class followed by the
+ * arguments of the call. Object arguments are passed as jobject, all other
+ * arguments as jint.
+ */
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasCreateNative(JNIEnv *env, jclass cls, jobject handle);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDestroyNative(JNIEnv *env, jclass cls, jobject handle);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgeamNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jobject alpha, jobject A,
+    jint lda, jobject beta, jobject B, jint ldb, jobject C, jint ldc);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDdotNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject x, jint incx, jobject y, jint incy, jobject result);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemvNative
+    (JNIEnv *env, jclass cls, jobject handle, jint trans, jint m, jint n, jobject alpha, jobject A, jint lda,
+    jobject x, jint incx, jobject beta, jobject y, jint incy);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jint k, jobject alpha,
+    jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDsyrkNative
+    (JNIEnv *env, jclass cls, jobject handle, jint uplo, jint trans, jint n, jint k, jobject alpha,
+    jobject A, jint lda, jobject beta, jobject C, jint ldc);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDaxpyNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDtrsmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint side, jint uplo, jint trans, jint diag, jint m,
+    jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/main/cpp/jni/cublas/cujava_cublas_common.hpp b/src/main/cpp/jni/cublas/cujava_cublas_common.hpp
new file mode 100644
index 00000000000..80950c84697
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas_common.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#ifndef CUJAVA_CUBLAS_COMMON_HPP
+#define CUJAVA_CUBLAS_COMMON_HPP
+
+// NOTE(review): the header names of the three system includes were lost when this
+// patch was extracted. <cublas_v2.h> follows from the comment next to it; <jni.h>
+// and <cuda_runtime.h> are assumptions and must be confirmed against the repository.
+#include <jni.h>
+#include <cublas_v2.h> // cuBLAS v1 is deprecated
+#include <cuda_runtime.h>
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+
+// Returned by the cuBLAS JNI wrappers when the wrapper itself fails (for example
+// when a PointerData cannot be created or released), as opposed to a status
+// reported by cuBLAS.
+#define CUJAVA_CUBLAS_INTERNAL_ERROR (-1)
+
+#endif // CUJAVA_CUBLAS_COMMON_HPP
+
diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn.cpp b/src/main/cpp/jni/cudnn/cujava_cudnn.cpp
new file mode 100644
index 00000000000..7b5b8aba71c
--- /dev/null
+++ b/src/main/cpp/jni/cudnn/cujava_cudnn.cpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+// TODO: Implement jni wrapper for cuDNN
diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn.hpp b/src/main/cpp/jni/cudnn/cujava_cudnn.hpp
new file mode 100644
index 00000000000..042f3ce1f39
--- /dev/null
+++ b/src/main/cpp/jni/cudnn/cujava_cudnn.hpp
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp b/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp new file mode 100644 index 00000000000..042f3ce1f39 --- /dev/null +++ b/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver.cpp b/src/main/cpp/jni/cusolver/cujava_cusolver.cpp new file mode 100644 index 00000000000..5194fceec27 --- /dev/null +++ b/src/main/cpp/jni/cusolver/cujava_cusolver.cpp @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +// TODO: Implement jni wrapper for cuSolver diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver.hpp b/src/main/cpp/jni/cusolver/cujava_cusolver.hpp new file mode 100644 index 00000000000..042f3ce1f39 --- /dev/null +++ b/src/main/cpp/jni/cusolver/cujava_cusolver.hpp @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp b/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp new file mode 100644 index 00000000000..042f3ce1f39 --- /dev/null +++ b/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ diff --git a/src/main/cpp/jni/cusparse/CMakeLists.txt b/src/main/cpp/jni/cusparse/CMakeLists.txt new file mode 100644 index 00000000000..2b728bd0da4 --- /dev/null +++ b/src/main/cpp/jni/cusparse/CMakeLists.txt @@ -0,0 +1,57 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaCusparse LANGUAGES CXX)
+
+# JNI provides the include directories and libraries of the Java bindings;
+# CUDAToolkit provides the CUDA::cusparse and CUDA::cudart imported targets.
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED) # for CUDA::cusparse
+
+# Every artifact of this target is written to the lib/ directory two levels
+# above this source directory.
+set(CUJAVA_CUSPARSE_OUTPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../lib")
+
+add_library(CuJavaCusparse SHARED
+  cujava_cusparse.cpp
+)
+
+set_target_properties(CuJavaCusparse PROPERTIES
+  CXX_STANDARD 11
+  CXX_STANDARD_REQUIRED ON # do not let CMake fall back to an older standard silently
+  OUTPUT_NAME cujava_cusparse # -> libcujava_cusparse.so
+  LIBRARY_OUTPUT_DIRECTORY "${CUJAVA_CUSPARSE_OUTPUT_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CUJAVA_CUSPARSE_OUTPUT_DIR}"
+  ARCHIVE_OUTPUT_DIRECTORY "${CUJAVA_CUSPARSE_OUTPUT_DIR}"
+)
+
+target_include_directories(CuJavaCusparse
+  PRIVATE
+    ${JNI_INCLUDE_DIRS}
+    ${CUDAToolkit_INCLUDE_DIRS}
+    "${CMAKE_CURRENT_SOURCE_DIR}" # headers in cusparse/
+    "${CMAKE_CURRENT_SOURCE_DIR}/../common" # if you include common headers
+)
+
+target_link_libraries(CuJavaCusparse
+  PRIVATE
+    CuJavaCommonJNI
+    CUDA::cusparse
+    CUDA::cudart # needed for cudaDeviceSynchronize()
+    ${JNI_LIBRARIES}
+)
+
diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse.cpp b/src/main/cpp/jni/cusparse/cujava_cusparse.cpp
new file mode 100644
index 00000000000..97ec11a7dd3
--- /dev/null
+++ b/src/main/cpp/jni/cusparse/cujava_cusparse.cpp
@@ -0,0 +1,1586 @@
+/*
+ * Licensed to the Apache Software
Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include "cujava_cusparse.hpp"
+#include "cujava_cusparse_common.hpp"
+
+/*
+ * Null guard for jobject parameters of the JNI wrappers in this file.
+ *
+ * When obj is nullptr the macro calls ThrowByName with
+ * java/lang/NullPointerException and a message naming the parameter and the
+ * method, then returns CUJAVA_CUSPARSE_INTERNAL_ERROR from the enclosing
+ * function. name and method must be string literals because the message is
+ * built by literal concatenation.
+ */
+#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method) \
+    do { \
+        if ((obj) == nullptr) { \
+            ThrowByName((env), "java/lang/NullPointerException", \
+                "Parameter '" name "' is null for " method); \
+            return CUJAVA_CUSPARSE_INTERNAL_ERROR; \
+        } \
+    } while (0)
+
+
+/*
+ * Library load hook. Obtains a JNIEnv for JNI_VERSION_1_4 and initializes the
+ * JNI and pointer utilities. Returns JNI_VERSION_1_4 on success and JNI_ERR
+ * when any of these steps fails.
+ */
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    JNIEnv *env = nullptr;
+    // GetEnv reports success as JNI_OK; anything else aborts the load
+    if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4) != JNI_OK) {
+        return JNI_ERR;
+    }
+
+    // Only what we need so far
+    if (initJNIUtils(env) == JNI_ERR) return JNI_ERR;
+    if (initPointerUtils(env) == JNI_ERR) return JNI_ERR;
+
+    return JNI_VERSION_1_4;
+}
+
+
+
+/* Library unload hook. Nothing is torn down here. */
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) {
+}
+
+
+
+/*
+ * JNI entry point wrapping cusparseSpGEMM_copy.
+ *
+ * All jobject parameters are required to be non-null. alpha and beta are
+ * accessed through PointerData objects which are released with JNI_ABORT
+ * after the call. Returns the cusparseStatus_t of the call as a jint, or
+ * CUJAVA_CUSPARSE_INTERNAL_ERROR when a PointerData cannot be created or
+ * released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1copyNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC,
+    jint computeType, jint alg, jobject spgemmDescr) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_copy");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_copy\n");
+
+    // Copy Java inputs into native locals
+    cusparseHandle_t h = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseOperation_t aOp = (cusparseOperation_t)opA;
+    cusparseOperation_t bOp = (cusparseOperation_t)opB;
+
+    PointerData* alphaPD = initPointerData(env, alpha);
+    if (!alphaPD) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    void* alphaPtr = alphaPD->getPointer(env);
+
+    cusparseConstSpMatDescr_t A = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseConstSpMatDescr_t B = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB);
+
+    PointerData* betaPD = initPointerData(env, beta);
+    if (!betaPD) {
+        // alpha was already acquired: release it before reporting the failure
+        releasePointerData(env, alphaPD, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void* betaPtr = betaPD->getPointer(env);
+
+    cusparseSpMatDescr_t C = (cusparseSpMatDescr_t)getNativePointerValue(env, matC);
+    cudaDataType ct = (cudaDataType)computeType;
+    cusparseSpGEMMAlg_t al = (cusparseSpGEMMAlg_t)alg;
+    cusparseSpGEMMDescr_t D = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr);
+
+    // Cusparse API call
+    cusparseStatus_t st = cusparseSpGEMM_copy(h, aOp, bOp, alphaPtr, A, B, betaPtr, C, ct, al, D);
+
+    // alpha/beta are inputs, so they are released without commit. Both releases
+    // always run; a failure of the first one must not leave the second one unreleased.
+    const bool alphaReleased = releasePointerData(env, alphaPD, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, betaPD, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    return (jint)st;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseGetMatIndexBaseNative(JNIEnv *env, jclass cls,
jobject descrA) {
+
+    // cusparseGetMatIndexBase wrapper: descrA is the only object argument and must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseGetMatIndexBase");
+
+    Logger::log(LOG_TRACE, "Executing cusparseGetMatIndexBase(descrA=%p)\n", descrA);
+
+    // Unwrap the native descriptor stored in the Java object
+    cusparseMatDescr_t descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+
+    // Cusparse API call; the returned cusparseIndexBase_t is handed back as a jint
+    cusparseIndexBase_t jniResult_native = cusparseGetMatIndexBase(descrA_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+/*
+ * JNI entry point wrapping cusparseCreateCsr.
+ *
+ * Creates a sparse matrix descriptor from the given sizes, array pointers and
+ * type codes, and stores the descriptor in spMatDescr through
+ * setNativePointerValue. Returns the cusparseStatus_t of the call as a jint.
+ * The stored descriptor value is nullptr unless cusparseCreateCsr wrote one.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateCsrNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jlong rows, jlong cols, jlong nnz, jobject csrRowOffsets,
+    jobject csrColInd, jobject csrValues, jint csrRowOffsetsType, jint csrColIndType, jint idxBase, jint valueType) {
+
+    // Only the output descriptor is validated; the three CSR array arguments are
+    // handed to getPointer without a null check
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseCreateCsr");
+
+    // jlong values are printed as long long with %lld so that the format matches
+    // the argument width on every platform
+    Logger::log(LOG_TRACE, "Executing cusparseCreateCsr(spMatDescr=%p, rows=%lld, cols=%lld, nnz=%lld, csrRowOffsets=%p, csrColInd=%p, csrValues=%p, csrRowOffsetsType=%d, csrColIndType=%d, idxBase=%d, valueType=%d)\n",
+        spMatDescr, (long long)rows, (long long)cols, (long long)nnz, csrRowOffsets, csrColInd, csrValues, csrRowOffsetsType, csrColIndType, idxBase, valueType);
+
+    // Start from nullptr so that a failed create never publishes an indeterminate value
+    cusparseSpMatDescr_t spMatDescr_native = nullptr;
+
+    // Copy Java inputs into native locals
+    int64_t rows_native = (int64_t)rows;
+    int64_t cols_native = (int64_t)cols;
+    int64_t nnz_native = (int64_t)nnz;
+    void * csrRowOffsets_native = (void *)getPointer(env, csrRowOffsets);
+    void * csrColInd_native = (void *)getPointer(env, csrColInd);
+    void * csrValues_native = (void *)getPointer(env, csrValues);
+    cusparseIndexType_t csrRowOffsetsType_native = (cusparseIndexType_t)csrRowOffsetsType;
+    cusparseIndexType_t csrColIndType_native = (cusparseIndexType_t)csrColIndType;
+    cusparseIndexBase_t idxBase_native = (cusparseIndexBase_t)idxBase;
+    cudaDataType valueType_native = (cudaDataType)valueType;
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseCreateCsr(&spMatDescr_native, rows_native, cols_native, nnz_native, csrRowOffsets_native,
+        csrColInd_native, csrValues_native, csrRowOffsetsType_native, csrColIndType_native, idxBase_native, valueType_native);
+
+    // Publish the descriptor to the Java object
+    setNativePointerValue(env, spMatDescr, (jlong)spMatDescr_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/*
+ * JNI entry point wrapping cusparseCreateDnVec.
+ *
+ * Creates a dense vector descriptor over the pointer resolved from values and
+ * stores it in dnVecDescr through setNativePointerValue. Returns the
+ * cusparseStatus_t of the call as a jint. The stored descriptor value is
+ * nullptr unless cusparseCreateDnVec wrote one.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnVecNative
+    (JNIEnv *env, jclass cls, jobject dnVecDescr, jlong size, jobject values, jint valueType) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, dnVecDescr, "dnVecDescr", "cusparseCreateDnVec");
+    CUJAVA_REQUIRE_NONNULL(env, values, "values", "cusparseCreateDnVec");
+
+    // size is a jlong: print it as long long with %lld
+    Logger::log(LOG_TRACE, "Executing cusparseCreateDnVec(dnVecDescr=%p, size=%lld, values=%p, valueType=%d)\n",
+        dnVecDescr, (long long)size, values, valueType);
+
+    // Start from nullptr so that a failed create never publishes an indeterminate value
+    cusparseDnVecDescr_t dnVecDescr_native = nullptr;
+
+    // Copy Java inputs into native locals
+    int64_t size_native = (int64_t)size;
+    void * values_native = (void *)getPointer(env, values);
+    cudaDataType valueType_native = (cudaDataType)valueType;
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseCreateDnVec(&dnVecDescr_native, size_native, values_native, valueType_native);
+
+    // Publish the descriptor to the Java object
+    setNativePointerValue(env, dnVecDescr, (jlong)dnVecDescr_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return
jniResult;
+}
+
+
+
+/*
+ * JNI entry point wrapping cusparseSpMV_bufferSize.
+ *
+ * All jobject parameters, including the bufferSize array, must be non-null.
+ * alpha and beta are accessed through PointerData objects, bufferSize through
+ * a native size_t buffer set up by initNative and written back by
+ * releaseNative. Returns the cusparseStatus_t of the call as a jint, or
+ * CUJAVA_CUSPARSE_INTERNAL_ERROR when a native resource cannot be set up or
+ * released. Resources that were already acquired are released on every
+ * error path.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMV_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, jobject vecY, jint computeType, jint alg, jlongArray bufferSize) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, vecX, "vecX", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, vecY, "vecY", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSpMV_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMV_bufferSize(handle=%p, opA=%d, alpha=%p, matA=%p, vecX=%p, beta=%p, vecY=%p, computeType=%d, alg=%d, bufferSize=%p)\n",
+        handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, bufferSize);
+
+    // Copy Java inputs into native locals
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseOperation_t opA_native = (cusparseOperation_t)opA;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    void * alpha_native = (void *)alpha_pointerData->getPointer(env);
+
+    cusparseConstSpMatDescr_t matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseConstDnVecDescr_t vecX_native = (cusparseConstDnVecDescr_t)getNativePointerValue(env, vecX);
+
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: release it before reporting the failure
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void * beta_native = (void *)beta_pointerData->getPointer(env);
+
+    cusparseDnVecDescr_t vecY_native = (cusparseDnVecDescr_t)getNativePointerValue(env, vecY);
+    cudaDataType computeType_native = (cudaDataType)computeType;
+    cusparseSpMVAlg_t alg_native = (cusparseSpMVAlg_t)alg;
+
+    size_t * bufferSize_native = nullptr;
+    if (!initNative(env, bufferSize, bufferSize_native, true)) {
+        // alpha and beta were already acquired: release both before reporting the failure
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMV_bufferSize(handle_native, opA_native, alpha_native, matA_native,
+        vecX_native, beta_native, vecY_native, computeType_native, alg_native, bufferSize_native);
+
+    // Every release always runs; a failure of an earlier one must not leave the
+    // later resources unreleased
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    const bool bufferSizeReleased = releaseNative(env, bufferSize_native, bufferSize, true);
+    if (!alphaReleased || !betaReleased || !bufferSizeReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/*
+ * JNI entry point wrapping cusparseSpMV.
+ *
+ * handle, alpha, matA, vecX, beta and vecY must be non-null; externalBuffer is
+ * not null-checked and is handed to getPointer as it is. alpha and beta are
+ * accessed through PointerData objects which are released with JNI_ABORT
+ * after the call. Returns the cusparseStatus_t of the call as a jint, or
+ * CUJAVA_CUSPARSE_INTERNAL_ERROR when a PointerData cannot be created or
+ * released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMVNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, jobject vecY, jint computeType, jint alg, jobject externalBuffer) {
+
+    // Validate the required jobject parameters
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, vecX, "vecX", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, vecY, "vecY", "cusparseSpMV");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMV(handle=%p, opA=%d, alpha=%p, matA=%p, vecX=%p, beta=%p, vecY=%p, computeType=%d, alg=%d, externalBuffer=%p)\n",
+        handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, externalBuffer);
+
+    // Copy Java inputs into native locals
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseOperation_t opA_native = (cusparseOperation_t)opA;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void * alpha_native = (void *)alpha_pointerData->getPointer(env);
+
+    cusparseConstSpMatDescr_t matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseConstDnVecDescr_t vecX_native = (cusparseConstDnVecDescr_t)getNativePointerValue(env, vecX);
+
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: release it before reporting the failure
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void * beta_native = (void *)beta_pointerData->getPointer(env);
+
+    cusparseDnVecDescr_t vecY_native = (cusparseDnVecDescr_t)getNativePointerValue(env, vecY);
+    cudaDataType computeType_native = (cudaDataType)computeType;
+    cusparseSpMVAlg_t alg_native = (cusparseSpMVAlg_t)alg;
+    void * externalBuffer_native = (void *)getPointer(env, externalBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMV(handle_native, opA_native, alpha_native, matA_native, vecX_native, beta_native, vecY_native, computeType_native, alg_native, externalBuffer_native);
+
+    // Both releases always run; a failure of the first one must not leave the second one unreleased
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL
Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyNative(JNIEnv *env, jclass cls, jobject handle) {
+
+    // cusparseDestroy wrapper: the handle object is required
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDestroy");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroy(handle=%p)\n", handle);
+
+    // Unwrap the native handle stored in the Java object and destroy it
+    cusparseHandle_t nativeHandle = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseStatus_t status = cusparseDestroy(nativeHandle);
+
+    // The cusparseStatus_t is handed back to Java as a jint
+    return (jint)status;
+}
+
+
+
+/*
+ * JNI entry point wrapping cusparseDestroyDnVec. dnVecDescr must be non-null;
+ * the cusparseStatus_t of the call is returned as a jint.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnVecNative(JNIEnv *env, jclass cls, jobject dnVecDescr) {
+
+    CUJAVA_REQUIRE_NONNULL(env, dnVecDescr, "dnVecDescr", "cusparseDestroyDnVec");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroyDnVec(dnVecDescr=%p)\n", dnVecDescr);
+
+    // Unwrap the native descriptor stored in the Java object and destroy it
+    cusparseConstDnVecDescr_t nativeDescr = (cusparseConstDnVecDescr_t)getNativePointerValue(env, dnVecDescr);
+    cusparseStatus_t status = cusparseDestroyDnVec(nativeDescr);
+
+    return (jint)status;
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnMatNative(JNIEnv *env, jclass cls, jobject dnMatDescr) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, dnMatDescr, "dnMatDescr", "cusparseDestroyDnMat");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroyDnMat(dnMatDescr=%p)\n", dnMatDescr);
+
+    // Declare native variables
+    cusparseConstDnMatDescr_t dnMatDescr_native;
+
+    // Copy Java inputs into native locals
+    dnMatDescr_native =
(cusparseConstDnMatDescr_t)getNativePointerValue(env, dnMatDescr);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseDestroyDnMat(dnMatDescr_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+/*
+ * JNI entry point wrapping cusparseDestroySpMat. spMatDescr must be non-null;
+ * the cusparseStatus_t of the call is returned as a jint.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroySpMatNative(JNIEnv *env, jclass cls, jobject spMatDescr) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseDestroySpMat");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroySpMat(spMatDescr=%p)\n", spMatDescr);
+
+    // Unwrap the native descriptor stored in the Java object
+    cusparseConstSpMatDescr_t spMatDescr_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, spMatDescr);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseDestroySpMat(spMatDescr_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+/*
+ * JNI entry point wrapping cusparseSpMM.
+ *
+ * handle, alpha, matA, matB, beta and matC must be non-null; externalBuffer is
+ * not null-checked and is handed to getPointer as it is. alpha and beta are
+ * accessed through PointerData objects which are released with JNI_ABORT
+ * after the call. Returns the cusparseStatus_t of the call as a jint, or
+ * CUJAVA_CUSPARSE_INTERNAL_ERROR when a PointerData cannot be created or
+ * released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMMNative
+(JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC, jint computeType, jint alg, jobject externalBuffer) {
+
+    // Validate the required jobject parameters
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpMM");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMM(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, externalBuffer=%p)\n",
+        handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, externalBuffer);
+
+    // Copy Java inputs into native locals
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseOperation_t opA_native = (cusparseOperation_t)opA;
+    cusparseOperation_t opB_native = (cusparseOperation_t)opB;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void * alpha_native = (void *)alpha_pointerData->getPointer(env);
+
+    cusparseConstSpMatDescr_t matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseConstDnMatDescr_t matB_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matB);
+
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: release it before reporting the failure
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    void * beta_native = (void *)beta_pointerData->getPointer(env);
+
+    cusparseDnMatDescr_t matC_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matC);
+    cudaDataType computeType_native = (cudaDataType)computeType;
+    cusparseSpMMAlg_t alg_native = (cusparseSpMMAlg_t)alg;
+    void * externalBuffer_native = (void *)getPointer(env, externalBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMM(handle_native, opA_native, opB_native, alpha_native, matA_native,
+        matB_native, beta_native, matC_native, computeType_native, alg_native, externalBuffer_native);
+
+    // Both releases always run; a failure of the first one must not leave the second one unreleased
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL
Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMM_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jlongArray bufferSize) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpMM_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSpMM_bufferSize"); + + Logger::log(LOG_TRACE, "Executing cusparseSpMM_bufferSize(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, bufferSize=%p)\n", + handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, bufferSize); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseOperation_t opA_native; + cusparseOperation_t opB_native; + void * alpha_native = nullptr; + cusparseConstSpMatDescr_t matA_native; + cusparseConstDnMatDescr_t matB_native; + void * beta_native = nullptr; + cusparseDnMatDescr_t matC_native; + cudaDataType computeType_native; + cusparseSpMMAlg_t alg_native; + size_t * bufferSize_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + opA_native = (cusparseOperation_t)opA; + opB_native = (cusparseOperation_t)opB; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) + { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + alpha_native = (void *)alpha_pointerData->getPointer(env); + matA_native = 
(cusparseConstSpMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matB); + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + beta_native = (void *)beta_pointerData->getPointer(env); + matC_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matC); + computeType_native = (cudaDataType)computeType; + alg_native = (cusparseSpMMAlg_t)alg; + if (!initNative(env, bufferSize, bufferSize_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSpMM_bufferSize(handle_native, opA_native, opB_native, alpha_native, + matA_native, matB_native, beta_native, matC_native, computeType_native, alg_native, bufferSize_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releaseNative(env, bufferSize_native, bufferSize, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnMatNative + (JNIEnv *env, jclass cls, jobject dnMatDescr, jlong rows, jlong cols, jlong ld, jobject values, jint valueType, jint order) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, dnMatDescr, "dnMatDescr", "cusparseCreateDnMat"); + CUJAVA_REQUIRE_NONNULL(env, values, "values", "cusparseCreateDnMat"); + + Logger::log(LOG_TRACE, "Executing cusparseCreateDnMat(dnMatDescr=%p, rows=%ld, cols=%ld, ld=%ld, values=%p, valueType=%d, order=%d)\n", + dnMatDescr, rows, cols, ld, values, valueType, order); + + // Declare native variables + cusparseDnMatDescr_t dnMatDescr_native; + int64_t rows_native = 0; + int64_t cols_native = 0; + 
int64_t ld_native = 0; + void * values_native = nullptr; + cudaDataType valueType_native; + cusparseOrder_t order_native; + + // Copy Java inputs into native locals + rows_native = (int64_t)rows; + cols_native = (int64_t)cols; + ld_native = (int64_t)ld; + values_native = (void *)getPointer(env, values); + valueType_native = (cudaDataType)valueType; + order_native = (cusparseOrder_t)order; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCreateDnMat(&dnMatDescr_native, rows_native, cols_native, ld_native, + values_native, valueType_native, order_native); + setNativePointerValue(env, dnMatDescr, (jlong)dnMatDescr_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsrSetPointersNative + (JNIEnv *env, jclass cls, jobject spMatDescr, jobject csrRowOffsets, jobject csrColInd, jobject csrValues) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseCsrSetPointers"); + CUJAVA_REQUIRE_NONNULL(env, csrRowOffsets, "csrRowOffsets", "cusparseCsrSetPointers"); + CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", "cusparseCsrSetPointers"); + CUJAVA_REQUIRE_NONNULL(env, csrValues, "csrValues", "cusparseCsrSetPointers"); + + Logger::log(LOG_TRACE, "Executing cusparseCsrSetPointers(spMatDescr=%p, csrRowOffsets=%p, csrColInd=%p, csrValues=%p)\n", + spMatDescr, csrRowOffsets, csrColInd, csrValues); + + // Declare native variables + cusparseSpMatDescr_t spMatDescr_native; + void * csrRowOffsets_native = nullptr; + void * csrColInd_native = nullptr; + void * csrValues_native = nullptr; + + // Copy Java inputs into native locals + spMatDescr_native = (cusparseSpMatDescr_t)getNativePointerValue(env, spMatDescr); + csrRowOffsets_native = (void *)getPointer(env, csrRowOffsets); + csrColInd_native = (void *)getPointer(env, csrColInd); + csrValues_native = (void *)getPointer(env, 
csrValues); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCsrSetPointers(spMatDescr_native, csrRowOffsets_native, csrColInd_native, csrValues_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2Native + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, + jobject csrColInd, jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jobject buffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, csrVal, "csrVal", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, csrRowPtr, "csrRowPtr", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, cscVal, "cscVal", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, cscColPtr, "cscColPtr", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, cscRowInd, "cscRowInd", "cusparseCsr2cscEx2"); + CUJAVA_REQUIRE_NONNULL(env, buffer, "buffer", "cusparseCsr2cscEx2"); + + Logger::log(LOG_TRACE, "Executing cusparseCsr2cscEx2(handle=%p, m=%d, n=%d, nnz=%d, csrVal=%p, csrRowPtr=%p, csrColInd=%p, cscVal=%p, cscColPtr=%p, cscRowInd=%p, valType=%d, copyValues=%d, idxBase=%d, alg=%d, buffer=%p)\n", + handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, buffer); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + int nnz_native = 0; + void * csrVal_native = nullptr; + int * csrRowPtr_native = nullptr; + int * csrColInd_native = nullptr; + void * cscVal_native = nullptr; + int * cscColPtr_native = nullptr; + int * cscRowInd_native = nullptr; + cudaDataType valType_native; + cusparseAction_t 
copyValues_native; + cusparseIndexBase_t idxBase_native; + cusparseCsr2CscAlg_t alg_native; + void * buffer_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + nnz_native = (int)nnz; + csrVal_native = (void *)getPointer(env, csrVal); + csrRowPtr_native = (int *)getPointer(env, csrRowPtr); + csrColInd_native = (int *)getPointer(env, csrColInd); + cscVal_native = (void *)getPointer(env, cscVal); + cscColPtr_native = (int *)getPointer(env, cscColPtr); + cscRowInd_native = (int *)getPointer(env, cscRowInd); + valType_native = (cudaDataType)valType; + copyValues_native = (cusparseAction_t)copyValues; + idxBase_native = (cusparseIndexBase_t)idxBase; + alg_native = (cusparseCsr2CscAlg_t)alg; + buffer_native = (void *)getPointer(env, buffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCsr2cscEx2(handle_native, m_native, n_native, nnz_native, + csrVal_native, csrRowPtr_native, csrColInd_native, cscVal_native, cscColPtr_native, cscRowInd_native, + valType_native, copyValues_native, idxBase_native, alg_native, buffer_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, jobject csrColInd, + jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jlongArray bufferSize) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, csrVal, "csrVal", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, csrRowPtr, "csrRowPtr", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", 
"cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, cscVal, "cscVal", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, cscColPtr, "cscColPtr", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, cscRowInd, "cscRowInd", "cusparseCsr2cscEx2_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseCsr2cscEx2_bufferSize"); + + Logger::log(LOG_TRACE, "Executing cusparseCsr2cscEx2_bufferSize(handle=%p, m=%d, n=%d, nnz=%d, csrVal=%p, csrRowPtr=%p, csrColInd=%p, cscVal=%p, cscColPtr=%p, cscRowInd=%p, valType=%d, copyValues=%d, idxBase=%d, alg=%d, bufferSize=%p)\n", + handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, bufferSize); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + int nnz_native = 0; + void * csrVal_native = nullptr; + int * csrRowPtr_native = nullptr; + int * csrColInd_native = nullptr; + void * cscVal_native = nullptr; + int * cscColPtr_native = nullptr; + int * cscRowInd_native = nullptr; + cudaDataType valType_native; + cusparseAction_t copyValues_native; + cusparseIndexBase_t idxBase_native; + cusparseCsr2CscAlg_t alg_native; + size_t * bufferSize_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + nnz_native = (int)nnz; + csrVal_native = (void *)getPointer(env, csrVal); + csrRowPtr_native = (int *)getPointer(env, csrRowPtr); + csrColInd_native = (int *)getPointer(env, csrColInd); + cscVal_native = (void *)getPointer(env, cscVal); + cscColPtr_native = (int *)getPointer(env, cscColPtr); + cscRowInd_native = (int *)getPointer(env, cscRowInd); + valType_native = (cudaDataType)valType; + copyValues_native = (cusparseAction_t)copyValues; + idxBase_native = (cusparseIndexBase_t)idxBase; + alg_native = (cusparseCsr2CscAlg_t)alg; + if (!initNative(env, 
bufferSize, bufferSize_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCsr2cscEx2_bufferSize + (handle_native, m_native, n_native, nnz_native, csrVal_native, csrRowPtr_native, csrColInd_native, cscVal_native, + cscColPtr_native, cscRowInd_native, valType_native, copyValues_native, idxBase_native, alg_native, bufferSize_native); + if (!releaseNative(env, bufferSize_native, bufferSize, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2Native + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA, + jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, + jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jobject pBuffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValA, "csrSortedValA", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValB, "csrSortedValB", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", 
"cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValC, "csrSortedValC", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndC, "csrSortedColIndC", "cusparseDcsrgeam2"); + CUJAVA_REQUIRE_NONNULL(env, pBuffer, "pBuffer", "cusparseDcsrgeam2"); + + Logger::log(LOG_TRACE, "Executing cusparseDcsrgeam2(handle=%p, m=%d, n=%d, alpha=%p, descrA=%p, nnzA=%d, csrSortedValA=%p, csrSortedRowPtrA=%p, csrSortedColIndA=%p, beta=%p, descrB=%p, nnzB=%d, csrSortedValB=%p, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedValC=%p, csrSortedRowPtrC=%p, csrSortedColIndC=%p, pBuffer=%p)\n", + handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + double * alpha_native = nullptr; + cusparseMatDescr_t descrA_native; + int nnzA_native = 0; + double * csrSortedValA_native = nullptr; + int * csrSortedRowPtrA_native = nullptr; + int * csrSortedColIndA_native = nullptr; + double * beta_native = nullptr; + cusparseMatDescr_t descrB_native; + int nnzB_native = 0; + double * csrSortedValB_native = nullptr; + int * csrSortedRowPtrB_native = nullptr; + int * csrSortedColIndB_native = nullptr; + cusparseMatDescr_t descrC_native; + double * csrSortedValC_native = nullptr; + int * csrSortedRowPtrC_native = nullptr; + int * csrSortedColIndC_native = nullptr; + void * pBuffer_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == 
nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + nnzA_native = (int)nnzA; + csrSortedValA_native = (double *)getPointer(env, csrSortedValA); + csrSortedRowPtrA_native = (int *)getPointer(env, csrSortedRowPtrA); + csrSortedColIndA_native = (int *)getPointer(env, csrSortedColIndA); + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + descrB_native = (cusparseMatDescr_t)getNativePointerValue(env, descrB); + nnzB_native = (int)nnzB; + csrSortedValB_native = (double *)getPointer(env, csrSortedValB); + csrSortedRowPtrB_native = (int *)getPointer(env, csrSortedRowPtrB); + csrSortedColIndB_native = (int *)getPointer(env, csrSortedColIndB); + descrC_native = (cusparseMatDescr_t)getNativePointerValue(env, descrC); + csrSortedValC_native = (double *)getPointer(env, csrSortedValC); + csrSortedRowPtrC_native = (int *)getPointer(env, csrSortedRowPtrC); + csrSortedColIndC_native = (int *)getPointer(env, csrSortedColIndC); + pBuffer_native = (void *)getPointer(env, pBuffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseDcsrgeam2(handle_native, m_native, n_native, alpha_native, descrA_native, + nnzA_native, csrSortedValA_native, csrSortedRowPtrA_native, csrSortedColIndA_native, beta_native, descrB_native, + nnzB_native, csrSortedValB_native, csrSortedRowPtrB_native, csrSortedColIndB_native, descrC_native, csrSortedValC_native, + csrSortedRowPtrC_native, csrSortedColIndC_native, pBuffer_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + 
+ + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2_1bufferSizeExtNative + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA, + jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, jobject csrSortedRowPtrB, + jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jlongArray pBufferSizeInBytes) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValA, "csrSortedValA", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValB, "csrSortedValB", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedValC, "csrSortedValC", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndC, "csrSortedColIndC", 
"cusparseDcsrgeam2_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, pBufferSizeInBytes, "pBufferSizeInBytes", "cusparseDcsrgeam2_bufferSizeExt"); + + Logger::log(LOG_TRACE, "Executing cusparseDcsrgeam2_bufferSizeExt(handle=%p, m=%d, n=%d, alpha=%p, descrA=%p, nnzA=%d, csrSortedValA=%p, csrSortedRowPtrA=%p, csrSortedColIndA=%p, beta=%p, descrB=%p, nnzB=%d, csrSortedValB=%p, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedValC=%p, csrSortedRowPtrC=%p, csrSortedColIndC=%p, pBufferSizeInBytes=%p)\n", + handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + double * alpha_native = nullptr; + cusparseMatDescr_t descrA_native; + int nnzA_native = 0; + double * csrSortedValA_native = nullptr; + int * csrSortedRowPtrA_native = nullptr; + int * csrSortedColIndA_native = nullptr; + double * beta_native = nullptr; + cusparseMatDescr_t descrB_native; + int nnzB_native = 0; + double * csrSortedValB_native = nullptr; + int * csrSortedRowPtrB_native = nullptr; + int * csrSortedColIndB_native = nullptr; + cusparseMatDescr_t descrC_native; + double * csrSortedValC_native = nullptr; + int * csrSortedRowPtrC_native = nullptr; + int * csrSortedColIndC_native = nullptr; + size_t * pBufferSizeInBytes_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + alpha_native = (double *)alpha_pointerData->getPointer(env); + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + nnzA_native = (int)nnzA; + 
csrSortedValA_native = (double *)getPointer(env, csrSortedValA); + csrSortedRowPtrA_native = (int *)getPointer(env, csrSortedRowPtrA); + csrSortedColIndA_native = (int *)getPointer(env, csrSortedColIndA); + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + beta_native = (double *)beta_pointerData->getPointer(env); + descrB_native = (cusparseMatDescr_t)getNativePointerValue(env, descrB); + nnzB_native = (int)nnzB; + csrSortedValB_native = (double *)getPointer(env, csrSortedValB); + csrSortedRowPtrB_native = (int *)getPointer(env, csrSortedRowPtrB); + csrSortedColIndB_native = (int *)getPointer(env, csrSortedColIndB); + descrC_native = (cusparseMatDescr_t)getNativePointerValue(env, descrC); + csrSortedValC_native = (double *)getPointer(env, csrSortedValC); + csrSortedRowPtrC_native = (int *)getPointer(env, csrSortedRowPtrC); + csrSortedColIndC_native = (int *)getPointer(env, csrSortedColIndC); + if (!initNative(env, pBufferSizeInBytes, pBufferSizeInBytes_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseDcsrgeam2_bufferSizeExt(handle_native, m_native, n_native, alpha_native, + descrA_native, nnzA_native, csrSortedValA_native, csrSortedRowPtrA_native, csrSortedColIndA_native, beta_native, + descrB_native, nnzB_native, csrSortedValB_native, csrSortedRowPtrB_native, csrSortedColIndB_native, descrC_native, + csrSortedValC_native, csrSortedRowPtrC_native, csrSortedColIndC_native, pBufferSizeInBytes_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releaseNative(env, pBufferSizeInBytes_native, pBufferSizeInBytes, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT 
jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDenseNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSparseToDense"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSparseToDense"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSparseToDense"); + + Logger::log(LOG_TRACE, "Executing cusparseSparseToDense(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n", + handle, matA, matB, alg, externalBuffer); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseConstSpMatDescr_t matA_native; + cusparseDnMatDescr_t matB_native; + cusparseSparseToDenseAlg_t alg_native; + void * externalBuffer_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matB); + alg_native = (cusparseSparseToDenseAlg_t)alg; + externalBuffer_native = (void *)getPointer(env, externalBuffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSparseToDense(handle_native, matA_native, matB_native, alg_native, externalBuffer_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDense_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSparseToDense_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSparseToDense_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", 
"cusparseSparseToDense_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSparseToDense_bufferSize"); + + Logger::log(LOG_TRACE, "Executing cusparseSparseToDense_bufferSize(handle=%p, matA=%p, matB=%p, alg=%d, bufferSize=%p)\n", + handle, matA, matB, alg, bufferSize); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseConstSpMatDescr_t matA_native; + cusparseDnMatDescr_t matB_native; + cusparseSparseToDenseAlg_t alg_native; + size_t * bufferSize_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matB); + alg_native = (cusparseSparseToDenseAlg_t)alg; + if (!initNative(env, bufferSize, bufferSize_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSparseToDense_bufferSize(handle_native, matA_native, matB_native, alg_native, bufferSize_native); + + if (!releaseNative(env, bufferSize_native, bufferSize, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_bufferSize"); + CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseDenseToSparse_bufferSize"); + + Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_bufferSize(handle=%p, matA=%p, 
matB=%p, alg=%d, bufferSize=%p)\n", handle, matA, matB, alg, bufferSize); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseConstDnMatDescr_t matA_native; + cusparseSpMatDescr_t matB_native; + cusparseDenseToSparseAlg_t alg_native; + size_t * bufferSize_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + matA_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matB); + alg_native = (cusparseDenseToSparseAlg_t)alg; + if (!initNative(env, bufferSize, bufferSize_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseDenseToSparse_bufferSize(handle_native, matA_native, matB_native, alg_native, bufferSize_native); + + if (!releaseNative(env, bufferSize_native, bufferSize, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1analysisNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_analysis"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_analysis"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_analysis"); + + Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_analysis(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n", + handle, matA, matB, alg, externalBuffer); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseConstDnMatDescr_t matA_native; + cusparseSpMatDescr_t matB_native; + cusparseDenseToSparseAlg_t alg_native; + void * externalBuffer_native = nullptr; + + // Copy Java 
inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + matA_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matB); + alg_native = (cusparseDenseToSparseAlg_t)alg; + externalBuffer_native = (void *)getPointer(env, externalBuffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseDenseToSparse_analysis(handle_native, matA_native, matB_native, alg_native, externalBuffer_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1convertNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_convert"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_convert"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_convert"); + + Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_convert(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n", + handle, matA, matB, alg, externalBuffer); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseConstDnMatDescr_t matA_native; + cusparseSpMatDescr_t matB_native; + cusparseDenseToSparseAlg_t alg_native; + void * externalBuffer_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + matA_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matB); + alg_native = (cusparseDenseToSparseAlg_t)alg; + externalBuffer_native = (void *)getPointer(env, externalBuffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = 
cusparseDenseToSparse_convert(handle_native, matA_native, matB_native, alg_native, externalBuffer_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDnnzNative + (JNIEnv *env, jclass cls, jobject handle, jint dirA, jint m, jint n, jobject descrA, jobject A, jint lda, jobject nnzPerRowCol, jobject nnzTotalDevHostPtr) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDnnz"); + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDnnz"); + CUJAVA_REQUIRE_NONNULL(env, A, "A", "cusparseDnnz"); + CUJAVA_REQUIRE_NONNULL(env, nnzPerRowCol, "nnzPerRowCol", "cusparseDnnz"); + CUJAVA_REQUIRE_NONNULL(env, nnzTotalDevHostPtr, "nnzTotalDevHostPtr", "cusparseDnnz"); + + Logger::log(LOG_TRACE, "Executing cusparseDnnz(handle=%p, dirA=%d, m=%d, n=%d, descrA=%p, A=%p, lda=%d, nnzPerRowCol=%p, nnzTotalDevHostPtr=%p)\n", + handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseDirection_t dirA_native; + int m_native = 0; + int n_native = 0; + cusparseMatDescr_t descrA_native; + double * A_native = nullptr; + int lda_native = 0; + int * nnzPerRowCol_native = nullptr; + int * nnzTotalDevHostPtr_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + dirA_native = (cusparseDirection_t)dirA; + m_native = (int)m; + n_native = (int)n; + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + A_native = (double *)getPointer(env, A); + lda_native = (int)lda; + nnzPerRowCol_native = (int *)getPointer(env, nnzPerRowCol); + PointerData *nnzTotalDevHostPtr_pointerData = initPointerData(env, nnzTotalDevHostPtr); + if (nnzTotalDevHostPtr_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + 
nnzTotalDevHostPtr_native = (int *)nnzTotalDevHostPtr_pointerData->getPointer(env); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseDnnz(handle_native, dirA_native, m_native, n_native, descrA_native, A_native, + lda_native, nnzPerRowCol_native, nnzTotalDevHostPtr_native); + + if (!isPointerBackedByNativeMemory(env, nnzTotalDevHostPtr)) { + cudaDeviceSynchronize(); + } + if (!releasePointerData(env, nnzTotalDevHostPtr_pointerData, 0)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatTypeNative + (JNIEnv *env, jclass cls, jobject descrA, jint type) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseSetMatType"); + + Logger::log(LOG_TRACE, "Executing cusparseSetMatType(descrA=%p, type=%d)\n", descrA, type); + + // Declare native variables + cusparseMatDescr_t descrA_native; + cusparseMatrixType_t type_native; + + // Copy Java inputs into native locals + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + type_native = (cusparseMatrixType_t)type; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSetMatType(descrA_native, type_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatIndexBaseNative + (JNIEnv *env, jclass cls, jobject descrA, jint base) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseSetMatIndexBase"); + + Logger::log(LOG_TRACE, "Executing cusparseSetMatIndexBase(descrA=%p, base=%d)\n", descrA, base); + + // Declare native variables + cusparseMatDescr_t descrA_native; + cusparseIndexBase_t base_native; + + // Copy Java inputs into native locals + descrA_native = 
(cusparseMatDescr_t)getNativePointerValue(env, descrA); + base_native = (cusparseIndexBase_t)base; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSetMatIndexBase(descrA_native, base_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetPointerModeNative + (JNIEnv *env, jclass cls, jobject handle, jint mode) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSetPointerMode"); + + Logger::log(LOG_TRACE, "Executing cusparseSetPointerMode(handle=%p, mode=%d)\n", handle, mode); + + // Declare native variables + cusparseHandle_t handle_native; + cusparsePointerMode_t mode_native; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + mode_native = (cusparsePointerMode_t)mode; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseSetPointerMode(handle_native, mode_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrgeam2NnzNative + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject descrA, jint nnzA, jobject csrSortedRowPtrA, jobject csrSortedColIndA, + jobject descrB, jint nnzB, jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedRowPtrC, jobject nnzTotalDevHostPtr, jobject workspace) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", 
"cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, nnzTotalDevHostPtr, "nnzTotalDevHostPtr", "cusparseXcsrgeam2Nnz"); + CUJAVA_REQUIRE_NONNULL(env, workspace, "workspace", "cusparseXcsrgeam2Nnz"); + + // Log message + Logger::log(LOG_TRACE, "Executing cusparseXcsrgeam2Nnz(handle=%p, m=%d, n=%d, descrA=%p, nnzA=%d, csrSortedRowPtrA=%p, csrSortedColIndA=%p, descrB=%p, nnzB=%d, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedRowPtrC=%p, nnzTotalDevHostPtr=%p, workspace=%p)\n", + handle, m, n, descrA, nnzA, csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, nnzTotalDevHostPtr, workspace); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + cusparseMatDescr_t descrA_native; + int nnzA_native = 0; + int * csrSortedRowPtrA_native = nullptr; + int * csrSortedColIndA_native = nullptr; + cusparseMatDescr_t descrB_native; + int nnzB_native = 0; + int * csrSortedRowPtrB_native = nullptr; + int * csrSortedColIndB_native = nullptr; + cusparseMatDescr_t descrC_native; + int * csrSortedRowPtrC_native = nullptr; + int * nnzTotalDevHostPtr_native = nullptr; + void * workspace_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + nnzA_native = (int)nnzA; + csrSortedRowPtrA_native = (int *)getPointer(env, csrSortedRowPtrA); + csrSortedColIndA_native = (int *)getPointer(env, csrSortedColIndA); + descrB_native = 
(cusparseMatDescr_t)getNativePointerValue(env, descrB); + nnzB_native = (int)nnzB; + csrSortedRowPtrB_native = (int *)getPointer(env, csrSortedRowPtrB); + csrSortedColIndB_native = (int *)getPointer(env, csrSortedColIndB); + descrC_native = (cusparseMatDescr_t)getNativePointerValue(env, descrC); + csrSortedRowPtrC_native = (int *)getPointer(env, csrSortedRowPtrC); + PointerData *nnzTotalDevHostPtr_pointerData = initPointerData(env, nnzTotalDevHostPtr); + + if (nnzTotalDevHostPtr_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + nnzTotalDevHostPtr_native = (int *)nnzTotalDevHostPtr_pointerData->getPointer(env); + workspace_native = (void *)getPointer(env, workspace); + + cusparseStatus_t jniResult_native = cusparseXcsrgeam2Nnz(handle_native, m_native, n_native, descrA_native, nnzA_native, + csrSortedRowPtrA_native, csrSortedColIndA_native, descrB_native, nnzB_native, csrSortedRowPtrB_native, csrSortedColIndB_native, + descrC_native, csrSortedRowPtrC_native, nnzTotalDevHostPtr_native, workspace_native); + + if (!isPointerBackedByNativeMemory(env, nnzTotalDevHostPtr)) { + cudaDeviceSynchronize(); + } + if (!releasePointerData(env, nnzTotalDevHostPtr_pointerData, 0)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1workEstimationNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize1, jobject externalBuffer1) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, 
matB, "matB", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_workEstimation"); + CUJAVA_REQUIRE_NONNULL(env, bufferSize1, "bufferSize1", "cusparseSpGEMM_workEstimation"); + + Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_workEstimation(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, spgemmDescr=%p, bufferSize1=%p, externalBuffer1=%p)\n", + handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr, bufferSize1, externalBuffer1); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseOperation_t opA_native; + cusparseOperation_t opB_native; + void * alpha_native = nullptr; + cusparseConstSpMatDescr_t matA_native; + cusparseConstSpMatDescr_t matB_native; + void * beta_native = nullptr; + cusparseSpMatDescr_t matC_native; + cudaDataType computeType_native; + cusparseSpGEMMAlg_t alg_native; + cusparseSpGEMMDescr_t spgemmDescr_native; + size_t * bufferSize1_native = nullptr; + void * externalBuffer1_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + opA_native = (cusparseOperation_t)opA; + opB_native = (cusparseOperation_t)opB; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + alpha_native = (void *)alpha_pointerData->getPointer(env); + matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB); + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + beta_native = (void *)beta_pointerData->getPointer(env); + 
matC_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matC); + computeType_native = (cudaDataType)computeType; + alg_native = (cusparseSpGEMMAlg_t)alg; + spgemmDescr_native = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr); + if (!initNative(env, bufferSize1, bufferSize1_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + externalBuffer1_native = (void *)getPointer(env, externalBuffer1); + + cusparseStatus_t jniResult_native = cusparseSpGEMM_workEstimation(handle_native, opA_native, opB_native, alpha_native, + matA_native, matB_native, beta_native, matC_native, computeType_native, alg_native, spgemmDescr_native, bufferSize1_native, externalBuffer1_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releaseNative(env, bufferSize1_native, bufferSize1, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1computeNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize2, jobject externalBuffer2) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_compute"); + CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_compute"); + 
CUJAVA_REQUIRE_NONNULL(env, bufferSize2, "bufferSize2", "cusparseSpGEMM_compute"); + + Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_compute(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, spgemmDescr=%p, bufferSize2=%p, externalBuffer2=%p)\n", + handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr, bufferSize2, externalBuffer2); + + // Declare native variables + cusparseHandle_t handle_native; + cusparseOperation_t opA_native; + cusparseOperation_t opB_native; + void * alpha_native = nullptr; + cusparseConstSpMatDescr_t matA_native; + cusparseConstSpMatDescr_t matB_native; + void * beta_native = nullptr; + cusparseSpMatDescr_t matC_native; + cudaDataType computeType_native; + cusparseSpGEMMAlg_t alg_native; + cusparseSpGEMMDescr_t spgemmDescr_native; + size_t * bufferSize2_native = nullptr; + void * externalBuffer2_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + opA_native = (cusparseOperation_t)opA; + opB_native = (cusparseOperation_t)opB; + PointerData *alpha_pointerData = initPointerData(env, alpha); + if (alpha_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + alpha_native = (void *)alpha_pointerData->getPointer(env); + matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA); + matB_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB); + PointerData *beta_pointerData = initPointerData(env, beta); + if (beta_pointerData == nullptr) { + return CUJAVA_CUSPARSE_INTERNAL_ERROR; + } + beta_native = (void *)beta_pointerData->getPointer(env); + matC_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matC); + computeType_native = (cudaDataType)computeType; + alg_native = (cusparseSpGEMMAlg_t)alg; + spgemmDescr_native = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr); + if (!initNative(env, bufferSize2, bufferSize2_native, true)) 
return CUJAVA_CUSPARSE_INTERNAL_ERROR; + externalBuffer2_native = (void *)getPointer(env, externalBuffer2); + + cusparseStatus_t jniResult_native = cusparseSpGEMM_compute(handle_native, opA_native, opB_native, alpha_native, matA_native, + matB_native, beta_native, matC_native, computeType_native, alg_native, spgemmDescr_native, bufferSize2_native, externalBuffer2_native); + + if (!releasePointerData(env, alpha_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!releaseNative(env, bufferSize2_native, bufferSize2, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMatGetSizeNative + (JNIEnv *env, jclass cls, jobject spMatDescr, jlongArray rows, jlongArray cols, jlongArray nnz) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseSpMatGetSize"); + CUJAVA_REQUIRE_NONNULL(env, rows, "rows", "cusparseSpMatGetSize"); + CUJAVA_REQUIRE_NONNULL(env, cols, "cols", "cusparseSpMatGetSize"); + CUJAVA_REQUIRE_NONNULL(env, nnz, "nnz", "cusparseSpMatGetSize"); + + Logger::log(LOG_TRACE, "Executing cusparseSpMatGetSize(spMatDescr=%p, rows=%p, cols=%p, nnz=%p)\n", spMatDescr, rows, cols, nnz); + + // Declare native variables + cusparseConstSpMatDescr_t spMatDescr_native; + int64_t rows_native; + int64_t cols_native; + int64_t nnz_native; + + // Copy Java inputs into native locals + spMatDescr_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, spMatDescr); + + cusparseStatus_t jniResult_native = cusparseSpMatGetSize(spMatDescr_native, &rows_native, &cols_native, &nnz_native); + + if (!set(env, rows, 0, (jlong)rows_native)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!set(env, cols, 0, (jlong)cols_native)) return 
CUJAVA_CUSPARSE_INTERNAL_ERROR; + if (!set(env, nnz, 0, (jlong)nnz_native)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Return the result + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsortNative +(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject descrA, jobject csrRowPtrA, jobject csrColIndA, jobject P, jobject pBuffer) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrsort"); + CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseXcsrsort"); + CUJAVA_REQUIRE_NONNULL(env, csrRowPtrA, "csrRowPtrA", "cusparseXcsrsort"); + CUJAVA_REQUIRE_NONNULL(env, csrColIndA, "csrColIndA", "cusparseXcsrsort"); + CUJAVA_REQUIRE_NONNULL(env, P, "P", "cusparseXcsrsort"); + CUJAVA_REQUIRE_NONNULL(env, pBuffer, "pBuffer", "cusparseXcsrsort"); + + Logger::log(LOG_TRACE, "Executing cusparseXcsrsort(handle=%p, m=%d, n=%d, nnz=%d, descrA=%p, csrRowPtrA=%p, csrColIndA=%p, P=%p, pBuffer=%p)\n", + handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, pBuffer); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + int nnz_native = 0; + cusparseMatDescr_t descrA_native; + int * csrRowPtrA_native = nullptr; + int * csrColIndA_native = nullptr; + int * P_native = nullptr; + void * pBuffer_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + nnz_native = (int)nnz; + descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA); + csrRowPtrA_native = (int *)getPointer(env, csrRowPtrA); + csrColIndA_native = (int *)getPointer(env, csrColIndA); + P_native = (int *)getPointer(env, P); + pBuffer_native = (void *)getPointer(env, pBuffer); + + // Cusparse API call + cusparseStatus_t jniResult_native = 
cusparseXcsrsort(handle_native, m_native, n_native, nnz_native, descrA_native, + csrRowPtrA_native, csrColIndA_native, P_native, pBuffer_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsort_1bufferSizeExtNative +(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrRowPtrA, jobject csrColIndA, jlongArray pBufferSizeInBytes) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrsort_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrRowPtrA, "csrRowPtrA", "cusparseXcsrsort_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, csrColIndA, "csrColIndA", "cusparseXcsrsort_bufferSizeExt"); + CUJAVA_REQUIRE_NONNULL(env, pBufferSizeInBytes, "pBufferSizeInBytes", "cusparseXcsrsort_bufferSizeExt"); + + Logger::log(LOG_TRACE, "Executing cusparseXcsrsort_bufferSizeExt(handle=%p, m=%d, n=%d, nnz=%d, csrRowPtrA=%p, csrColIndA=%p, pBufferSizeInBytes=%p)\n", + handle, m, n, nnz, csrRowPtrA, csrColIndA, pBufferSizeInBytes); + + // Declare native variables + cusparseHandle_t handle_native; + int m_native = 0; + int n_native = 0; + int nnz_native = 0; + int * csrRowPtrA_native = nullptr; + int * csrColIndA_native = nullptr; + size_t * pBufferSizeInBytes_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + m_native = (int)m; + n_native = (int)n; + nnz_native = (int)nnz; + csrRowPtrA_native = (int *)getPointer(env, csrRowPtrA); + csrColIndA_native = (int *)getPointer(env, csrColIndA); + if (!initNative(env, pBufferSizeInBytes, pBufferSizeInBytes_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseXcsrsort_bufferSizeExt(handle_native, m_native, n_native, nnz_native, + csrRowPtrA_native, csrColIndA_native, 
pBufferSizeInBytes_native); + + if (!releaseNative(env, pBufferSizeInBytes_native, pBufferSizeInBytes, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR; + + // Return the result + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateNative(JNIEnv *env, jclass cls, jobject handle) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCreate"); + + Logger::log(LOG_TRACE, "Executing cusparseCreate(handle=%p)\n", handle); + + // Declare native variables + cusparseHandle_t handle_native; + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCreate(&handle_native); + setNativePointerValue(env, handle, (jlong)handle_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateIdentityPermutationNative + (JNIEnv *env, jclass cls, jobject handle, jint n, jobject p) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCreateIdentityPermutation"); + CUJAVA_REQUIRE_NONNULL(env, p, "p", "cusparseCreateIdentityPermutation"); + + Logger::log(LOG_TRACE, "Executing cusparseCreateIdentityPermutation(handle=%p, n=%d, p=%p)\n", handle, n, p); + + // Declare native variables + cusparseHandle_t handle_native; + int n_native = 0; + int * p_native = nullptr; + + // Copy Java inputs into native locals + handle_native = (cusparseHandle_t)getNativePointerValue(env, handle); + n_native = (int)n; + p_native = (int *)getPointer(env, p); + + // Cusparse API call + cusparseStatus_t jniResult_native = cusparseCreateIdentityPermutation(handle_native, n_native, p_native); + + jint jniResult = (jint)jniResult_native; + return jniResult; +} diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse.hpp b/src/main/cpp/jni/cusparse/cujava_cusparse.hpp new 
file mode 100644 index 00000000000..5c3d1ee2c20 --- /dev/null +++ b/src/main/cpp/jni/cusparse/cujava_cusparse.hpp @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +#include <jni.h> + +#ifndef _Included_org_apache_sysds_cujava_cusparse_CuJavaCusparse +#define _Included_org_apache_sysds_cujava_cusparse_CuJavaCusparse +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Class: org.apache.sysds.cujava.cusparse.CuJavaCusparse + * Methods: + * - cusparseSpGEMM_copy + * - cusparseGetMatIndexBase + * - cusparseCreateCsr + * - cusparseCreateDnVec + * - cusparseSpMV_bufferSize + * - cusparseSpMV + * - cusparseDestroy + * - cusparseDestroyDnVec + * - cusparseDestroyDnMat + * - cusparseDestroySpMat + * - cusparseSpMM + * - cusparseSpMM_bufferSize + * - cusparseCreateDnMat + * - cusparseCsrSetPointers + * - cusparseCsr2cscEx2 + * - cusparseCsr2cscEx2_bufferSize + * - cusparseDcsrgeam2 + * - cusparseDcsrgeam2_bufferSizeExt + * - cusparseSparseToDense + * - cusparseSparseToDense_bufferSize + * - cusparseDenseToSparse_bufferSize + * - cusparseDenseToSparse_analysis + * - cusparseDenseToSparse_convert + * - cusparseDnnz + * - cusparseSetMatType + * - cusparseSetMatIndexBase + * - cusparseSetPointerMode + * -
cusparseXcsrgeam2Nnz + * - cusparseSpGEMM_workEstimation + * - cusparseSpGEMM_compute + * - cusparseSpMatGetSize + * - cusparseXcsrsort + * - cusparseXcsrsort_bufferSizeExt + * - cusparseCreate + * - cusparseCreateIdentityPermutation + */ + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1copyNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, + jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC, + jint computeType, jint alg, jobject spgemmDescr); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseGetMatIndexBaseNative + (JNIEnv *env, jclass cls, jobject descrA); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateCsrNative + (JNIEnv *env, jclass cls, jobject spMatDescr, jlong rows, jlong cols, jlong nnz, jobject csrRowOffsets, + jobject csrColInd, jobject csrValues, jint csrRowOffsetsType, jint csrColIndType, jint idxBase, jint valueType); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnVecNative + (JNIEnv *env, jclass cls, jobject dnVecDescr, jlong size, jobject values, jint valueType); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMV_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, + jobject vecY, jint computeType, jint alg, jlongArray bufferSize); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMVNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, + jobject vecY, jint computeType, jint alg, jobject externalBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyNative + (JNIEnv *env, jclass cls, jobject handle); + + +JNIEXPORT jint JNICALL 
Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnVecNative + (JNIEnv *env, jclass cls, jobject dnVecDescr); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnMatNative + (JNIEnv *env, jclass cls, jobject dnMatDescr); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroySpMatNative + (JNIEnv *env, jclass cls, jobject spMatDescr); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMMNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jobject externalBuffer); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMM_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jlongArray bufferSize); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnMatNative + (JNIEnv *env, jclass cls, jobject dnMatDescr, jlong rows, jlong cols, jlong ld, jobject values, jint valueType, jint order); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsrSetPointersNative + (JNIEnv *env, jclass cls, jobject spMatDescr, jobject csrRowOffsets, jobject csrColInd, jobject csrValues); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2Native + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, + jobject csrColInd, jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jobject buffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject 
handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, jobject csrColInd, + jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jlongArray bufferSize); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2Native + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA, + jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, + jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jobject pBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2_1bufferSizeExtNative + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA, + jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, jobject csrSortedRowPtrB, + jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jlongArray pBufferSizeInBytes); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDenseNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDense_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1bufferSizeNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize); + + +JNIEXPORT jint JNICALL 
Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1analysisNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1convertNative + (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDnnzNative + (JNIEnv *env, jclass cls, jobject handle, jint dirA, jint m, jint n, jobject descrA, jobject A, jint lda, jobject nnzPerRowCol, jobject nnzTotalDevHostPtr); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatTypeNative + (JNIEnv *env, jclass cls, jobject descrA, jint type); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatIndexBaseNative + (JNIEnv *env, jclass cls, jobject descrA, jint base); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetPointerModeNative + (JNIEnv *env, jclass cls, jobject handle, jint mode); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrgeam2NnzNative + (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject descrA, jint nnzA, jobject csrSortedRowPtrA, jobject csrSortedColIndA, + jobject descrB, jint nnzB, jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedRowPtrC, jobject nnzTotalDevHostPtr, jobject workspace); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1workEstimationNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize1, jobject externalBuffer1); + + +JNIEXPORT jint JNICALL 
Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1computeNative + (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, + jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize2, jobject externalBuffer2); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMatGetSizeNative + (JNIEnv *env, jclass cls, jobject spMatDescr, jlongArray rows, jlongArray cols, jlongArray nnz); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsortNative +(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject descrA, jobject csrRowPtrA, jobject csrColIndA, jobject P, jobject pBuffer); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsort_1bufferSizeExtNative +(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrRowPtrA, jobject csrColIndA, jlongArray pBufferSizeInBytes); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateNative(JNIEnv *env, jclass cls, jobject handle); + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateIdentityPermutationNative + (JNIEnv *env, jclass cls, jobject handle, jint n, jobject p); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp b/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp new file mode 100644 index 00000000000..85ecfd9346c --- /dev/null +++ b/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Common includes and constants shared by the cuSPARSE JNI binding sources.
+#ifndef CUJAVA_CUSPARSE_COMMON_HPP
+#define CUJAVA_CUSPARSE_COMMON_HPP
+
+#include
+#include
+#include
+#include
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+// NOTE(review): presumably the status the cuSPARSE wrappers return when the binding itself fails - confirm in cujava_cusparse.cpp.
+#define CUJAVA_CUSPARSE_INTERNAL_ERROR (-1)
+
+
+
+#endif // CUJAVA_CUSPARSE_COMMON_HPP
diff --git a/src/main/cpp/jni/driver/CMakeLists.txt b/src/main/cpp/jni/driver/CMakeLists.txt
new file mode 100644
index 00000000000..9825ade2736
--- /dev/null
+++ b/src/main/cpp/jni/driver/CMakeLists.txt
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaDriver LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED) # for CUDA::cuda_driver
+
+# JNI bridge to the CUDA driver API (sources live in this directory).
+add_library(CuJavaDriver SHARED cujava_driver.cpp)
+
+set_target_properties(CuJavaDriver PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON # sources use nullptr; fail at configure time rather than decay
+    OUTPUT_NAME cujava_driver # -> libcujava_driver.so
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+)
+
+target_include_directories(CuJavaDriver
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${CMAKE_CURRENT_SOURCE_DIR} # headers in driver/
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common # if including common headers
+)
+
+target_link_libraries(CuJavaDriver # must name this directory's own target, not CuJavaRuntime
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cuda_driver # driver API (libcuda)
+        ${JNI_LIBRARIES}
+)
diff --git a/src/main/cpp/jni/driver/cujava_driver.cpp b/src/main/cpp/jni/driver/cujava_driver.cpp
new file mode 100644
index 00000000000..2d3d9e9afb0
--- /dev/null
+++ b/src/main/cpp/jni/driver/cujava_driver.cpp
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "cujava_driver.hpp" +#include "cujava_driver_common.hpp" + +#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method) \ + do { \ + if ((obj) == nullptr) { \ + ThrowByName((env), "java/lang/NullPointerException", \ + "Parameter '" name "' is null for " method); \ + return CUJAVA_INTERNAL_ERROR; \ + } \ + } while (0) + + + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) { + JNIEnv *env = nullptr; + if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4)) { + return JNI_ERR; + } + + // Only what we need so far + if (initJNIUtils(env) == JNI_ERR) return JNI_ERR; + if (initPointerUtils(env) == JNI_ERR) return JNI_ERR; + + return JNI_VERSION_1_4; +} + + + +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) { +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxCreateNative + (JNIEnv *env, jclass cls, jobject pctx, jint flags, jobject dev) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, pctx, "pctx", "cuCtxCreate"); + CUJAVA_REQUIRE_NONNULL(env, dev, "dev", "cuCtxCreate"); + + Logger::log(LOG_TRACE, "Executing cuCtxCreate\n"); + + CUdevice nativeDev = (CUdevice)(intptr_t)getNativePointerValue(env, dev); + CUcontext nativePctx; + int result = cuCtxCreate(&nativePctx, (int)flags, nativeDev); + setNativePointerValue(env, pctx, (jlong)nativePctx); + + return result; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetNative + (JNIEnv *env, jclass cls, jobject device, jint ordinal) { + + // Validate: all jobject 
parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, device, "device", "cuDeviceGet"); + + Logger::log(LOG_TRACE, "Executing cuDeviceGet for device %ld\n", ordinal); + + CUdevice nativeDevice; + int result = cuDeviceGet(&nativeDevice, ordinal); + setNativePointerValue(env, device, (jlong)nativeDevice); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetCountNative + (JNIEnv *env, jclass cls, jintArray count) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, count, "count", "cuDeviceGetCount"); + + Logger::log(LOG_TRACE, "Executing cuDeviceGetCount\n"); + + int nativeCount = 0; + int result = cuDeviceGetCount(&nativeCount); + if (!set(env, count, 0, nativeCount)) return CUJAVA_INTERNAL_ERROR; + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuInitNative + (JNIEnv *env, jclass cls, jint flags) { + Logger::log(LOG_TRACE, "Executing cuInit\n"); + + int result = cuInit((unsigned int)flags); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuLaunchKernelNative + (JNIEnv *env, jclass, jobject f, jint gridDimX, jint gridDimY, jint gridDimZ, + jint blockDimX, jint blockDimY, jint blockDimZ, jint sharedMemBytes, + jobject hStream, jobject kernelParams, jobject extra) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, f, "f", "cuLaunchKernel"); + + Logger::log(LOG_TRACE, "Executing cuLaunchKernel\n"); + + CUfunction nativeF = (CUfunction)getNativePointerValue(env, f); + CUstream nativeHStr = (CUstream) getNativePointerValue(env, hStream); + + PointerData *kernelParamsPD = nullptr; + void **nativeKernelParams = nullptr; + if (kernelParams != nullptr) { + kernelParamsPD = initPointerData(env, kernelParams); + if (kernelParamsPD == nullptr) return CUJAVA_INTERNAL_ERROR; + nativeKernelParams = (void**)kernelParamsPD->getPointer(env); + } + + 
PointerData *extraPD = nullptr; + void **nativeExtra = nullptr; + if (extra != nullptr) { + extraPD = initPointerData(env, extra); + if (extraPD == nullptr) return CUJAVA_INTERNAL_ERROR; + nativeExtra = (void**)extraPD->getPointer(env); + } + + int result = cuLaunchKernel( + nativeF, + (unsigned int)gridDimX, (unsigned int)gridDimY, (unsigned int)gridDimZ, + (unsigned int)blockDimX, (unsigned int)blockDimY, (unsigned int)blockDimZ, + (unsigned int)sharedMemBytes, + nativeHStr, + nativeKernelParams, + nativeExtra); + + if (!releasePointerData(env, kernelParamsPD, 0)) return CUJAVA_INTERNAL_ERROR; + if (!releasePointerData(env, extraPD, 0)) return CUJAVA_INTERNAL_ERROR; + + return result; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleGetFunctionNative + (JNIEnv *env, jclass, jobject hfunc, jobject hmod, jstring name) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, hfunc, "hfunc", "cuModuleGetFunction"); + CUJAVA_REQUIRE_NONNULL(env, hmod, "hmod", "cuModuleGetFunction"); + CUJAVA_REQUIRE_NONNULL(env, name, "name", "cuModuleGetFunction"); + + Logger::log(LOG_TRACE, "Executing cuModuleGetFunction\n"); + + CUmodule nativeHmod = (CUmodule)getNativePointerValue(env, hmod); + char* nativeName = toNativeCString(env, name); + if (!nativeName) return CUJAVA_INTERNAL_ERROR; + + CUfunction nativeHfunc = nullptr; + int result = cuModuleGetFunction(&nativeHfunc, nativeHmod, nativeName); + + delete[] nativeName; + setNativePointerValue(env, hfunc, (jlong)nativeHfunc); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleLoadDataExNative + (JNIEnv *env, jclass, jobject phMod, jobject p, jint numOptions, jintArray options, jobject optionValues) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, phMod, "phMod", "cuModuleLoadDataEx"); + CUJAVA_REQUIRE_NONNULL(env, p, "p", "cuModuleLoadDataEx"); + 
CUJAVA_REQUIRE_NONNULL(env, options, "options", "cuModuleLoadDataEx"); + CUJAVA_REQUIRE_NONNULL(env, optionValues, "optionValues", "cuModuleLoadDataEx"); + + Logger::log(LOG_TRACE, "Executing cuModuleLoadDataEx\n"); + + CUjit_option *nativeOptions = nullptr; + { + jint *opts = env->GetIntArrayElements(options, nullptr); + if (opts == nullptr) return CUJAVA_INTERNAL_ERROR; + + nativeOptions = new CUjit_option[(size_t)numOptions]; + for (int i = 0; i < numOptions; ++i) nativeOptions[i] = (CUjit_option)opts[i]; + + env->ReleaseIntArrayElements(options, opts, JNI_ABORT); + } + + // Pointers for 'p' (module data) and 'optionValues' (void** for JIT options) + CUmodule nativeModule; + + PointerData *pPD = initPointerData(env, p); + if (pPD == nullptr) { delete[] nativeOptions; return CUJAVA_INTERNAL_ERROR; } + + PointerData *ovPD = initPointerData(env, optionValues); + if (ovPD == nullptr) { + releasePointerData(env, pPD, JNI_ABORT); + delete[] nativeOptions; + return CUJAVA_INTERNAL_ERROR; + } + void **nativeOptionValues = (void**)ovPD->getPointer(env); + + int result = cuModuleLoadDataEx( + &nativeModule, + (void*)pPD->getPointer(env), + (unsigned int)numOptions, + nativeOptions, + nativeOptionValues); + + delete[] nativeOptions; + + setNativePointerValue(env, phMod, (jlong)nativeModule); + + // p is input-only → no-commit; optionValues may receive outputs → commit + if (!releasePointerData(env, pPD, JNI_ABORT)) return CUJAVA_INTERNAL_ERROR; + if (!releasePointerData(env, ovPD, 0)) return CUJAVA_INTERNAL_ERROR; + + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemAllocNative + (JNIEnv *env, jclass cls, jobject dptr, jlong bytesize) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, dptr, "dptr", "cuMemAlloc"); + + Logger::log(LOG_TRACE, "Executing cuMemAlloc of %ld bytes\n", (long)bytesize); + + CUdeviceptr nativeDptr; + int result = cuMemAlloc(&nativeDptr, (size_t)bytesize); + 
setPointer(env, dptr, (jlong)nativeDptr); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleUnloadNative + (JNIEnv *env, jclass cls, jobject hmod) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, hmod, "hmod", "cuModuleUnload"); + + Logger::log(LOG_TRACE, "Executing cuModuleUnload\n"); + + CUmodule nativeHmod = (CUmodule)getNativePointerValue(env, hmod); + int result = cuModuleUnload(nativeHmod); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxDestroyNative + (JNIEnv *env, jclass cls, jobject ctx) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, ctx, "ctx", "cuCtxDestroy"); + + Logger::log(LOG_TRACE, "Executing cuCtxDestroy\n"); + + CUcontext nativeCtx = (CUcontext)getNativePointerValue(env, ctx); + int result = cuCtxDestroy(nativeCtx); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemFreeNative + (JNIEnv *env, jclass cls, jobject dptr) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, dptr, "dptr", "cuMemFree"); + + Logger::log(LOG_TRACE, "Executing cuMemFree\n"); + + CUdeviceptr nativeDptr = (CUdeviceptr)getPointer(env, dptr); + int result = cuMemFree(nativeDptr); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemcpyDtoHNative + (JNIEnv *env, jclass, jobject dstHost, jobject srcDevice, jlong ByteCount) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, dstHost, "dstHost", "cuMemcpyDtoH"); + CUJAVA_REQUIRE_NONNULL(env, srcDevice, "srcDevice", "cuMemcpyDtoH"); + + Logger::log(LOG_TRACE, "Executing cuMemcpyDtoH of %ld bytes\n", (long)ByteCount); + + PointerData *dstHostPD = initPointerData(env, dstHost); + if (dstHostPD == nullptr) return CUJAVA_INTERNAL_ERROR; + + // Correct: CUdeviceptr from 
CUdeviceptr wrapper + CUdeviceptr nativeSrcDevice = (CUdeviceptr)(uintptr_t)getNativePointerValue(env, srcDevice); + void *nativeDstHost = dstHostPD->getPointer(env); + + int result = cuMemcpyDtoH(nativeDstHost, nativeSrcDevice, (size_t)ByteCount); + + if (!releasePointerData(env, dstHostPD, 0)) return CUJAVA_INTERNAL_ERROR; // commit host writes + return result; +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxSynchronizeNative + (JNIEnv *env, jclass cls) { + Logger::log(LOG_TRACE, "Executing cuCtxSynchronize\n"); + + return cuCtxSynchronize(); +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetAttributeNative + (JNIEnv *env, jclass cls, jintArray pi, jint CUdevice_attribute_attrib, jobject dev) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, pi, "pi", "cuDeviceGetAttribute"); + CUJAVA_REQUIRE_NONNULL(env, dev, "dev", "cuDeviceGetAttribute"); + + Logger::log(LOG_TRACE, "Executing cuDeviceGetAttribute\n"); + + CUdevice nativeDev = (CUdevice)(intptr_t)getNativePointerValue(env, dev); + int nativePi = 0; + int result = cuDeviceGetAttribute(&nativePi, (CUdevice_attribute)CUdevice_attribute_attrib, nativeDev); + if (!set(env, pi, 0, nativePi)) return CUJAVA_INTERNAL_ERROR; + return result; +} diff --git a/src/main/cpp/jni/driver/cujava_driver.hpp b/src/main/cpp/jni/driver/cujava_driver.hpp new file mode 100644 index 00000000000..6a99bf669b6 --- /dev/null +++ b/src/main/cpp/jni/driver/cujava_driver.hpp @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ #include
+
+ #ifndef _Included_org_apache_sysds_cujava_driver_CuJavaDriver
+ #define _Included_org_apache_sysds_cujava_driver_CuJavaDriver
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /*
+  * Class: org.apache.sysds.cujava.driver.CuJavaDriver
+  * Native entry points, one per CUDA driver call of the same name:
+  *  - cuCtxCreate
+  *  - cuDeviceGet
+  *  - cuDeviceGetCount
+  *  - cuInit
+  *  - cuLaunchKernel
+  *  - cuModuleGetFunction
+  *  - cuModuleLoadDataEx
+  *  - cuMemAlloc
+  *  - cuModuleUnload
+  *  - cuCtxDestroy
+  *  - cuMemFree
+  *  - cuMemcpyDtoH
+  *  - cuCtxSynchronize
+  *  - cuDeviceGetAttribute
+  */
+
+// Every entry point returns a jint status code to the Java caller.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxCreateNative
+  (JNIEnv *env, jclass cls, jobject pctx, jint flags, jobject dev);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetNative
+  (JNIEnv *env, jclass cls, jobject device, jint ordinal);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetCountNative
+  (JNIEnv *env, jclass cls, jintArray count);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuInitNative
+  (JNIEnv *env, jclass cls, jint Flags);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuLaunchKernelNative
+  (JNIEnv *env, jclass, jobject f, jint gridDimX, jint gridDimY, jint gridDimZ,
+   jint blockDimX, jint blockDimY, jint blockDimZ, jint sharedMemBytes,
+   jobject hStream, jobject kernelParams, jobject extra);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleGetFunctionNative
+  (JNIEnv *env, jclass, jobject hfunc, jobject hmod, jstring name);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleLoadDataExNative
+  (JNIEnv *env, jclass, jobject phMod, jobject p, jint numOptions, jintArray options, jobject optionValues);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemAllocNative
+  (JNIEnv *env, jclass cls, jobject dptr, jlong bytesize);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleUnloadNative
+  (JNIEnv *env, jclass cls, jobject hmod);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxDestroyNative
+  (JNIEnv *env, jclass cls, jobject ctx);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemFreeNative
+  (JNIEnv *env, jclass cls, jobject dptr);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemcpyDtoHNative
+  (JNIEnv *env, jclass cls, jobject dstHost, jobject srcDevice, jlong ByteCount);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxSynchronizeNative
+  (JNIEnv *env, jclass cls);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetAttributeNative
+  (JNIEnv *env, jclass cls, jintArray pi, jint CUdevice_attribute_attrib, jobject dev);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/main/cpp/jni/driver/cujava_driver_common.hpp b/src/main/cpp/jni/driver/cujava_driver_common.hpp
new file mode 100644
index 00000000000..c4c219981c6
--- /dev/null
+++ b/src/main/cpp/jni/driver/cujava_driver_common.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// Common includes and constants shared by the CUDA driver JNI binding sources.
+#ifndef CUJAVA_DRIVER_COMMON_HPP
+#define CUJAVA_DRIVER_COMMON_HPP
+
+#include
+#include
+#include
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+// Status the driver wrappers return when the binding itself fails (null argument, pointer helper failure) rather than CUDA.
+#define CUJAVA_INTERNAL_ERROR 0x80000001
+
+
+
+#endif // CUJAVA_DRIVER_COMMON_HPP
diff --git a/src/main/cpp/jni/runtime/CMakeLists.txt b/src/main/cpp/jni/runtime/CMakeLists.txt
new file mode 100644
index 00000000000..2bb94a356ab
--- /dev/null
+++ b/src/main/cpp/jni/runtime/CMakeLists.txt
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaRuntime LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED) # for CUDA::cudart
+
+# JNI bridge to the CUDA runtime API (sources live in this directory).
+add_library(CuJavaRuntime SHARED cujava_runtime.cpp)
+
+set_target_properties(CuJavaRuntime PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON # sources use nullptr/range-for; fail at configure time rather than decay
+    OUTPUT_NAME cujava_runtime # -> libcujava_runtime.so
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+)
+
+target_include_directories(CuJavaRuntime
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${CMAKE_CURRENT_SOURCE_DIR} # headers in runtime/
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common # if including common headers
+)
+
+target_link_libraries(CuJavaRuntime
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cudart
+        ${JNI_LIBRARIES}
+)
+
diff --git a/src/main/cpp/jni/runtime/cujava_runtime.cpp b/src/main/cpp/jni/runtime/cujava_runtime.cpp
new file mode 100644
index 00000000000..cbf14bf0bdd
--- /dev/null
+++ b/src/main/cpp/jni/runtime/cujava_runtime.cpp
@@ -0,0 +1,572 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +#include "cujava_runtime.hpp" +#include "cujava_runtime_common.hpp" + +#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method) \ + do { \ + if ((obj) == nullptr) { \ + ThrowByName((env), "java/lang/NullPointerException", \ + "Parameter '" name "' is null for " method); \ + return CUJAVA_INTERNAL_ERROR; \ + } \ + } while (0) + +// ---- cudaDeviceProp jfieldIDs ---- +static jclass cudaDeviceProp_class = nullptr; + +#define F(name) static jfieldID name = nullptr; +F(cudaDeviceProp_accessPolicyMaxWindowSize) +F(cudaDeviceProp_asyncEngineCount) +F(cudaDeviceProp_canMapHostMemory) +F(cudaDeviceProp_canUseHostPointerForRegisteredMem) +F(cudaDeviceProp_clockRate) +F(cudaDeviceProp_clusterLaunch) +F(cudaDeviceProp_computeMode) +F(cudaDeviceProp_computePreemptionSupported) +F(cudaDeviceProp_concurrentKernels) +F(cudaDeviceProp_concurrentManagedAccess) +F(cudaDeviceProp_cooperativeLaunch) +F(cudaDeviceProp_cooperativeMultiDeviceLaunch) +F(cudaDeviceProp_deferredMappingCudaArraySupported) +F(cudaDeviceProp_deviceOverlap) +F(cudaDeviceProp_directManagedMemAccessFromHost) +F(cudaDeviceProp_ECCEnabled) +F(cudaDeviceProp_globalL1CacheSupported) +F(cudaDeviceProp_gpuDirectRDMAFlushWritesOptions) +F(cudaDeviceProp_gpuDirectRDMASupported) +F(cudaDeviceProp_gpuDirectRDMAWritesOrdering) +F(cudaDeviceProp_hostNativeAtomicSupported) +F(cudaDeviceProp_hostRegisterReadOnlySupported) +F(cudaDeviceProp_hostRegisterSupported) +F(cudaDeviceProp_integrated) +F(cudaDeviceProp_ipcEventSupported) +F(cudaDeviceProp_isMultiGpuBoard) +F(cudaDeviceProp_kernelExecTimeoutEnabled) +F(cudaDeviceProp_l2CacheSize) +F(cudaDeviceProp_localL1CacheSupported) +F(cudaDeviceProp_luid) +F(cudaDeviceProp_luidDeviceNodeMask) +F(cudaDeviceProp_major) +F(cudaDeviceProp_managedMemory) +F(cudaDeviceProp_maxBlocksPerMultiProcessor) +F(cudaDeviceProp_maxGridSize) +F(cudaDeviceProp_maxSurface1D) 
+F(cudaDeviceProp_maxSurface1DLayered) +F(cudaDeviceProp_maxSurface2D) +F(cudaDeviceProp_maxSurface2DLayered) +F(cudaDeviceProp_maxSurface3D) +F(cudaDeviceProp_maxSurfaceCubemap) +F(cudaDeviceProp_maxSurfaceCubemapLayered) +F(cudaDeviceProp_maxTexture1D) +F(cudaDeviceProp_maxTexture1DLayered) +F(cudaDeviceProp_maxTexture1DLinear) +F(cudaDeviceProp_maxTexture1DMipmap) +F(cudaDeviceProp_maxTexture2D) +F(cudaDeviceProp_maxTexture2DGather) +F(cudaDeviceProp_maxTexture2DLayered) +F(cudaDeviceProp_maxTexture2DLinear) +F(cudaDeviceProp_maxTexture2DMipmap) +F(cudaDeviceProp_maxTexture3D) +F(cudaDeviceProp_maxTexture3DAlt) +F(cudaDeviceProp_maxTextureCubemap) +F(cudaDeviceProp_maxTextureCubemapLayered) +F(cudaDeviceProp_maxThreadsDim) +F(cudaDeviceProp_maxThreadsPerBlock) +F(cudaDeviceProp_maxThreadsPerMultiProcessor) +F(cudaDeviceProp_memoryBusWidth) +F(cudaDeviceProp_memoryClockRate) +F(cudaDeviceProp_memoryPoolsSupported) +F(cudaDeviceProp_memoryPoolSupportedHandleTypes) +F(cudaDeviceProp_memPitch) +F(cudaDeviceProp_minor) +F(cudaDeviceProp_multiGpuBoardGroupID) +F(cudaDeviceProp_multiProcessorCount) +F(cudaDeviceProp_name) +F(cudaDeviceProp_pageableMemoryAccess) +F(cudaDeviceProp_pageableMemoryAccessUsesHostPageTables) +F(cudaDeviceProp_pciBusID) +F(cudaDeviceProp_pciDeviceID) +F(cudaDeviceProp_pciDomainID) +F(cudaDeviceProp_persistingL2CacheMaxSize) +F(cudaDeviceProp_regsPerBlock) +F(cudaDeviceProp_regsPerMultiprocessor) +F(cudaDeviceProp_reserved) +F(cudaDeviceProp_reservedSharedMemPerBlock) +F(cudaDeviceProp_sharedMemPerBlock) +F(cudaDeviceProp_sharedMemPerBlockOptin) +F(cudaDeviceProp_sharedMemPerMultiprocessor) +F(cudaDeviceProp_singleToDoublePrecisionPerfRatio) +F(cudaDeviceProp_sparseCudaArraySupported) +F(cudaDeviceProp_streamPrioritiesSupported) +F(cudaDeviceProp_surfaceAlignment) +F(cudaDeviceProp_tccDriver) +F(cudaDeviceProp_textureAlignment) +F(cudaDeviceProp_texturePitchAlignment) +F(cudaDeviceProp_timelineSemaphoreInteropSupported) 
+F(cudaDeviceProp_totalConstMem) +F(cudaDeviceProp_totalGlobalMem) +F(cudaDeviceProp_unifiedAddressing) +F(cudaDeviceProp_unifiedFunctionPointers) +F(cudaDeviceProp_warpSize) +#undef F + + + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) { + JNIEnv *env = nullptr; + if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4)) { + return JNI_ERR; + } + + // Only what we need so far + if (initJNIUtils(env) == JNI_ERR) return JNI_ERR; + if (initPointerUtils(env) == JNI_ERR) return JNI_ERR; + + // ---- cache all fields of org.apache.sysds.cujava.runtime.cudaDeviceProp ---- + { + jclass cls = nullptr; + if (!init(env, cls, "org/apache/sysds/cujava/runtime/CudaDeviceProp")) return JNI_ERR; + cudaDeviceProp_class = (jclass)env->NewGlobalRef(cls); + if (!cudaDeviceProp_class) return JNI_ERR; + + struct Spec { const char* name; const char* sig; jfieldID* out; } specs[] = { + {"accessPolicyMaxWindowSize","I",&cudaDeviceProp_accessPolicyMaxWindowSize}, + {"asyncEngineCount","I",&cudaDeviceProp_asyncEngineCount}, + {"canMapHostMemory","I",&cudaDeviceProp_canMapHostMemory}, + {"canUseHostPointerForRegisteredMem","I",&cudaDeviceProp_canUseHostPointerForRegisteredMem}, + {"clockRate","I",&cudaDeviceProp_clockRate}, + {"clusterLaunch","I",&cudaDeviceProp_clusterLaunch}, + {"computeMode","I",&cudaDeviceProp_computeMode}, + {"computePreemptionSupported","I",&cudaDeviceProp_computePreemptionSupported}, + {"concurrentKernels","I",&cudaDeviceProp_concurrentKernels}, + {"concurrentManagedAccess","I",&cudaDeviceProp_concurrentManagedAccess}, + {"cooperativeLaunch","I",&cudaDeviceProp_cooperativeLaunch}, + {"cooperativeMultiDeviceLaunch","I",&cudaDeviceProp_cooperativeMultiDeviceLaunch}, + {"deferredMappingCudaArraySupported","I",&cudaDeviceProp_deferredMappingCudaArraySupported}, + {"deviceOverlap","I",&cudaDeviceProp_deviceOverlap}, + {"directManagedMemAccessFromHost","I",&cudaDeviceProp_directManagedMemAccessFromHost}, + {"ECCEnabled","I",&cudaDeviceProp_ECCEnabled}, + 
{"globalL1CacheSupported","I",&cudaDeviceProp_globalL1CacheSupported}, + {"gpuDirectRDMAFlushWritesOptions","I",&cudaDeviceProp_gpuDirectRDMAFlushWritesOptions}, + {"gpuDirectRDMASupported","I",&cudaDeviceProp_gpuDirectRDMASupported}, + {"gpuDirectRDMAWritesOrdering","I",&cudaDeviceProp_gpuDirectRDMAWritesOrdering}, + {"hostNativeAtomicSupported","I",&cudaDeviceProp_hostNativeAtomicSupported}, + {"hostRegisterReadOnlySupported","I",&cudaDeviceProp_hostRegisterReadOnlySupported}, + {"hostRegisterSupported","I",&cudaDeviceProp_hostRegisterSupported}, + {"integrated","I",&cudaDeviceProp_integrated}, + {"ipcEventSupported","I",&cudaDeviceProp_ipcEventSupported}, + {"isMultiGpuBoard","I",&cudaDeviceProp_isMultiGpuBoard}, + {"kernelExecTimeoutEnabled","I",&cudaDeviceProp_kernelExecTimeoutEnabled}, + {"l2CacheSize","I",&cudaDeviceProp_l2CacheSize}, + {"localL1CacheSupported","I",&cudaDeviceProp_localL1CacheSupported}, + {"luid","[B",&cudaDeviceProp_luid}, + {"luidDeviceNodeMask","I",&cudaDeviceProp_luidDeviceNodeMask}, + {"major","I",&cudaDeviceProp_major}, + {"managedMemory","I",&cudaDeviceProp_managedMemory}, + {"maxBlocksPerMultiProcessor","I",&cudaDeviceProp_maxBlocksPerMultiProcessor}, + {"maxGridSize","[I",&cudaDeviceProp_maxGridSize}, + {"maxSurface1D","I",&cudaDeviceProp_maxSurface1D}, + {"maxSurface1DLayered","[I",&cudaDeviceProp_maxSurface1DLayered}, + {"maxSurface2D","[I",&cudaDeviceProp_maxSurface2D}, + {"maxSurface2DLayered","[I",&cudaDeviceProp_maxSurface2DLayered}, + {"maxSurface3D","[I",&cudaDeviceProp_maxSurface3D}, + {"maxSurfaceCubemap","I",&cudaDeviceProp_maxSurfaceCubemap}, + {"maxSurfaceCubemapLayered","[I",&cudaDeviceProp_maxSurfaceCubemapLayered}, + {"maxTexture1D","I",&cudaDeviceProp_maxTexture1D}, + {"maxTexture1DLayered","[I",&cudaDeviceProp_maxTexture1DLayered}, + {"maxTexture1DLinear","I",&cudaDeviceProp_maxTexture1DLinear}, + {"maxTexture1DMipmap","I",&cudaDeviceProp_maxTexture1DMipmap}, + {"maxTexture2D","[I",&cudaDeviceProp_maxTexture2D}, + 
{"maxTexture2DGather","[I",&cudaDeviceProp_maxTexture2DGather}, + {"maxTexture2DLayered","[I",&cudaDeviceProp_maxTexture2DLayered}, + {"maxTexture2DLinear","[I",&cudaDeviceProp_maxTexture2DLinear}, + {"maxTexture2DMipmap","[I",&cudaDeviceProp_maxTexture2DMipmap}, + {"maxTexture3D","[I",&cudaDeviceProp_maxTexture3D}, + {"maxTexture3DAlt","[I",&cudaDeviceProp_maxTexture3DAlt}, + {"maxTextureCubemap","I",&cudaDeviceProp_maxTextureCubemap}, + {"maxTextureCubemapLayered","[I",&cudaDeviceProp_maxTextureCubemapLayered}, + {"maxThreadsDim","[I",&cudaDeviceProp_maxThreadsDim}, + {"maxThreadsPerBlock","I",&cudaDeviceProp_maxThreadsPerBlock}, + {"maxThreadsPerMultiProcessor","I",&cudaDeviceProp_maxThreadsPerMultiProcessor}, + {"memoryBusWidth","I",&cudaDeviceProp_memoryBusWidth}, + {"memoryClockRate","I",&cudaDeviceProp_memoryClockRate}, + {"memoryPoolsSupported","I",&cudaDeviceProp_memoryPoolsSupported}, + {"memoryPoolSupportedHandleTypes","I",&cudaDeviceProp_memoryPoolSupportedHandleTypes}, + {"memPitch","J",&cudaDeviceProp_memPitch}, + {"minor","I",&cudaDeviceProp_minor}, + {"multiGpuBoardGroupID","I",&cudaDeviceProp_multiGpuBoardGroupID}, + {"multiProcessorCount","I",&cudaDeviceProp_multiProcessorCount}, + {"name","[B",&cudaDeviceProp_name}, + {"pageableMemoryAccess","I",&cudaDeviceProp_pageableMemoryAccess}, + {"pageableMemoryAccessUsesHostPageTables","I",&cudaDeviceProp_pageableMemoryAccessUsesHostPageTables}, + {"pciBusID","I",&cudaDeviceProp_pciBusID}, + {"pciDeviceID","I",&cudaDeviceProp_pciDeviceID}, + {"pciDomainID","I",&cudaDeviceProp_pciDomainID}, + {"persistingL2CacheMaxSize","I",&cudaDeviceProp_persistingL2CacheMaxSize}, + {"regsPerBlock","I",&cudaDeviceProp_regsPerBlock}, + {"regsPerMultiprocessor","I",&cudaDeviceProp_regsPerMultiprocessor}, + {"reserved","I",&cudaDeviceProp_reserved}, + {"reservedSharedMemPerBlock","J",&cudaDeviceProp_reservedSharedMemPerBlock}, + {"sharedMemPerBlock","J",&cudaDeviceProp_sharedMemPerBlock}, + 
{"sharedMemPerBlockOptin","J",&cudaDeviceProp_sharedMemPerBlockOptin}, + {"sharedMemPerMultiprocessor","J",&cudaDeviceProp_sharedMemPerMultiprocessor}, + {"singleToDoublePrecisionPerfRatio","I",&cudaDeviceProp_singleToDoublePrecisionPerfRatio}, + {"sparseCudaArraySupported","I",&cudaDeviceProp_sparseCudaArraySupported}, + {"streamPrioritiesSupported","I",&cudaDeviceProp_streamPrioritiesSupported}, + {"surfaceAlignment","J",&cudaDeviceProp_surfaceAlignment}, + {"tccDriver","I",&cudaDeviceProp_tccDriver}, + {"textureAlignment","J",&cudaDeviceProp_textureAlignment}, + {"texturePitchAlignment","J",&cudaDeviceProp_texturePitchAlignment}, + {"timelineSemaphoreInteropSupported","I",&cudaDeviceProp_timelineSemaphoreInteropSupported}, + {"totalConstMem","J",&cudaDeviceProp_totalConstMem}, + {"totalGlobalMem","J",&cudaDeviceProp_totalGlobalMem}, + {"unifiedAddressing","I",&cudaDeviceProp_unifiedAddressing}, + {"unifiedFunctionPointers","I",&cudaDeviceProp_unifiedFunctionPointers}, + {"warpSize","I",&cudaDeviceProp_warpSize}, + }; + + for (const auto& s : specs) { + if (!init(env, cls, *s.out, s.name, s.sig)) return JNI_ERR; + } + } + + + return JNI_VERSION_1_4; +} + + +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) { +} + +static void setCudaDeviceProp(JNIEnv* env, jobject prop, const cudaDeviceProp& p) { + // byte[256] name + byte[8] luid (luid undefined on non-Windows -> zero it) + setFieldBytes(env, prop, cudaDeviceProp_name, + reinterpret_cast(p.name), 256); + { jbyte z8[8] = {0}; setFieldBytes(env, prop, cudaDeviceProp_luid, z8, 8); } + + // int[] fields + { jint v[3] = { (jint)p.maxThreadsDim[0], (jint)p.maxThreadsDim[1], (jint)p.maxThreadsDim[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxThreadsDim, v, 3); } + + { jint v[3] = { (jint)p.maxGridSize[0], (jint)p.maxGridSize[1], (jint)p.maxGridSize[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxGridSize, v, 3); } + + { jint v[2] = { (jint)p.maxTexture2D[0], (jint)p.maxTexture2D[1] }; + 
setFieldInts(env, prop, cudaDeviceProp_maxTexture2D, v, 2); } + + { jint v[2] = { (jint)p.maxTexture2DGather[0], (jint)p.maxTexture2DGather[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture2DGather, v, 2); } + + { jint v[3] = { (jint)p.maxTexture2DLinear[0], (jint)p.maxTexture2DLinear[1], (jint)p.maxTexture2DLinear[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture2DLinear, v, 3); } + + { jint v[2] = { (jint)p.maxTexture2DMipmap[0], (jint)p.maxTexture2DMipmap[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture2DMipmap, v, 2); } + + { jint v[3] = { (jint)p.maxTexture3D[0], (jint)p.maxTexture3D[1], (jint)p.maxTexture3D[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture3D, v, 3); } + + { jint v[3] = { (jint)p.maxTexture3DAlt[0], (jint)p.maxTexture3DAlt[1], (jint)p.maxTexture3DAlt[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture3DAlt, v, 3); } + + { jint v[2] = { (jint)p.maxTexture1DLayered[0], (jint)p.maxTexture1DLayered[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture1DLayered, v, 2); } + + { jint v[3] = { (jint)p.maxTexture2DLayered[0], (jint)p.maxTexture2DLayered[1], (jint)p.maxTexture2DLayered[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxTexture2DLayered, v, 3); } + + { jint v[2] = { (jint)p.maxTextureCubemapLayered[0], (jint)p.maxTextureCubemapLayered[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxTextureCubemapLayered, v, 2); } + + { jint v[2] = { (jint)p.maxSurface1DLayered[0], (jint)p.maxSurface1DLayered[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxSurface1DLayered, v, 2); } + + { jint v[2] = { (jint)p.maxSurface2D[0], (jint)p.maxSurface2D[1] }; + setFieldInts(env, prop, cudaDeviceProp_maxSurface2D, v, 2); } + + { jint v[3] = { (jint)p.maxSurface2DLayered[0], (jint)p.maxSurface2DLayered[1], (jint)p.maxSurface2DLayered[2] }; + setFieldInts(env, prop, cudaDeviceProp_maxSurface2DLayered, v, 3); } + + { jint v[3] = { (jint)p.maxSurface3D[0], (jint)p.maxSurface3D[1], (jint)p.maxSurface3D[2] }; + 
setFieldInts(env, prop, cudaDeviceProp_maxSurface3D, v, 3); } + + // long fields + env->SetLongField(prop, cudaDeviceProp_totalGlobalMem, (jlong)p.totalGlobalMem); + env->SetLongField(prop, cudaDeviceProp_totalConstMem, (jlong)p.totalConstMem); + env->SetLongField(prop, cudaDeviceProp_sharedMemPerBlock, (jlong)p.sharedMemPerBlock); + env->SetLongField(prop, cudaDeviceProp_sharedMemPerMultiprocessor,(jlong)p.sharedMemPerMultiprocessor); + env->SetLongField(prop, cudaDeviceProp_reservedSharedMemPerBlock,(jlong)p.reservedSharedMemPerBlock); + env->SetLongField(prop, cudaDeviceProp_sharedMemPerBlockOptin, (jlong)p.sharedMemPerBlockOptin); + env->SetLongField(prop, cudaDeviceProp_memPitch, (jlong)p.memPitch); + env->SetLongField(prop, cudaDeviceProp_surfaceAlignment, (jlong)p.surfaceAlignment); + env->SetLongField(prop, cudaDeviceProp_textureAlignment, (jlong)p.textureAlignment); + env->SetLongField(prop, cudaDeviceProp_texturePitchAlignment, (jlong)p.texturePitchAlignment); + + // int fields (available in cudaDeviceProp) + env->SetIntField(prop, cudaDeviceProp_regsPerBlock, (jint)p.regsPerBlock); + env->SetIntField(prop, cudaDeviceProp_regsPerMultiprocessor, (jint)p.regsPerMultiprocessor); + env->SetIntField(prop, cudaDeviceProp_warpSize, (jint)p.warpSize); + env->SetIntField(prop, cudaDeviceProp_maxThreadsPerBlock, (jint)p.maxThreadsPerBlock); + env->SetIntField(prop, cudaDeviceProp_maxThreadsPerMultiProcessor,(jint)p.maxThreadsPerMultiProcessor); + env->SetIntField(prop, cudaDeviceProp_clockRate, (jint)p.clockRate); + env->SetIntField(prop, cudaDeviceProp_memoryClockRate, (jint)p.memoryClockRate); + env->SetIntField(prop, cudaDeviceProp_memoryBusWidth, (jint)p.memoryBusWidth); + env->SetIntField(prop, cudaDeviceProp_l2CacheSize, (jint)p.l2CacheSize); + env->SetIntField(prop, cudaDeviceProp_major, (jint)p.major); + env->SetIntField(prop, cudaDeviceProp_minor, (jint)p.minor); + env->SetIntField(prop, cudaDeviceProp_multiProcessorCount, (jint)p.multiProcessorCount); + 
env->SetIntField(prop, cudaDeviceProp_deviceOverlap, (jint)p.deviceOverlap); + env->SetIntField(prop, cudaDeviceProp_kernelExecTimeoutEnabled, (jint)p.kernelExecTimeoutEnabled); + env->SetIntField(prop, cudaDeviceProp_integrated, (jint)p.integrated); + env->SetIntField(prop, cudaDeviceProp_canMapHostMemory, (jint)p.canMapHostMemory); + env->SetIntField(prop, cudaDeviceProp_computeMode, (jint)p.computeMode); + env->SetIntField(prop, cudaDeviceProp_maxTexture1D, (jint)p.maxTexture1D); + env->SetIntField(prop, cudaDeviceProp_maxTexture1DMipmap, (jint)p.maxTexture1DMipmap); + env->SetIntField(prop, cudaDeviceProp_maxTexture1DLinear, (jint)p.maxTexture1DLinear); + env->SetIntField(prop, cudaDeviceProp_maxTextureCubemap, (jint)p.maxTextureCubemap); + env->SetIntField(prop, cudaDeviceProp_maxSurface1D, (jint)p.maxSurface1D); + env->SetIntField(prop, cudaDeviceProp_maxSurfaceCubemap, (jint)p.maxSurfaceCubemap); + env->SetIntField(prop, cudaDeviceProp_asyncEngineCount, (jint)p.asyncEngineCount); + env->SetIntField(prop, cudaDeviceProp_concurrentKernels, (jint)p.concurrentKernels); + env->SetIntField(prop, cudaDeviceProp_ECCEnabled, (jint)p.ECCEnabled); + env->SetIntField(prop, cudaDeviceProp_pciBusID, (jint)p.pciBusID); + env->SetIntField(prop, cudaDeviceProp_pciDeviceID, (jint)p.pciDeviceID); + env->SetIntField(prop, cudaDeviceProp_pciDomainID, (jint)p.pciDomainID); + env->SetIntField(prop, cudaDeviceProp_unifiedAddressing, (jint)p.unifiedAddressing); + +} + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemcpyNative + (JNIEnv *env, jclass cls, jobject dst, jobject src, jlong count, jint kind) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, dst, "dst", "cudaMemcpy"); + CUJAVA_REQUIRE_NONNULL(env, src, "src", "cudaMemcpy"); + + Logger::log(LOG_TRACE, "Executing cudaMemcpy of %ld bytes\n", (long)count); + + // Obtain the destination and source pointers + PointerData *dstPointerData = initPointerData(env, 
dst); + if (dstPointerData == nullptr) { + return CUJAVA_INTERNAL_ERROR; + } + PointerData *srcPointerData = initPointerData(env, src); + if (srcPointerData == nullptr) { + return CUJAVA_INTERNAL_ERROR; + } + + // Execute the cudaMemcpy operation + int result = CUJAVA_INTERNAL_ERROR; + if (kind == cudaMemcpyHostToHost) { + Logger::log(LOG_TRACE, "Copying %ld bytes from host to host\n", (long)count); + result = cudaMemcpy((void*)dstPointerData->getPointer(env), (void*)srcPointerData->getPointer(env), (size_t)count, cudaMemcpyHostToHost); + } + else if (kind == cudaMemcpyHostToDevice) { + Logger::log(LOG_TRACE, "Copying %ld bytes from host to device\n", (long)count); + result = cudaMemcpy((void*)dstPointerData->getPointer(env), (void*)srcPointerData->getPointer(env), (size_t)count, cudaMemcpyHostToDevice); + } + else if (kind == cudaMemcpyDeviceToHost) { + Logger::log(LOG_TRACE, "Copying %ld bytes from device to host\n", (long)count); + result = cudaMemcpy((void*)dstPointerData->getPointer(env), (void*)srcPointerData->getPointer(env), (size_t)count, cudaMemcpyDeviceToHost); + } + else if (kind == cudaMemcpyDeviceToDevice) { + Logger::log(LOG_TRACE, "Copying %ld bytes from device to device\n", (long)count); + result = cudaMemcpy((void*)dstPointerData->getPointer(env), (void*)srcPointerData->getPointer(env), (size_t)count, cudaMemcpyDeviceToDevice); + } + else { + Logger::log(LOG_ERROR, "Invalid cudaMemcpyKind given: %d\n", kind); + return cudaErrorInvalidMemcpyDirection; + } + + // Release the pointer data + if (!releasePointerData(env, dstPointerData)) return CUJAVA_INTERNAL_ERROR; + if (!releasePointerData(env, srcPointerData, JNI_ABORT)) return CUJAVA_INTERNAL_ERROR; + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocNative + (JNIEnv *env, jclass cls, jobject devPtr, jlong size) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaMalloc"); + + 
Logger::log(LOG_TRACE, "Executing cudaMalloc of %ld bytes\n", (long)size); + + void *nativeDevPtr = nullptr; + int result = cudaMalloc(&nativeDevPtr, (size_t)size); + setPointer(env, devPtr, (jlong)nativeDevPtr); + + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaFreeNative + (JNIEnv *env, jclass cls, jobject devPtr) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaFree"); + + Logger::log(LOG_TRACE, "Executing cudaFree\n"); + + void *nativeDevPtr = nullptr; + nativeDevPtr = getPointer(env, devPtr); + int result = cudaFree(nativeDevPtr); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemsetNative + (JNIEnv *env, jclass cls, jobject mem, jint c, jlong count) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, mem, "mem", "cudaMemset"); + + Logger::log(LOG_TRACE, "Executing cudaMemset\n"); + + void *nativeMem = getPointer(env, mem); + + int result = cudaMemset(nativeMem, (int)c, (size_t)count); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaDeviceSynchronizeNative + (JNIEnv *env, jclass cls) { + Logger::log(LOG_TRACE, "Executing cudaDeviceSynchronize\n"); + + int result = cudaDeviceSynchronize(); + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocManagedNative + (JNIEnv *env, jclass cls, jobject devPtr, jlong size, jint flags) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaMallocManaged"); + + Logger::log(LOG_TRACE, "Executing cudaMallocManaged of %ld bytes\n", (long)size); + + void *nativeDevPtr = nullptr; + int result = cudaMallocManaged(&nativeDevPtr, (size_t)size, (unsigned int)flags); + if (result == cudaSuccess) { + if (flags == cudaMemAttachHost) { + jobject object = env->NewDirectByteBuffer(nativeDevPtr, 
size); + env->SetObjectField(devPtr, Pointer_buffer, object); + env->SetObjectField(devPtr, Pointer_pointers, nullptr); + env->SetLongField(devPtr, Pointer_byteOffset, 0); + } + env->SetLongField(devPtr, NativePointerObject_nativePointer, (jlong)nativeDevPtr); + } + + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemGetInfoNative + (JNIEnv *env, jclass cls, jlongArray freeBytes, jlongArray totalBytes) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, freeBytes, "freeBytes", "cudaMemGetInfo"); + CUJAVA_REQUIRE_NONNULL(env, totalBytes, "totalBytes", "cudaMemGetInfo"); + + Logger::log(LOG_TRACE, "Executing cudaMemGetInfo\n"); + + size_t nativeFreeBytes = 0; + size_t nativeTotalBytes = 0; + + int result = cudaMemGetInfo(&nativeFreeBytes, &nativeTotalBytes); + + if (!set(env, freeBytes, 0, (jlong)nativeFreeBytes)) return CUJAVA_INTERNAL_ERROR; + if (!set(env, totalBytes, 0, (jlong)nativeTotalBytes)) return CUJAVA_INTERNAL_ERROR; + + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceCountNative + (JNIEnv *env, jclass cls, jintArray count) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, count, "count", "cudaGetDeviceCount"); + + Logger::log(LOG_TRACE, "Executing cudaGetDeviceCount\n"); + + int nativeCount = 0; + int result = cudaGetDeviceCount(&nativeCount); + if (!set(env, count, 0, nativeCount)) return CUJAVA_INTERNAL_ERROR; + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceNative + (JNIEnv *env, jclass cls, jint device) { + Logger::log(LOG_TRACE, "Executing cudaSetDevice\n"); + + return cudaSetDevice(device); +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceFlagsNative + (JNIEnv *env, jclass cls, jint flags) { + Logger::log(LOG_TRACE, "Executing cudaSetDeviceFlags\n"); + + return 
cudaSetDeviceFlags((int)flags); +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceNative + (JNIEnv *env, jclass cls, jintArray device) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, device, "device", "cudaGetDevice"); + + Logger::log(LOG_TRACE, "Executing cudaGetDevice\n"); + + int nativeDevice = 0; + int result = cudaGetDevice(&nativeDevice); + if (!set(env, device, 0, nativeDevice)) return CUJAVA_INTERNAL_ERROR; + return result; +} + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDevicePropertiesNative + (JNIEnv *env, jclass cls, jobject prop, jint device) { + + // Validate: all jobject parameters must be non-null + CUJAVA_REQUIRE_NONNULL(env, prop, "prop", "cudaGetDeviceProperties"); + + Logger::log(LOG_TRACE, "Executing cudaGetDeviceProperties\n"); + + cudaDeviceProp nativeProp; + int result = cudaGetDeviceProperties(&nativeProp, device); + + setCudaDeviceProp(env, prop, nativeProp); + return result; +} + + + + diff --git a/src/main/cpp/jni/runtime/cujava_runtime.hpp b/src/main/cpp/jni/runtime/cujava_runtime.hpp new file mode 100644 index 00000000000..4c455821a73 --- /dev/null +++ b/src/main/cpp/jni/runtime/cujava_runtime.hpp @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#ifndef _Included_org_apache_sysds_cujava_runtime_CuJava +#define _Included_org_apache_sysds_cujava_runtime_CuJava +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Class: org.apache.sysds.cujava.runtime.CuJava + * Methods: + * - cudaMemcpy + * - cudaMalloc + * - cudaFree + * - cudaMemset + * - cudaDeviceSynchronize + * - cudaMallocManaged + * - cudaMemGetInfo + * - cudaGetDeviceCount + * - cudaSetDevice + * - cudaSetDeviceFlags + * - cudaGetDevice + * - cudaGetDeviceProperties + */ + + + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemcpyNative + (JNIEnv *, jclass, jobject, jobject, jlong, jint); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocNative + (JNIEnv *env, jclass cls, jobject devPtr, jlong size); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaFreeNative + (JNIEnv *env, jclass cls, jobject devPtr); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemsetNative + (JNIEnv *env, jclass cls, jobject mem, jint c, jlong count); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaDeviceSynchronizeNative + (JNIEnv *env, jclass cls); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocManagedNative + (JNIEnv *env, jclass cls, jobject devPtr, jlong size, jint flags); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemGetInfoNative + (JNIEnv *env, jclass cls, jlongArray freeBytes, jlongArray totalBytes); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceCountNative + (JNIEnv *env, jclass cls, jintArray count); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceNative + (JNIEnv *env, jclass cls, jint device); + +JNIEXPORT jint JNICALL 
Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceFlagsNative + (JNIEnv *env, jclass cls, jint flags); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceNative + (JNIEnv *env, jclass cls, jintArray device); + +JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDevicePropertiesNative + (JNIEnv *env, jclass cls, jobject prop, jint device); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/main/cpp/jni/runtime/cujava_runtime_common.hpp b/src/main/cpp/jni/runtime/cujava_runtime_common.hpp new file mode 100644 index 00000000000..94e6265711d --- /dev/null +++ b/src/main/cpp/jni/runtime/cujava_runtime_common.hpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef CUJAVA_RUNTIME_COMMON_HPP +#define CUJAVA_RUNTIME_COMMON_HPP + +#include +#include +#include + +#include "../common/cujava_logger.hpp" +#include "../common/cujava_jni_utils.hpp" +#include "../common/cujava_pointer_utils.hpp" + +#define CUJAVA_INTERNAL_ERROR 0x80000001 + + + +#endif // CUJAVA_RUNTIME_COMMON_HPP diff --git a/src/main/cpp/lib/libCuJavaCommonJNI.a b/src/main/cpp/lib/libCuJavaCommonJNI.a new file mode 100644 index 00000000000..088843ce137 Binary files /dev/null and b/src/main/cpp/lib/libCuJavaCommonJNI.a differ diff --git a/src/main/cpp/lib/libcujava_cublas.so b/src/main/cpp/lib/libcujava_cublas.so new file mode 100755 index 00000000000..a64fe23e06f Binary files /dev/null and b/src/main/cpp/lib/libcujava_cublas.so differ diff --git a/src/main/cpp/lib/libcujava_cusparse.so b/src/main/cpp/lib/libcujava_cusparse.so new file mode 100755 index 00000000000..d0439d0d327 Binary files /dev/null and b/src/main/cpp/lib/libcujava_cusparse.so differ diff --git a/src/main/cpp/lib/libcujava_driver.so b/src/main/cpp/lib/libcujava_driver.so new file mode 100755 index 00000000000..a3e488b7722 Binary files /dev/null and b/src/main/cpp/lib/libcujava_driver.so differ diff --git a/src/main/cpp/lib/libcujava_runtime.so b/src/main/cpp/lib/libcujava_runtime.so new file mode 100755 index 00000000000..24de9f300e4 Binary files /dev/null and b/src/main/cpp/lib/libcujava_runtime.so differ diff --git a/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java b/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java new file mode 100644 index 00000000000..4e8381b505d --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
package org.apache.sysds.cujava;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Loads the native CuJava libraries, first through the regular library lookup
 * and, if that fails, by extracting the library from the classpath resource
 * {@code /lib/<platform file name>} into a temporary file.
 */
public class CuJavaLibLoader {

	/** Names of the libraries that have been loaded successfully. */
	private static final Set<String> LOADED = Collections.newSetFromMap(new ConcurrentHashMap<>());

	/**
	 * Public entry – call from static blocks in binding classes.
	 *
	 * <p>Calling this again for a library that was already loaded is a no-op. A
	 * library is only remembered once loading has actually succeeded, so a failed
	 * attempt (for whatever reason) can be retried.
	 *
	 * @param lib platform-independent library name, as passed to
	 *            {@link System#loadLibrary(String)}
	 * @throws UnsatisfiedLinkError if the library can be loaded neither from the
	 *                              library path nor from the classpath resource
	 */
	public static synchronized void load(String lib) {
		if (LOADED.contains(lib))
			return; // already loaded

		// 1) Standard lookup (java.library.path or OS default locations)
		try {
			System.loadLibrary(lib);
			LOADED.add(lib);
			return;
		}
		catch (UnsatisfiedLinkError ignored) {
			// Fall through to JAR extraction
		}

		// 2) Extract the library from the JAR (/lib/...) to a temp file
		String fileName = System.mapLibraryName(lib); // platform-specific
		String resource = "/lib/" + fileName;

		try (InputStream in = CuJavaLibLoader.class.getResourceAsStream(resource)) {
			if (in == null)
				throw new UnsatisfiedLinkError(
					"Native library not found inside JAR at " + resource);

			Path tmp = Files.createTempFile("cujava_", fileName);
			tmp.toFile().deleteOnExit();
			Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);

			System.load(tmp.toAbsolutePath().toString());
			LOADED.add(lib);
		}
		catch (IOException | UnsatisfiedLinkError e) {
			// Report every failure of the fallback path as an UnsatisfiedLinkError
			// and keep the original problem as the cause.
			throw (UnsatisfiedLinkError)
				new UnsatisfiedLinkError("Failed to load native CUDA bridge: " + e).initCause(e);
		}
	}

	private CuJavaLibLoader() { /* no instances */ }

}
/**
 * Integer codes of the CUDA data types, mirroring {@code cudaDataType_t}.
 *
 * <p>Naming: {@code CUDA_R_*} is a real scalar, {@code CUDA_C_*} is the complex
 * (real, imaginary) pair of the same element type. The constants are listed in
 * ascending order of their numeric value.
 */
public class CudaDataType {

	/** 32-bit IEEE single-precision float */
	public static final int CUDA_R_32F = 0;
	/** 64-bit IEEE double-precision float */
	public static final int CUDA_R_64F = 1;
	/** 16-bit IEEE half-precision float (fp16) */
	public static final int CUDA_R_16F = 2;
	/** 8-bit signed integer */
	public static final int CUDA_R_8I = 3;
	/** complex: two 32-bit floats */
	public static final int CUDA_C_32F = 4;
	/** complex: two 64-bit doubles */
	public static final int CUDA_C_64F = 5;
	/** complex: two fp16 numbers (real, imag) */
	public static final int CUDA_C_16F = 6;
	/** complex: two 8-bit signed integers */
	public static final int CUDA_C_8I = 7;
	/** 8-bit unsigned integer */
	public static final int CUDA_R_8U = 8;
	/** complex: two 8-bit unsigned integers */
	public static final int CUDA_C_8U = 9;
	/** 32-bit signed integer */
	public static final int CUDA_R_32I = 10;
	/** complex: two 32-bit signed integers */
	public static final int CUDA_C_32I = 11;
	/** 32-bit unsigned integer */
	public static final int CUDA_R_32U = 12;
	/** complex: two 32-bit unsigned integers */
	public static final int CUDA_C_32U = 13;
	/** 16-bit bfloat16 */
	public static final int CUDA_R_16BF = 14;
	/** complex: two bfloat16 numbers */
	public static final int CUDA_C_16BF = 15;
	/** 4-bit signed integer */
	public static final int CUDA_R_4I = 16;
	/** complex: two 4-bit signed integers */
	public static final int CUDA_C_4I = 17;
	/** 4-bit unsigned integer */
	public static final int CUDA_R_4U = 18;
	/** complex: two 4-bit unsigned integers */
	public static final int CUDA_C_4U = 19;
	/** 16-bit signed integer */
	public static final int CUDA_R_16I = 20;
	/** complex: two 16-bit signed integers */
	public static final int CUDA_C_16I = 21;
	/** 16-bit unsigned integer */
	public static final int CUDA_R_16U = 22;
	/** complex: two 16-bit unsigned integers */
	public static final int CUDA_C_16U = 23;
	/** 64-bit signed integer */
	public static final int CUDA_R_64I = 24;
	/** complex: two 64-bit signed integers */
	public static final int CUDA_C_64I = 25;
	/** 64-bit unsigned integer */
	public static final int CUDA_R_64U = 26;
	/** complex: two 64-bit unsigned integers */
	public static final int CUDA_C_64U = 27;
	/** 8-bit float, FP-8 format E4M3 */
	public static final int CUDA_R_8F_E4M3 = 28;
	/** 8-bit float, FP-8 format E5M2 */
	public static final int CUDA_R_8F_E5M2 = 29;

	private CudaDataType() { /* utility class – no instantiation */ }

}
+ */
+
+package org.apache.sysds.cujava;
+
+/**
+ * Unchecked exception type of the cujava package. It adds no state of its own and simply
+ * forwards its arguments to the corresponding {@link RuntimeException} constructors.
+ */
+public class CudaException extends RuntimeException {
+
+	private static final long serialVersionUID = 1587809813906124159L;
+
+	/**
+	 * @param message detail message, later available through {@link #getMessage()}
+	 * @param cause throwable recorded as the cause of this exception
+	 */
+	public CudaException(String message, Throwable cause) {
+		super(message, cause);
+	}
+
+	/**
+	 * @param message detail message, later available through {@link #getMessage()}
+	 */
+	public CudaException(String message) {
+		super(message);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/NativePointerObject.java b/src/main/java/org/apache/sysds/cujava/NativePointerObject.java
new file mode 100644
index 00000000000..8b3c33cb3e6
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/NativePointerObject.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+/**
+ * Base type that carries one {@code long} pointer value and exposes it through
+ * {@link #getNativePointer()}. No setter is offered on the Java side.
+ */
+public abstract class NativePointerObject {
+
+	// Kept non-final and under this exact name.
+	// NOTE(review): presumably also written by the JNI layer via field lookup -- confirm before changing.
+	private long nativePointer;
+
+	/** Starts with a pointer value of 0. */
+	protected NativePointerObject() {
+		this(0L);
+	}
+
+	/**
+	 * Starts with the given pointer value.
+	 *
+	 * @param nativePointer value to store
+	 */
+	protected NativePointerObject(long nativePointer) {
+		this.nativePointer = nativePointer;
+	}
+
+	/**
+	 * Starts with the pointer value currently stored in another object.
+	 *
+	 * @param other object whose value is copied; dereferenced directly, so null raises NullPointerException
+	 */
+	protected NativePointerObject(NativePointerObject other) {
+		this(other.nativePointer);
+	}
+
+	/**
+	 * @return the pointer value stored in this object
+	 */
+	public long getNativePointer() {
+		return nativePointer;
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/Pointer.java b/src/main/java/org/apache/sysds/cujava/Pointer.java
new file mode 100644
index 00000000000..84d280db4b9
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/Pointer.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.ShortBuffer;
+import java.nio.IntBuffer;
+import java.nio.FloatBuffer;
+import java.nio.LongBuffer;
+import java.nio.DoubleBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * A pointer value that can also refer to host-side data. In addition to the numeric value
+ * inherited from {@link NativePointerObject} an instance may hold a java.nio {@link Buffer} or an
+ * array of {@link NativePointerObject} references, together with a byte offset.
+ *
+ * NOTE(review): the private field names are presumably looked up by the JNI layer -- confirm
+ * before renaming any of them.
+ */
+public class Pointer extends NativePointerObject {
+
+	private long byteOffset;
+	private final Buffer buffer;
+	private final NativePointerObject[] pointers;
+
+	/** Creates a pointer without buffer or pointer array; value and byte offset are 0. */
+	public Pointer() {
+		this.buffer = null;
+		this.pointers = null;
+	}
+
+	/** Creates a pointer holding the given numeric value; no buffer, no pointer array, offset 0. */
+	protected Pointer(long nativePointerValue) {
+		super(nativePointerValue);
+		this.buffer = null;
+		this.pointers = null;
+	}
+
+	/** Creates a pointer that refers to the given buffer; offset 0. */
+	private Pointer(Buffer buffer) {
+		this.pointers = null;
+		this.buffer = buffer;
+	}
+
+	/** Creates a pointer that refers to the given array of pointer objects; offset 0. */
+	private Pointer(NativePointerObject[] pointers) {
+		this.pointers = pointers;
+		this.buffer = null;
+	}
+
+	/** Copies value, buffer reference, pointer-array reference and byte offset of {@code other}. */
+	protected Pointer(Pointer other) {
+		super(other.getNativePointer());
+		this.byteOffset = other.byteOffset;
+		this.buffer = other.buffer;
+		this.pointers = other.pointers;
+	}
+
+	/** Copies {@code other} and adds {@code byteOffset} to the offset taken over from it. */
+	protected Pointer(Pointer other, long byteOffset) {
+		this(other);
+		this.byteOffset = this.byteOffset + byteOffset;
+	}
+
+	/** @return a pointer referring to a ByteBuffer that wraps {@code values} */
+	public static Pointer to(byte[] values) {
+		ByteBuffer wrapped = ByteBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to a CharBuffer that wraps {@code values} */
+	public static Pointer to(char[] values) {
+		CharBuffer wrapped = CharBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to a ShortBuffer that wraps {@code values} */
+	public static Pointer to(short[] values) {
+		ShortBuffer wrapped = ShortBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to an IntBuffer that wraps {@code values} */
+	public static Pointer to(int[] values) {
+		IntBuffer wrapped = IntBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to a FloatBuffer that wraps {@code values} */
+	public static Pointer to(float[] values) {
+		FloatBuffer wrapped = FloatBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to a LongBuffer that wraps {@code values} */
+	public static Pointer to(long[] values) {
+		LongBuffer wrapped = LongBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/** @return a pointer referring to a DoubleBuffer that wraps {@code values} */
+	public static Pointer to(double[] values) {
+		DoubleBuffer wrapped = DoubleBuffer.wrap(values);
+		return new Pointer(wrapped);
+	}
+
+	/**
+	 * @param pointers pointer objects to refer to; the array reference is stored as passed
+	 * @return a pointer referring to that array
+	 * @throws IllegalArgumentException if {@code pointers} is null
+	 */
+	public static Pointer to(NativePointerObject... pointers) {
+		if(pointers != null) {
+			return new Pointer(pointers);
+		}
+		throw new IllegalArgumentException(
+			"The pointers argument is null – expected one or more NativePointerObject references.");
+	}
+
+	/** @return a copy of this pointer whose byte offset is increased by {@code byteOffset} */
+	public Pointer withByteOffset(long byteOffset) {
+		return new Pointer(this, byteOffset);
+	}
+
+	/** @return the byte offset stored in this pointer */
+	public long getByteOffset() {
+		return byteOffset;
+	}
+
+	/** @return the sum of {@link #getNativePointer()} and {@link #getByteOffset()} */
+	public long address() {
+		long base = getNativePointer();
+		return base + getByteOffset();
+	}
+
+	/**
+	 * Returns a native-ordered view of a segment of the referenced ByteBuffer.
+	 *
+	 * @param byteOffset start of the segment, relative to the buffer's current position
+	 * @param byteSize length of the segment in bytes
+	 * @return the view, or null if this pointer holds no buffer or a buffer that is not a ByteBuffer
+	 */
+	public ByteBuffer getByteBuffer(long byteOffset, long byteSize) {
+		// instanceof is false for null, so this also covers a pointer without buffer
+		if(!(buffer instanceof ByteBuffer source)) {
+			return null;
+		}
+		ByteBuffer window = source.slice();
+		// limit first, then position: same call order as before, so failures surface identically
+		window.limit(Math.toIntExact(byteOffset + byteSize));
+		window.position(Math.toIntExact(byteOffset));
+		return window.slice().order(ByteOrder.nativeOrder());
+	}
+
+	/**
+	 * @param buffer a direct buffer or a buffer with an accessible backing array
+	 * @return a pointer referring to that buffer
+	 * @throws IllegalArgumentException if {@code buffer} is null, or neither direct nor array-backed
+	 */
+	public static Pointer to(Buffer buffer) {
+		boolean usable = buffer != null && (buffer.isDirect() || buffer.hasArray());
+		if(!usable) {
+			throw new IllegalArgumentException(
+				"Invalid buffer: argument is null or neither direct nor backed by an array; " +
+				"expected a non-null direct buffer or one with an accessible backing array.");
+		}
+		return new Pointer(buffer);
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/Sizeof.java b/src/main/java/org/apache/sysds/cujava/Sizeof.java
new file mode 100644
index 00000000000..b53a1fa6e81
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/Sizeof.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+/**
+ * Sizes of the Java primitive types, expressed in bytes because CUDA expects sizes in bytes.
+ * Each value is taken from the {@code BYTES} constant of the matching JDK wrapper class, which
+ * equals the wrapper's {@code SIZE} (in bits) divided by 8. All fields remain compile-time constants.
+ */
+public class Sizeof {
+
+	/** Size of a {@code byte} in bytes. */
+	public static final int BYTE = Byte.BYTES;
+
+	/** Size of a {@code char} in bytes. */
+	public static final int CHAR = Character.BYTES;
+
+	/** Size of a {@code short} in bytes. */
+	public static final int SHORT = Short.BYTES;
+
+	/** Size of an {@code int} in bytes. */
+	public static final int INT = Integer.BYTES;
+
+	/** Size of a {@code float} in bytes. */
+	public static final int FLOAT = Float.BYTES;
+
+	/** Size of a {@code long} in bytes. */
+	public static final int LONG = Long.BYTES;
+
+	/** Size of a {@code double} in bytes. */
+	public static final int DOUBLE = Double.BYTES;
+
+	// Keep constructor private to prevent instantiation
+	private Sizeof() {
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java b/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java
new file mode 100644
index 00000000000..0689e64b91f
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+/**
+ * Static entry points for cublas v2. cublas v1 is deprecated in CUDA 12 and SystemDS does not
+ * utilize v1 methods anymore, so only v2 functions are declared here.
+ *
+ * Every public method forwards its arguments unchanged to the private native method carrying the
+ * same name plus the suffix "Native" and returns the status code it produced. Whether a failing
+ * status additionally raises an exception is controlled by {@link #setExceptionsEnabled(boolean)}.
+ */
+public class CuJavaCublas {
+
+	private static final String LIB_BASE = "cujava_cublas";
+
+	// When true, every status other than CUBLAS_STATUS_SUCCESS is reported as a CudaException.
+	private static boolean exceptionsEnabled = false;
+
+	static {
+		// Runs once, when this class is initialized.
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	private CuJavaCublas() {
+		// static methods only
+	}
+
+	/**
+	 * Passes a status code through, throwing instead when exceptions are enabled and the code
+	 * differs from CUBLAS_STATUS_SUCCESS.
+	 *
+	 * @param result status code returned by a native call
+	 * @return {@code result}, unchanged
+	 * @throws CudaException carrying the status name, under the condition described above
+	 */
+	private static int checkCublasStatus(int result) {
+		if(!exceptionsEnabled || result == cublasStatus.CUBLAS_STATUS_SUCCESS) {
+			return result;
+		}
+		throw new CudaException(cublasStatus.statusString(result));
+	}
+
+	/**
+	 * @param enabled true to have failing status codes thrown as {@link CudaException}, false to
+	 *                have them returned only
+	 */
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	/* ---- public wrappers ---- */
+
+	/** Forwards to cublasCreateNative. @return the status code */
+	public static int cublasCreate(cublasHandle handle) {
+		int status = cublasCreateNative(handle);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDestroyNative. @return the status code */
+	public static int cublasDestroy(cublasHandle handle) {
+		int status = cublasDestroyNative(handle);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDgeamNative. @return the status code */
+	public static int cublasDgeam(cublasHandle handle, int transa, int transb, int m, int n, Pointer alpha, Pointer A,
+		int lda, Pointer beta, Pointer B, int ldb, Pointer C, int ldc) {
+		int status = cublasDgeamNative(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDdotNative. @return the status code */
+	public static int cublasDdot(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy, Pointer result) {
+		int status = cublasDdotNative(handle, n, x, incx, y, incy, result);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDgemvNative. @return the status code */
+	public static int cublasDgemv(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A, int lda,
+		Pointer x, int incx, Pointer beta, Pointer y, int incy) {
+		int status = cublasDgemvNative(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDgemmNative. @return the status code */
+	public static int cublasDgemm(cublasHandle handle, int transa, int transb, int m, int n, int k, Pointer alpha,
+		Pointer A, int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc) {
+		int status = cublasDgemmNative(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDsyrkNative. @return the status code */
+	public static int cublasDsyrk(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha, Pointer A,
+		int lda, Pointer beta, Pointer C, int ldc) {
+		int status = cublasDsyrkNative(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDaxpyNative. @return the status code */
+	public static int cublasDaxpy(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx, Pointer y, int incy) {
+		int status = cublasDaxpyNative(handle, n, alpha, x, incx, y, incy);
+		return checkCublasStatus(status);
+	}
+
+	/** Forwards to cublasDtrsmNative. @return the status code */
+	public static int cublasDtrsm(cublasHandle handle, int side, int uplo, int trans, int diag, int m, int n,
+		Pointer alpha, Pointer A, int lda, Pointer B, int ldb) {
+		int status = cublasDtrsmNative(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+		return checkCublasStatus(status);
+	}
+
+	/* ---- native declarations (names and signatures are bound by JNI; do not alter) ---- */
+
+	private static native int cublasCreateNative(cublasHandle handle);
+
+	private static native int cublasDestroyNative(cublasHandle handle);
+
+	private static native int cublasDgeamNative(cublasHandle handle, int transa, int transb, int m, int n,
+		Pointer alpha, Pointer A, int lda, Pointer beta, Pointer B, int ldb, Pointer C, int ldc);
+
+	private static native int cublasDdotNative(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy,
+		Pointer result);
+
+	private static native int cublasDgemvNative(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A,
+		int lda, Pointer x, int incx, Pointer beta, Pointer y, int incy);
+
+	private static native int cublasDgemmNative(cublasHandle handle, int transa, int transb, int m, int n, int k,
+		Pointer alpha, Pointer A, int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc);
+
+	private static native int cublasDsyrkNative(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha,
+		Pointer A, int lda, Pointer beta, Pointer C, int ldc);
+
+	private static native int cublasDaxpyNative(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx,
+		Pointer y, int incy);
+
+	private static native int cublasDtrsmNative(cublasHandle handle, int side, int uplo, int trans, int diag, int m,
+		int n, Pointer alpha, Pointer A, int lda, Pointer B, int ldb);
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java
new file mode 100644
index 00000000000..8c0972e2724
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Holder for the two integer constants named after the cuBLAS diagonal-type enumeration.
+ * NOTE(review): presumably the values expected by the {@code diag} argument of
+ * CuJavaCublas.cublasDtrsm -- confirm against the caller.
+ */
+public class cublasDiagType {
+
+	private cublasDiagType() {
+		// constants only; never instantiated
+	}
+
+	/** Code 0. */
+	public static final int CUBLAS_DIAG_NON_UNIT = 0;
+
+	/** Code 1. */
+	public static final int CUBLAS_DIAG_UNIT = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java
new file mode 100644
index 00000000000..d9e0720fcee
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Holder for the three integer constants named after the cuBLAS fill-mode enumeration.
+ * NOTE(review): presumably the values expected by the {@code uplo} arguments in CuJavaCublas --
+ * confirm against the callers.
+ */
+public class cublasFillMode {
+
+	private cublasFillMode() {
+		// constants only; never instantiated
+	}
+
+	/** Code 0. */
+	public static final int CUBLAS_FILL_MODE_LOWER = 0;
+
+	/** Code 1. */
+	public static final int CUBLAS_FILL_MODE_UPPER = 1;
+
+	/** Code 2. */
+	public static final int CUBLAS_FILL_MODE_FULL = 2;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java
new file mode 100644
index 00000000000..a02dfed3295
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+/**
+ * Handle type accepted by the methods of CuJavaCublas. It adds nothing to
+ * {@link NativePointerObject}; it only gives the pointer value a distinct Java type.
+ */
+public class cublasHandle extends NativePointerObject {
+
+	/** Creates a handle through the no-argument superclass constructor. */
+	public cublasHandle() {
+		super();
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java
new file mode 100644
index 00000000000..eadf75d756b
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Holder for the integer constants named after the cuBLAS operation enumeration.
+ * NOTE(review): presumably the values expected by the {@code trans}, {@code transa} and
+ * {@code transb} arguments in CuJavaCublas -- confirm against the callers.
+ */
+public class cublasOperation {
+
+	private cublasOperation() {
+		// constants only; never instantiated
+	}
+
+	/** Code 0. */
+	public static final int CUBLAS_OP_N = 0;
+
+	/** Code 1. */
+	public static final int CUBLAS_OP_T = 1;
+
+	/** Code 2. */
+	public static final int CUBLAS_OP_C = 2;
+
+	/** Code 2 as well, i.e. the same value as {@link #CUBLAS_OP_C}; spelling kept as declared. */
+	public static final int CUBLAS_OP_HERMITAN = 2;
+
+	/** Code 3; spelling kept as declared. */
+	public static final int CUBLAS_OP_CONJG = 3;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java
new file mode 100644
index 00000000000..9b274e28cba
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Holder for the two integer constants named after the cuBLAS pointer-mode enumeration.
+ */
+public class cublasPointerMode {
+
+	private cublasPointerMode() {
+		// constants only; never instantiated
+	}
+
+	/** Code 0. */
+	public static final int CUBLAS_POINTER_MODE_HOST = 0;
+
+	/** Code 1. */
+	public static final int CUBLAS_POINTER_MODE_DEVICE = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java
new file mode 100644
index 00000000000..fe474d55065
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Holder for the two integer constants named after the cuBLAS side-mode enumeration.
+ * NOTE(review): presumably the values expected by the {@code side} argument of
+ * CuJavaCublas.cublasDtrsm -- confirm against the caller.
+ */
+public class cublasSideMode {
+
+	private cublasSideMode() {
+		// constants only; never instantiated
+	}
+
+	/** Code 0. */
+	public static final int CUBLAS_SIDE_LEFT = 0;
+
+	/** Code 1. */
+	public static final int CUBLAS_SIDE_RIGHT = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java
new file mode 100644
index 00000000000..b0ba370a730
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer status codes used by the cuBLAS bindings, plus a helper that turns a code into its
+ * symbolic name. CuJavaCublas compares native results against {@link #CUBLAS_STATUS_SUCCESS} and
+ * uses {@link #statusString(int)} as the exception message.
+ */
+public class cublasStatus {
+
+	public static final int CUBLAS_STATUS_SUCCESS = 0;
+
+	public static final int CUBLAS_STATUS_NOT_INITIALIZED = 1;
+
+	public static final int CUBLAS_STATUS_ALLOC_FAILED = 3;
+
+	public static final int CUBLAS_STATUS_INVALID_VALUE = 7;
+
+	public static final int CUBLAS_STATUS_ARCH_MISMATCH = 8;
+
+	public static final int CUBLAS_STATUS_MAPPING_ERROR = 11;
+
+	public static final int CUBLAS_STATUS_EXECUTION_FAILED = 13;
+
+	public static final int CUBLAS_STATUS_INTERNAL_ERROR = 14;
+
+	public static final int CUBLAS_STATUS_NOT_SUPPORTED = 15;
+
+	/** Value 16 of cublasStatus_t; previously absent here, so it was reported as an invalid code. */
+	public static final int CUBLAS_STATUS_LICENSE_ERROR = 16;
+
+	private cublasStatus() {
+	}
+
+	/**
+	 * Maps a status code to the name of the matching constant.
+	 *
+	 * @param err status code, typically the return value of a CuJavaCublas call
+	 * @return the constant's name; for a code without a constant, a text that starts with
+	 *         "Invalid error" and includes the numeric value so that the failure can be traced
+	 */
+	public static String statusString(int err) {
+		return switch(err) {
+			case CUBLAS_STATUS_SUCCESS -> "CUBLAS_STATUS_SUCCESS";
+			case CUBLAS_STATUS_NOT_INITIALIZED -> "CUBLAS_STATUS_NOT_INITIALIZED";
+			case CUBLAS_STATUS_ALLOC_FAILED -> "CUBLAS_STATUS_ALLOC_FAILED";
+			case CUBLAS_STATUS_INVALID_VALUE -> "CUBLAS_STATUS_INVALID_VALUE";
+			case CUBLAS_STATUS_ARCH_MISMATCH -> "CUBLAS_STATUS_ARCH_MISMATCH";
+			case CUBLAS_STATUS_MAPPING_ERROR -> "CUBLAS_STATUS_MAPPING_ERROR";
+			case CUBLAS_STATUS_EXECUTION_FAILED -> "CUBLAS_STATUS_EXECUTION_FAILED";
+			case CUBLAS_STATUS_INTERNAL_ERROR -> "CUBLAS_STATUS_INTERNAL_ERROR";
+			case CUBLAS_STATUS_NOT_SUPPORTED -> "CUBLAS_STATUS_NOT_SUPPORTED";
+			case CUBLAS_STATUS_LICENSE_ERROR -> "CUBLAS_STATUS_LICENSE_ERROR";
+			// keep the raw number: without it an unexpected code cannot be looked up
+			default -> "Invalid error (unknown cublasStatus " + err + ")";
+		};
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java b/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java
new file mode 100644
index 00000000000..a38cd518c13
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cudnn;
+
+/**
+ * Placeholder for the cuDNN bindings. The class currently declares no members besides a private
+ * constructor, so it can neither be instantiated nor used for anything yet.
+ */
+public class CuJavaCudnn {
+
+	// TODO: Implement java wrapper for cuDNN
+
+	private CuJavaCudnn() {
+		// not meant to be instantiated
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java b/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java
new file mode 100644
index 00000000000..20878688607
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusolver;
+
+/**
+ * Placeholder for the cuSOLVER bindings. The class currently declares no members besides a
+ * private constructor, so it can neither be instantiated nor used for anything yet.
+ */
+public class CuJavaCusolver {
+
+	// TODO: Implement java wrapper for cuSOLVER
+
+	private CuJavaCusolver() {
+		// not meant to be instantiated
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java b/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java
new file mode 100644
index 00000000000..27965a7938e
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.CuJavaLibLoader; +import org.apache.sysds.cujava.CudaException; +import org.apache.sysds.cujava.Pointer; + +public class CuJavaCusparse { + + private static boolean exceptionsEnabled = false; + + private static final String LIB_BASE = "cujava_cusparse"; + + private CuJavaCusparse() { + + } + + static { + CuJavaLibLoader.load(LIB_BASE); + } + + private static int checkCusparseStatus(int result) { + if(exceptionsEnabled && result != cusparseStatus.CUSPARSE_STATUS_SUCCESS) { + throw new CudaException(cusparseStatus.statusString(result)); + } + return result; + } + + public static void setExceptionsEnabled(boolean enabled) { + exceptionsEnabled = enabled; + } + + public static int cusparseSpGEMM_copy(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC, + int computeType, int alg, cusparseSpGEMMDescr spgemmDescr) { + return checkCusparseStatus( + cusparseSpGEMM_copyNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr)); + } + + private static native int cusparseSpGEMM_copyNative(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC, + int computeType, int alg, cusparseSpGEMMDescr spgemmDescr); + + public static int cusparseGetMatIndexBase(cusparseMatDescr descrA) { + return checkCusparseStatus(cusparseGetMatIndexBaseNative(descrA)); + } + + private static native int cusparseGetMatIndexBaseNative(cusparseMatDescr descrA); + + public static int cusparseCreateCsr(cusparseSpMatDescr spMatDescr, long rows, long cols, long nnz, + Pointer csrRowOffsets, Pointer csrColInd, Pointer csrValues, int csrRowOffsetsType, int csrColIndType, + int idxBase, int valueType) { + return checkCusparseStatus( + cusparseCreateCsrNative(spMatDescr, rows, cols, nnz, 
csrRowOffsets, csrColInd, csrValues, csrRowOffsetsType, + csrColIndType, idxBase, valueType)); + } + + private static native int cusparseCreateCsrNative(cusparseSpMatDescr spMatDescr, long rows, long cols, long nnz, + Pointer csrRowOffsets, Pointer csrColInd, Pointer csrValues, int csrRowOffsetsType, int csrColIndType, + int idxBase, int valueType); + + public static int cusparseCreateDnVec(cusparseDnVecDescr dnVecDescr, long size, Pointer values, int valueType) { + return checkCusparseStatus(cusparseCreateDnVecNative(dnVecDescr, size, values, valueType)); + } + + private static native int cusparseCreateDnVecNative(cusparseDnVecDescr dnVecDescr, long size, Pointer values, + int valueType); + + public static int cusparseSpMV_bufferSize(cusparseHandle handle, int opA, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY, + int computeType, int alg, long[] bufferSize) { + return checkCusparseStatus( + cusparseSpMV_bufferSizeNative(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, bufferSize)); + } + + private static native int cusparseSpMV_bufferSizeNative(cusparseHandle handle, int opA, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY, + int computeType, int alg, long[] bufferSize); + + public static int cusparseSpMV(cusparseHandle handle, int opA, Pointer alpha, cusparseConstSpMatDescr matA, + cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY, int computeType, int alg, + Pointer externalBuffer) { + return checkCusparseStatus( + cusparseSpMVNative(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, externalBuffer)); + } + + private static native int cusparseSpMVNative(cusparseHandle handle, int opA, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY, + int computeType, int alg, Pointer externalBuffer); + + public static int 
cusparseDestroy(cusparseHandle handle) { + return checkCusparseStatus(cusparseDestroyNative(handle)); + } + + private static native int cusparseDestroyNative(cusparseHandle handle); + + public static int cusparseDestroyDnVec(cusparseConstDnVecDescr dnVecDescr) { + return checkCusparseStatus(cusparseDestroyDnVecNative(dnVecDescr)); + } + + private static native int cusparseDestroyDnVecNative(cusparseConstDnVecDescr dnVecDescr); + + public static int cusparseDestroyDnMat(cusparseConstDnMatDescr dnMatDescr) { + return checkCusparseStatus(cusparseDestroyDnMatNative(dnMatDescr)); + } + + private static native int cusparseDestroyDnMatNative(cusparseConstDnMatDescr dnMatDescr); + + public static int cusparseDestroySpMat(cusparseConstSpMatDescr spMatDescr) { + return checkCusparseStatus(cusparseDestroySpMatNative(spMatDescr)); + } + + private static native int cusparseDestroySpMatNative(cusparseConstSpMatDescr spMatDescr); + + public static int cusparseSpMM(cusparseHandle handle, int opA, int opB, Pointer alpha, cusparseConstSpMatDescr matA, + cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC, int computeType, int alg, + Pointer externalBuffer) { + return checkCusparseStatus( + cusparseSpMMNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, externalBuffer)); + } + + private static native int cusparseSpMMNative(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC, + int computeType, int alg, Pointer externalBuffer); + + public static int cusparseSpMM_bufferSize(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC, + int computeType, int alg, long[] bufferSize) { + return checkCusparseStatus( + cusparseSpMM_bufferSizeNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, + bufferSize)); + } + + private static 
native int cusparseSpMM_bufferSizeNative(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC, + int computeType, int alg, long[] bufferSize); + + public static int cusparseCreateDnMat(cusparseDnMatDescr dnMatDescr, long rows, long cols, long ld, Pointer values, + int valueType, int order) { + return checkCusparseStatus(cusparseCreateDnMatNative(dnMatDescr, rows, cols, ld, values, valueType, order)); + } + + private static native int cusparseCreateDnMatNative(cusparseDnMatDescr dnMatDescr, long rows, long cols, long ld, + Pointer values, int valueType, int order); + + public static int cusparseCsrSetPointers(cusparseSpMatDescr spMatDescr, Pointer csrRowOffsets, Pointer csrColInd, + Pointer csrValues) { + return checkCusparseStatus(cusparseCsrSetPointersNative(spMatDescr, csrRowOffsets, csrColInd, csrValues)); + } + + private static native int cusparseCsrSetPointersNative(cusparseSpMatDescr spMatDescr, Pointer csrRowOffsets, + Pointer csrColInd, Pointer csrValues); + + public static int cusparseCsr2cscEx2(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal, + Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, int valType, + int copyValues, int idxBase, int alg, Pointer buffer) { + return checkCusparseStatus( + cusparseCsr2cscEx2Native(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd, + valType, copyValues, idxBase, alg, buffer)); + } + + private static native int cusparseCsr2cscEx2Native(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal, + Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, int valType, + int copyValues, int idxBase, int alg, Pointer buffer); + + public static int cusparseCsr2cscEx2_bufferSize(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal, + Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer 
cscColPtr, Pointer cscRowInd, int valType, + int copyValues, int idxBase, int alg, long[] bufferSize) { + return checkCusparseStatus( + cusparseCsr2cscEx2_bufferSizeNative(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, + cscRowInd, valType, copyValues, idxBase, alg, bufferSize)); + } + + private static native int cusparseCsr2cscEx2_bufferSizeNative(cusparseHandle handle, int m, int n, int nnz, + Pointer csrVal, Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, + int valType, int copyValues, int idxBase, int alg, long[] bufferSize); + + public static int cusparseDcsrgeam2(cusparseHandle handle, int m, int n, Pointer alpha, cusparseMatDescr descrA, + int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, Pointer beta, + cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB, Pointer csrSortedColIndB, + cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC, Pointer csrSortedColIndC, + Pointer pBuffer) { + return checkCusparseStatus( + cusparseDcsrgeam2Native(handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer)); + } + + private static native int cusparseDcsrgeam2Native(cusparseHandle handle, int m, int n, Pointer alpha, + cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, + Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB, + Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC, + Pointer csrSortedColIndC, Pointer pBuffer); + + public static int cusparseDcsrgeam2_bufferSizeExt(cusparseHandle handle, int m, int n, Pointer alpha, + cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer 
csrSortedRowPtrA, Pointer csrSortedColIndA, + Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB, + Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC, + Pointer csrSortedColIndC, long[] pBufferSizeInBytes) { + return checkCusparseStatus( + cusparseDcsrgeam2_bufferSizeExtNative(handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes)); + } + + private static native int cusparseDcsrgeam2_bufferSizeExtNative(cusparseHandle handle, int m, int n, Pointer alpha, + cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, + Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB, + Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC, + Pointer csrSortedColIndC, long[] pBufferSizeInBytes); + + public static int cusparseSparseToDense(cusparseHandle handle, cusparseConstSpMatDescr matA, + cusparseDnMatDescr matB, int alg, Pointer externalBuffer) { + return checkCusparseStatus(cusparseSparseToDenseNative(handle, matA, matB, alg, externalBuffer)); + } + + private static native int cusparseSparseToDenseNative(cusparseHandle handle, cusparseConstSpMatDescr matA, + cusparseDnMatDescr matB, int alg, Pointer externalBuffer); + + public static int cusparseSparseToDense_bufferSize(cusparseHandle handle, cusparseConstSpMatDescr matA, + cusparseDnMatDescr matB, int alg, long[] bufferSize) { + return checkCusparseStatus(cusparseSparseToDense_bufferSizeNative(handle, matA, matB, alg, bufferSize)); + } + + private static native int cusparseSparseToDense_bufferSizeNative(cusparseHandle handle, + cusparseConstSpMatDescr matA, cusparseDnMatDescr matB, int alg, long[] bufferSize); + 
+ public static int cusparseDenseToSparse_bufferSize(cusparseHandle handle, cusparseConstDnMatDescr matA, + cusparseSpMatDescr matB, int alg, long[] bufferSize) { + return checkCusparseStatus(cusparseDenseToSparse_bufferSizeNative(handle, matA, matB, alg, bufferSize)); + } + + private static native int cusparseDenseToSparse_bufferSizeNative(cusparseHandle handle, + cusparseConstDnMatDescr matA, cusparseSpMatDescr matB, int alg, long[] bufferSize); + + public static int cusparseDenseToSparse_analysis(cusparseHandle handle, cusparseConstDnMatDescr matA, + cusparseSpMatDescr matB, int alg, Pointer externalBuffer) { + return checkCusparseStatus(cusparseDenseToSparse_analysisNative(handle, matA, matB, alg, externalBuffer)); + } + + private static native int cusparseDenseToSparse_analysisNative(cusparseHandle handle, cusparseConstDnMatDescr matA, + cusparseSpMatDescr matB, int alg, Pointer externalBuffer); + + public static int cusparseDenseToSparse_convert(cusparseHandle handle, cusparseConstDnMatDescr matA, + cusparseSpMatDescr matB, int alg, Pointer externalBuffer) { + return checkCusparseStatus(cusparseDenseToSparse_convertNative(handle, matA, matB, alg, externalBuffer)); + } + + private static native int cusparseDenseToSparse_convertNative(cusparseHandle handle, cusparseConstDnMatDescr matA, + cusparseSpMatDescr matB, int alg, Pointer externalBuffer); + + public static int cusparseDnnz(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA, Pointer A, + int lda, Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr) { + return checkCusparseStatus( + cusparseDnnzNative(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr)); + } + + private static native int cusparseDnnzNative(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA, + Pointer A, int lda, Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr); + + public static int cusparseSetMatType(cusparseMatDescr descrA, int type) { + return 
checkCusparseStatus(cusparseSetMatTypeNative(descrA, type)); + } + + private static native int cusparseSetMatTypeNative(cusparseMatDescr descrA, int type); + + public static int cusparseSetMatIndexBase(cusparseMatDescr descrA, int base) { + return checkCusparseStatus(cusparseSetMatIndexBaseNative(descrA, base)); + } + + private static native int cusparseSetMatIndexBaseNative(cusparseMatDescr descrA, int base); + + public static int cusparseSetPointerMode(cusparseHandle handle, int mode) { + return checkCusparseStatus(cusparseSetPointerModeNative(handle, mode)); + } + + private static native int cusparseSetPointerModeNative(cusparseHandle handle, int mode); + + public static int cusparseXcsrgeam2Nnz(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, int nnzA, + Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, cusparseMatDescr descrB, int nnzB, Pointer csrSortedRowPtrB, + Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedRowPtrC, Pointer nnzTotalDevHostPtr, + Pointer workspace) { + return checkCusparseStatus( + cusparseXcsrgeam2NnzNative(handle, m, n, descrA, nnzA, csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, + csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, nnzTotalDevHostPtr, workspace)); + } + + private static native int cusparseXcsrgeam2NnzNative(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, + int nnzA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, cusparseMatDescr descrB, int nnzB, + Pointer csrSortedRowPtrB, Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedRowPtrC, + Pointer nnzTotalDevHostPtr, Pointer workspace); + + public static int cusparseSpGEMM_workEstimation(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC, + int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize1, Pointer externalBuffer1) { + return checkCusparseStatus( + 
cusparseSpGEMM_workEstimationNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, + spgemmDescr, bufferSize1, externalBuffer1)); + } + + private static native int cusparseSpGEMM_workEstimationNative(cusparseHandle handle, int opA, int opB, + Pointer alpha, cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, + cusparseSpMatDescr matC, int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize1, + Pointer externalBuffer1); + + public static int cusparseSpGEMM_compute(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC, + int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize2, Pointer externalBuffer2) { + return checkCusparseStatus( + cusparseSpGEMM_computeNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr, + bufferSize2, externalBuffer2)); + } + + private static native int cusparseSpGEMM_computeNative(cusparseHandle handle, int opA, int opB, Pointer alpha, + cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC, + int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize2, Pointer externalBuffer2); + + public static int cusparseSpMatGetSize(cusparseConstSpMatDescr spMatDescr, long[] rows, long[] cols, long[] nnz) { + return checkCusparseStatus(cusparseSpMatGetSizeNative(spMatDescr, rows, cols, nnz)); + } + + private static native int cusparseSpMatGetSizeNative(cusparseConstSpMatDescr spMatDescr, long[] rows, long[] cols, + long[] nnz); + + public static int cusparseXcsrsort(cusparseHandle handle, int m, int n, int nnz, cusparseMatDescr descrA, + Pointer csrRowPtrA, Pointer csrColIndA, Pointer P, Pointer pBuffer) { + return checkCusparseStatus( + cusparseXcsrsortNative(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, pBuffer)); + } + + private static native int 
cusparseXcsrsortNative(cusparseHandle handle, int m, int n, int nnz, + cusparseMatDescr descrA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer P, Pointer pBuffer); + + public static int cusparseXcsrsort_bufferSizeExt(cusparseHandle handle, int m, int n, int nnz, Pointer csrRowPtrA, + Pointer csrColIndA, long[] pBufferSizeInBytes) { + return checkCusparseStatus( + cusparseXcsrsort_bufferSizeExtNative(handle, m, n, nnz, csrRowPtrA, csrColIndA, pBufferSizeInBytes)); + } + + private static native int cusparseXcsrsort_bufferSizeExtNative(cusparseHandle handle, int m, int n, int nnz, + Pointer csrRowPtrA, Pointer csrColIndA, long[] pBufferSizeInBytes); + + public static int cusparseCreate(cusparseHandle handle) { + return checkCusparseStatus(cusparseCreateNative(handle)); + } + + private static native int cusparseCreateNative(cusparseHandle handle); + + public static int cusparseCreateIdentityPermutation(cusparseHandle handle, int n, Pointer p) { + return checkCusparseStatus(cusparseCreateIdentityPermutationNative(handle, n, p)); + } + + private static native int cusparseCreateIdentityPermutationNative(cusparseHandle handle, int n, Pointer p); + +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java new file mode 100644 index 00000000000..4ac0d81b9b9 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Holder for the integer values of the cuSPARSE {@code cusparseAction_t}
 * enumeration. The class carries constants only and cannot be instantiated.
 */
public class cusparseAction {

	private cusparseAction() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_ACTION_SYMBOLIC}. */
	public static final int CUSPARSE_ACTION_SYMBOLIC = 0;

	/** Value of {@code CUSPARSE_ACTION_NUMERIC}. */
	public static final int CUSPARSE_ACTION_NUMERIC = 1;
}
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseConstDnMatDescr extends NativePointerObject { + + public cusparseConstDnMatDescr() { + // Default constructor + } + + cusparseConstDnMatDescr(cusparseDnMatDescr other) { + super(other); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java new file mode 100644 index 00000000000..a11667a8474 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseConstDnVecDescr extends NativePointerObject { + + public cusparseConstDnVecDescr() { + // Default constructor + } + + cusparseConstDnVecDescr(cusparseDnVecDescr other) { + super(other); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java new file mode 100644 index 00000000000..ce1309d1b8d --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseConstSpMatDescr extends NativePointerObject { + + public cusparseConstSpMatDescr() { + // Default constructor + } + + cusparseConstSpMatDescr(cusparseSpMatDescr other) { + super(other); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java new file mode 100644 index 00000000000..3eba805e54f --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Holder for the integer values of the cuSPARSE {@code cusparseCsr2CscAlg_t}
 * enumeration. The class carries constants only and cannot be instantiated.
 */
public class cusparseCsr2CscAlg {

	private cusparseCsr2CscAlg() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_CSR2CSC_ALG1}. */
	public static final int CUSPARSE_CSR2CSC_ALG1 = 1;

	/** Value of {@code CUSPARSE_CSR2CSC_ALG_DEFAULT}; same value as {@link #CUSPARSE_CSR2CSC_ALG1}. */
	public static final int CUSPARSE_CSR2CSC_ALG_DEFAULT = CUSPARSE_CSR2CSC_ALG1;
}
/**
 * Holder for the integer values of the cuSPARSE
 * {@code cusparseDenseToSparseAlg_t} enumeration. The class carries constants
 * only and cannot be instantiated.
 */
public class cusparseDenseToSparseAlg {

	private cusparseDenseToSparseAlg() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_DENSETOSPARSE_ALG_DEFAULT}. */
	public static final int CUSPARSE_DENSETOSPARSE_ALG_DEFAULT = 0;
}
/**
 * Holder for the integer values of the cuSPARSE {@code cusparseDiagType_t}
 * enumeration. The class carries constants only and cannot be instantiated.
 */
public class cusparseDiagType {

	private cusparseDiagType() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_DIAG_TYPE_NON_UNIT}. */
	public static final int CUSPARSE_DIAG_TYPE_NON_UNIT = 0;

	/** Value of {@code CUSPARSE_DIAG_TYPE_UNIT}. */
	public static final int CUSPARSE_DIAG_TYPE_UNIT = 1;
}
/**
 * Holder for the integer values of the cuSPARSE {@code cusparseDirection_t}
 * enumeration. The class carries constants only and cannot be instantiated.
 */
public class cusparseDirection {

	private cusparseDirection() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_DIRECTION_ROW}. */
	public static final int CUSPARSE_DIRECTION_ROW = 0;

	/** Value of {@code CUSPARSE_DIRECTION_COLUMN}. */
	public static final int CUSPARSE_DIRECTION_COLUMN = 1;
}
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseDnMatDescr extends NativePointerObject { + + public cusparseDnMatDescr() { + // Default constructor + } + + public cusparseConstDnMatDescr asConst() { + return new cusparseConstDnMatDescr(this); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java new file mode 100644 index 00000000000..4973ab04c49 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseDnVecDescr extends NativePointerObject { + + public cusparseDnVecDescr() { + // Default constructor + } + + public cusparseConstDnVecDescr asConst() { + return new cusparseConstDnVecDescr(this); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java new file mode 100644 index 00000000000..45be77d6551 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Holder for the integer values of the cuSPARSE {@code cusparseFillMode_t}
 * enumeration. The class carries constants only and cannot be instantiated.
 */
public class cusparseFillMode {

	private cusparseFillMode() {
		// Constants holder; instances are never created
	}

	/** Value of {@code CUSPARSE_FILL_MODE_LOWER}. */
	public static final int CUSPARSE_FILL_MODE_LOWER = 0;

	/** Value of {@code CUSPARSE_FILL_MODE_UPPER}. */
	public static final int CUSPARSE_FILL_MODE_UPPER = 1;
}
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseHandle extends NativePointerObject { + + public cusparseHandle() { + // Default constructor + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java new file mode 100644 index 00000000000..bb7622bf2eb --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseIndexBase { + + public static final int CUSPARSE_INDEX_BASE_ZERO = 0; + + public static final int CUSPARSE_INDEX_BASE_ONE = 1; + + private cusparseIndexBase() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java new file mode 100644 index 00000000000..59e5dbd6060 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseIndexType { + + public static final int CUSPARSE_INDEX_16U = 1; + + public static final int CUSPARSE_INDEX_32I = 2; + + public static final int CUSPARSE_INDEX_64I = 3; + + private cusparseIndexType() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java new file mode 100644 index 00000000000..ef8dd2e90e7 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseMatDescr extends NativePointerObject { + + public cusparseMatDescr() { + // Default constructor + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java new file mode 100644 index 00000000000..4f71f78be2a --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseMatrixType { + + public static final int CUSPARSE_MATRIX_TYPE_GENERAL = 0; + + public static final int CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1; + + public static final int CUSPARSE_MATRIX_TYPE_HERMITIAN = 2; + + public static final int CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3; + + private cusparseMatrixType() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java new file mode 100644 index 00000000000..c56167060b8 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseOperation { + + public static final int CUSPARSE_OPERATION_NON_TRANSPOSE = 0; + + public static final int CUSPARSE_OPERATION_TRANSPOSE = 1; + + public static final int CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2; + + private cusparseOperation() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java new file mode 100644 index 00000000000..6efedd2cbad --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseOrder { + + public static final int CUSPARSE_ORDER_COL = 1; + + public static final int CUSPARSE_ORDER_ROW = 2; + + private cusparseOrder() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java new file mode 100644 index 00000000000..80f65a3937c --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparsePointerMode { + + public static final int CUSPARSE_POINTER_MODE_HOST = 0; + + public static final int CUSPARSE_POINTER_MODE_DEVICE = 1; + + private cusparsePointerMode() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java new file mode 100644 index 00000000000..3b874858892 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseSpGEMMAlg { + + public static final int CUSPARSE_SPGEMM_DEFAULT = 0; + + public static final int CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC = 1; + + public static final int CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC = 2; + + public static final int CUSPARSE_SPGEMM_ALG1 = 3; + + public static final int CUSPARSE_SPGEMM_ALG2 = 4; + + public static final int CUSPARSE_SPGEMM_ALG3 = 5; + + private cusparseSpGEMMAlg() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java new file mode 100644 index 00000000000..a3edd1e2ef3 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseSpGEMMDescr extends NativePointerObject { + + public cusparseSpGEMMDescr() { + // Default constructor + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java new file mode 100644 index 00000000000..be51e280504 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseSpMMAlg { + + public static final int CUSPARSE_SPMM_ALG_DEFAULT = 0; + + public static final int CUSPARSE_SPMM_COO_ALG1 = 1; + + public static final int CUSPARSE_SPMM_COO_ALG2 = 2; + + public static final int CUSPARSE_SPMM_COO_ALG3 = 3; + + public static final int CUSPARSE_SPMM_COO_ALG4 = 5; + + public static final int CUSPARSE_SPMM_CSR_ALG1 = 4; + + public static final int CUSPARSE_SPMM_CSR_ALG2 = 6; + + public static final int CUSPARSE_SPMM_CSR_ALG3 = 12; + + public static final int CUSPARSE_SPMM_BLOCKED_ELL_ALG1 = 13; + + private cusparseSpMMAlg() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java new file mode 100644 index 00000000000..23b6896c7e3 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseSpMVAlg { + + public static final int CUSPARSE_SPMV_ALG_DEFAULT = 0; + + public static final int CUSPARSE_SPMV_CSR_ALG1 = 2; + + public static final int CUSPARSE_SPMV_CSR_ALG2 = 3; + + public static final int CUSPARSE_SPMV_COO_ALG1 = 1; + + public static final int CUSPARSE_SPMV_COO_ALG2 = 4; + + + private cusparseSpMVAlg() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java new file mode 100644 index 00000000000..2fbfb47042e --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +import org.apache.sysds.cujava.NativePointerObject; + +public class cusparseSpMatDescr extends NativePointerObject { + + public cusparseSpMatDescr() { + // Default constructor + } + + public cusparseConstSpMatDescr asConst() { + return new cusparseConstSpMatDescr(this); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java new file mode 100644 index 00000000000..39ef312a11a --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseSparseToDenseAlg { + + public static final int CUSPARSE_SPARSETODENSE_ALG_DEFAULT = 0; + + private cusparseSparseToDenseAlg() { + // Private constructor to prevent instantiation + } +} diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java new file mode 100644 index 00000000000..8387557c909 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * Source for the numerical value: + * https://gitlab.com/nvidia/headers/cuda-individual/cusparse/-/blob/d4fd9303b8a5a770d11c2c60211e3f9e76410e51/cusparse.h + */ + +package org.apache.sysds.cujava.cusparse; + +public class cusparseStatus { + + public static final int CUSPARSE_STATUS_SUCCESS = 0; + + public static final int CUSPARSE_STATUS_NOT_INITIALIZED = 1; + + public static final int CUSPARSE_STATUS_ALLOC_FAILED = 2; + + public static final int CUSPARSE_STATUS_INVALID_VALUE = 3; + + public static final int CUSPARSE_STATUS_ARCH_MISMATCH = 4; + + public static final int CUSPARSE_STATUS_MAPPING_ERROR = 5; + + public static final int CUSPARSE_STATUS_EXECUTION_FAILED = 6; + + public static final int CUSPARSE_STATUS_INTERNAL_ERROR = 7; + + public static final int CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8; + + public static final int CUSPARSE_STATUS_ZERO_PIVOT = 9; + + public static final int CUSPARSE_STATUS_NOT_SUPPORTED = 10; + + public static final int CUSPARSE_STATUS_INSUFFICIENT_RESOURCES = 11; + + public static String statusString(int err) { + return switch(err) { + case CUSPARSE_STATUS_SUCCESS -> "CUSPARSE_STATUS_SUCCESS"; + case CUSPARSE_STATUS_NOT_INITIALIZED -> "CUSPARSE_STATUS_NOT_INITIALIZED"; + case CUSPARSE_STATUS_ALLOC_FAILED -> "CUSPARSE_STATUS_ALLOC_FAILED"; + case CUSPARSE_STATUS_INVALID_VALUE -> "CUSPARSE_STATUS_INVALID_VALUE"; + case CUSPARSE_STATUS_ARCH_MISMATCH -> "CUSPARSE_STATUS_ARCH_MISMATCH"; + case CUSPARSE_STATUS_MAPPING_ERROR -> "CUSPARSE_STATUS_MAPPING_ERROR"; + case CUSPARSE_STATUS_EXECUTION_FAILED -> "CUSPARSE_STATUS_EXECUTION_FAILED"; + case CUSPARSE_STATUS_INTERNAL_ERROR -> "CUSPARSE_STATUS_INTERNAL_ERROR"; + case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED -> "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + case CUSPARSE_STATUS_ZERO_PIVOT -> "CUSPARSE_STATUS_ZERO_PIVOT"; + case CUSPARSE_STATUS_NOT_SUPPORTED -> "CUSPARSE_STATUS_NOT_SUPPORTED"; + case CUSPARSE_STATUS_INSUFFICIENT_RESOURCES -> 
"CUSPARSE_STATUS_INSUFFICIENT_RESOURCES"; + default -> "Invalid error"; + }; + } + +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java b/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java new file mode 100644 index 00000000000..188d1908056 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.driver; + +import org.apache.sysds.cujava.NativePointerObject; + +public class CUcontext extends NativePointerObject { + + public CUcontext() { + } +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java b/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java new file mode 100644 index 00000000000..86b027717ef --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.driver; + +import org.apache.sysds.cujava.NativePointerObject; + +public class CUdevice extends NativePointerObject { + + public CUdevice() {} +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java b/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java new file mode 100644 index 00000000000..d50e014ec21 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java @@ -0,0 +1,756 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.cujava.driver; + +/** + * This class is a java-side replication of CUdevice_attribute. + * The descriptions were directly taken from: + * https://docs.nvidia.com/cuda/archive/12.6.1/pdf/CUDA_Driver_API.pdf + */ + +public class CUdevice_attribute { + + /** + * Maximum number of threads per block + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1; + + /** + * Maximum block dimension X + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2; + + /** + * Maximum block dimension Y + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3; + + /** + * Maximum block dimension Z + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4; + + /** + * Maximum grid dimension X + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5; + + /** + * Maximum grid dimension Y + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6; + + /** + * Maximum grid dimension Z + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7; + + /** + * Maximum shared memory available per block in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8; + + /** + * @deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK + */ + public static final int CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8; + + /** + * Memory available on device for __constant__ variables in a CUDA C kernel in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9; + + /** + * Warp size in threads + */ + public static final int CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10; + + /** + * Maximum pitch in bytes allowed by memory copies + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11; + + /** + * Maximum number of 32-bit registers available per block + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12; + + /** + * @deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK + */ + public 
static final int CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12; + + /** + * Typical clock frequency in kilohertz + */ + public static final int CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13; + + /** + * Alignment requirement for textures + */ + public static final int CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14; + + /** + * Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead + * CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. + */ + public static final int CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15; + + /** + * Number of multiprocessors on device + */ + public static final int CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16; + + /** + * Specifies whether there is a run time limit on kernels + */ + public static final int CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17; + + /** + * Device is integrated with host memory + */ + public static final int CU_DEVICE_ATTRIBUTE_INTEGRATED = 18; + + /** + * Device can map host memory into CUDA address space + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19; + + /** + * Compute mode (See CUcomputemode for details) + */ + public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20; + + /** + * Maximum 1D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21; + + /** + * Maximum 2D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22; + + /** + * Maximum 2D texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23; + + /** + * Maximum 3D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24; + + /** + * Maximum 3D texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25; + + /** + * Maximum 3D texture depth + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26; + + /** + * Maximum 2D layered texture width + */ + public static final int 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27; + + /** + * Maximum 2D layered texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28; + + /** + * Maximum layers in a 2D layered texture + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29; + + /** + * @deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27; + + /** + * @deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28; + + /** + * @deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29; + + /** + * Alignment requirement for surfaces + */ + public static final int CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30; + + /** + * Device can possibly execute multiple kernels concurrently + */ + public static final int CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31; + + /** + * Device has ECC support enabled + */ + public static final int CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32; + + /** + * PCI bus ID of the device + */ + public static final int CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33; + + /** + * PCI device ID of the device + */ + public static final int CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34; + + /** + * Device is using TCC driver model + */ + public static final int CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35; + + /** + * Peak memory clock frequency in kilohertz + */ + public static final int CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36; + + /** + * Global memory bus width in bits + */ + public static final int CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37; + + /** + * Size of L2 cache in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38; + + /** + * Maximum resident threads per multiprocessor + */ + public static final 
int CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39; + + /** + * Number of asynchronous engines + */ + public static final int CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40; + + /** + * Device shares a unified address space with the host + */ + public static final int CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41; + + /** + * Maximum 1D layered texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42; + + /** + * Maximum layers in a 1D layered texture + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43; + + /** + * @deprecated, do not use. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44; + + /** + * Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45; + + /** + * Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46; + + /** + * Alternate maximum 3D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47; + + /** + * Alternate maximum 3D texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48; + + /** + * Alternate maximum 3D texture depth + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49; + + /** + * PCI domain ID of the device + */ + public static final int CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50; + + /** + * Pitch alignment requirement for textures + */ + public static final int CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51; + + /** + * Maximum cubemap texture width/height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52; + + /** + * Maximum cubemap layered texture width/height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53; + + /** + * 
Maximum layers in a cubemap layered texture + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54; + + /** + * Maximum 1D surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55; + + /** + * Maximum 2D surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56; + + /** + * Maximum 2D surface height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57; + + /** + * Maximum 3D surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58; + + /** + * Maximum 3D surface height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59; + + /** + * Maximum 3D surface depth + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60; + + /** + * Maximum 1D layered surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61; + + /** + * Maximum layers in a 1D layered surface + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62; + + /** + * Maximum 2D layered surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63; + + /** + * Maximum 2D layered surface height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64; + + /** + * Maximum layers in a 2D layered surface + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65; + + /** + * Maximum cubemap surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66; + + /** + * Maximum cubemap layered surface width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67; + + /** + * Maximum layers in a cubemap layered surface + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68; + + /** + * @deprecated, do not use. 
Use cudaDeviceGetTexture1DLinearMaxWidth() or + * cuDeviceGetTexture1DLinearMaxWidth() instead. + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69; + + /** + * Maximum 2D linear texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70; + + /** + * Maximum 2D linear texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71; + + /** + * Maximum 2D linear texture pitch in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72; + + /** + * Maximum mipmapped 2D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73; + + /** + * Maximum mipmapped 2D texture height + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74; + + /** + * Major compute capability version number + */ + public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75; + + /** + * Minor compute capability version number + */ + public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76; + + /** + * Maximum mipmapped 1D texture width + */ + public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77; + + /** + * Device supports stream priorities + */ + public static final int CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78; + + /** + * Device supports caching globals in L1 + */ + public static final int CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79; + + /** + * Device supports caching locals in L1 + */ + public static final int CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80; + + /** + * Maximum shared memory available per multiprocessor in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81; + + /** + * Maximum number of 32-bit registers available per multiprocessor + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 
82; + + /** + * Device can allocate managed memory on this system + */ + public static final int CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83; + + /** + * Device is on a multi-GPU board + */ + public static final int CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84; + + /** + * Unique id for a group of devices on the same multi-GPU board + */ + public static final int CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85; + + /** + * Link between the device and the host supports native atomic operations (this is a placeholder + * attribute, and is not supported on any current hardware) + */ + public static final int CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86; + + /** + * Ratio of single precision performance (in floating-point operations per second) to double precision + * performance + */ + public static final int CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87; + + /** + * Device supports coherently accessing pageable memory without calling cudaHostRegister on it + */ + public static final int CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88; + + /** + * Device can coherently access managed memory concurrently with the CPU + */ + public static final int CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89; + + /** + * Device supports compute preemption. + */ + public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90; + + /** + * Device can access host registered memory at the same virtual address as the CPU + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91; + + /** + * @deprecated, along with v1 MemOps API, cuStreamBatchMemOp and related APIs are supported. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = 92; + + /** + * @deprecated, along with v1 MemOps API, 64-bit operations are supported in cuStreamBatchMemOp + * and related APIs. 
+ */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = 93; + + /** + * @deprecated, along with v1 MemOps API, CU_STREAM_WAIT_VALUE_NOR is supported. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = 94; + + /** + * Device supports launching cooperative kernels via cuLaunchCooperativeKernel + */ + public static final int CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95; + + /** + * @deprecated, cuLaunchCooperativeKernelMultiDevice is deprecated. + */ + public static final int CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96; + + /** + * Maximum optin shared memory per block + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97; + + /** + * The CU_STREAM_WAIT_VALUE_FLUSH flag and the + * CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See + * Stream Memory Operations for additional details. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98; + + /** + * Device supports host memory registration via cudaHostRegister. + */ + public static final int CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99; + + /** + * Device accesses pageable memory via the host's page tables. + */ + public static final int CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100; + + /** + * The host can directly access managed memory on the device without migration. 
+ */ + public static final int CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101; + + /** + * @deprecated, Use + * CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED + */ + public static final int CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102; + + /** + * Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, + * cuMemMap and related APIs + */ + public static final int CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102; + + /** + * Device supports exporting memory to a posix file descriptor with + * cuMemExportToShareableHandle, if requested via cuMemCreate + */ + public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103; + + /** + * Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, + * if requested via cuMemCreate + */ + public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104; + + /** + * Device supports exporting memory to a Win32 KMT handle with + * cuMemExportToShareableHandle, if requested via cuMemCreate + */ + public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105; + + /** + * Maximum number of blocks per multiprocessor + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106; + + /** + * Device supports compression of memory + */ + public static final int CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107; + + /** + * Maximum L2 persisting lines capacity setting in bytes. + */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108; + + /** + * Maximum value of CUaccessPolicyWindow::num_bytes. 
+ */ + public static final int CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109; + + /** + * Device supports specifying the GPUDirect RDMA flag with cuMemCreate + */ + public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110; + + /** + * Shared memory reserved by CUDA driver per block in bytes + */ + public static final int CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111; + + /** + * Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays + */ + public static final int CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112; + + /** + * Device supports using the cuMemHostRegister flag CU_MEMHOSTREGISTER_READ_ONLY to + * register memory that must be mapped as read-only to the GPU + */ + public static final int CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113; + + /** + * External timeline semaphore interop is supported on the device + */ + public static final int CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114; + + /** + * Device supports using the cuMemAllocAsync and cuMemPool family of APIs + */ + public static final int CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115; + + /** + * Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages + * (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information) + */ + public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116; + + /** + * The returned attribute shall be interpreted as a bitmask, where the individual bits are described by + * the CUflushGPUDirectRDMAWritesOptions enum + */ + public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117; + + /** + * GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope + * indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical + * values returned here.
+ */ + public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118; + + /** + * Handle types supported with mempool based IPC + */ + public static final int CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119; + + /** + * Indicates device supports cluster launch + */ + public static final int CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = 120; + + /** + * Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays + */ + public static final int CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121; + + /** + * 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 122; + + /** + * CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs. + */ + public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 123; + + /** + * Device supports buffer sharing with dma_buf mechanism. + */ + public static final int CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124; + + /** + * Device supports IPC Events. + */ + public static final int CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = 125; + + /** + * Number of memory domains the device supports. + */ + public static final int CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = 126; + + /** + * Device supports accessing memory using Tensor Map. + */ + public static final int CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = 127; + + /** + * Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or + * requested with cuMemCreate() + */ + public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = 128; + + /** + * Device supports unified function pointers. 
+ */ + public static final int CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = 129; + + /** + * NUMA configuration of a device: value is of type CUdeviceNumaConfig enum + */ + public static final int CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130; + + /** + * NUMA node ID of the GPU memory + */ + public static final int CU_DEVICE_ATTRIBUTE_NUMA_ID = 131; + + /** + * Device supports switch multicast and reduction operations. + */ + public static final int CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132; + + /** + * Indicates if contexts created on this device will be shared via MPS + */ + public static final int CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133; + + /** + * NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA. + */ + public static final int CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134; + + /** + * Device supports CIG with D3D12. + */ + public static final int CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135; + + //CU_DEVICE_ATTRIBUTE_MAX + + +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java b/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java new file mode 100644 index 00000000000..e6299b19e68 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.driver; + +import org.apache.sysds.cujava.Pointer; +/** + * {@link Pointer} subclass named after the CUDA driver API's CUdeviceptr type + * (presumably a handle to device memory; confirm against the callers). + * The two copying constructors are protected and simply delegate to the + * matching {@link Pointer} constructors; {@link #withByteOffset(long)} is the + * public way to get a new CUdeviceptr built from this one and a byte offset. + */ +public class CUdeviceptr extends Pointer { + + public CUdeviceptr() { + } + + protected CUdeviceptr(CUdeviceptr other) { + super(other); + } + + protected CUdeviceptr(CUdeviceptr other, long byteOffset) { + super(other, byteOffset); + } + + @Override + public CUdeviceptr withByteOffset(long byteOffset) { + return new CUdeviceptr(this, byteOffset); + } +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java b/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java new file mode 100644 index 00000000000..288b63ee097 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.sysds.cujava.driver; + +import org.apache.sysds.cujava.NativePointerObject; +/** + * {@link NativePointerObject} subclass named after the CUDA driver API's + * CUfunction handle type (presumably identifies a kernel function; confirm + * against the callers). It adds no state or behavior of its own and only + * declares a public no-argument constructor. + */ +public class CUfunction extends NativePointerObject { + + public CUfunction() { + } +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java b/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java new file mode 100644 index 00000000000..a7481e2960d --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.driver; + +import org.apache.sysds.cujava.NativePointerObject; +/** + * {@link NativePointerObject} subclass named after the CUDA driver API's + * CUmodule handle type (presumably identifies a loaded module; confirm + * against the callers). It adds no state or behavior of its own and only + * declares a public no-argument constructor. + */ +public class CUmodule extends NativePointerObject { + + public CUmodule() { + } +} diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUresult.java b/src/main/java/org/apache/sysds/cujava/driver/CUresult.java new file mode 100644 index 00000000000..2814b771e68 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/driver/CUresult.java @@ -0,0 +1,721 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.driver; + +/** + * The descriptions are directly taken from: + * https://docs.nvidia.com/cuda/archive/12.6.1/pdf/CUDA_Driver_API.pdf + */ + +public class CUresult { + + + /** + * The API call returned with no errors. In the case of query calls, this also means that the operation + * being queried is complete (see cuEventQuery() and cuStreamQuery()). + */ + public static final int CUDA_SUCCESS = 0; + + /** + * This indicates that one or more of the parameters passed to the API call is not within an acceptable + * range of values. + */ + public static final int CUDA_ERROR_INVALID_VALUE = 1; + + /** + * The API call failed because it was unable to allocate enough memory or other resources to perform + * the requested operation. + */ + public static final int CUDA_ERROR_OUT_OF_MEMORY = 2; + + /** + * This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has + * failed. + */ + public static final int CUDA_ERROR_NOT_INITIALIZED = 3; + + /** + * This indicates that the CUDA driver is in the process of shutting down. + */ + public static final int CUDA_ERROR_DEINITIALIZED = 4; + + /** + * This indicates profiler is not initialized for this run. This can happen when the application is running + * with external profiling tools like visual profiler. 
+ */ + public static final int CUDA_ERROR_PROFILER_DISABLED = 5; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to + * enable/disable the profiling via cuProfilerStart or cuProfilerStop without initialization. + */ + public static final int CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to call + * cuProfilerStart() when profiling is already enabled. + */ + public static final int CUDA_ERROR_PROFILER_ALREADY_STARTED = 7; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to call + * cuProfilerStop() when profiling is already disabled. + */ + public static final int CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8; + + /** + * This indicates that the CUDA driver that the application has loaded is a stub library. Applications + * that run with the stub rather than a real driver loaded will result in CUDA API returning this error. + */ + public static final int CUDA_ERROR_STUB_LIBRARY = 34; + + /** + * This indicates that requested CUDA device is unavailable at the current time. Devices + * are often unavailable due to use of CU_COMPUTEMODE_EXCLUSIVE_PROCESS or + * CU_COMPUTEMODE_PROHIBITED. + */ + public static final int CUDA_ERROR_DEVICE_UNAVAILABLE = 46; + + /** + * This indicates that no CUDA-capable devices were detected by the installed CUDA driver. + */ + public static final int CUDA_ERROR_NO_DEVICE = 100; + + /** + * This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA + * device or that the action requested is invalid for the specified device. + */ + public static final int CUDA_ERROR_INVALID_DEVICE = 101; + + /** + * This error indicates that the Grid license is not applied. + */ + public static final int CUDA_ERROR_DEVICE_NOT_LICENSED = 102; + + /** + * This indicates that the device kernel image is invalid. 
This can also indicate an invalid CUDA + * module. + */ + public static final int CUDA_ERROR_INVALID_IMAGE = 200; + + /** + * This most frequently indicates that there is no context bound to the current thread. This can also + * be returned if the context passed to an API call is not a valid handle (such as a context that has had + * cuCtxDestroy() invoked on it). This can also be returned if a user mixes different API versions + * (i.e. 3010 context with 3020 API calls). See cuCtxGetApiVersion() for more details. This can also + * be returned if the green context passed to an API call was not converted to a CUcontext using + * cuCtxFromGreenCtx API. + */ + public static final int CUDA_ERROR_INVALID_CONTEXT = 201; + + /** + * This indicated that the context being supplied as a parameter to the API call was already the active + * context. This error return is deprecated as of CUDA 3.2. It is no longer an error to + * attempt to push the active context via cuCtxPushCurrent(). + */ + public static final int CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202; + + /** + * This indicates that a map or register operation has failed. + */ + public static final int CUDA_ERROR_MAP_FAILED = 205; + + /** + * This indicates that an unmap or unregister operation has failed. + */ + public static final int CUDA_ERROR_UNMAP_FAILED = 206; + + /** + * This indicates that the specified array is currently mapped and thus cannot be destroyed. + */ + public static final int CUDA_ERROR_ARRAY_IS_MAPPED = 207; + + /** + * This indicates that the resource is already mapped. + */ + public static final int CUDA_ERROR_ALREADY_MAPPED = 208; + + /** + * This indicates that there is no kernel image available that is suitable for the device. This can occur + * when a user specifies code generation options for a particular CUDA source file that do not include + * the corresponding device configuration.
+ */ + public static final int CUDA_ERROR_NO_BINARY_FOR_GPU = 209; + + /** + * This indicates that a resource has already been acquired. + */ + public static final int CUDA_ERROR_ALREADY_ACQUIRED = 210; + + /** + * This indicates that a resource is not mapped. + */ + public static final int CUDA_ERROR_NOT_MAPPED = 211; + + /** + * This indicates that a mapped resource is not available for access as an array. + */ + public static final int CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212; + + /** + * This indicates that a mapped resource is not available for access as a pointer. + */ + public static final int CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213; + + /** + * This indicates that an uncorrectable ECC error was detected during execution. + */ + public static final int CUDA_ERROR_ECC_UNCORRECTABLE = 214; + + /** + * This indicates that the CUlimit passed to the API call is not supported by the active device. + */ + public static final int CUDA_ERROR_UNSUPPORTED_LIMIT = 215; + + /** + * This indicates that the CUcontext passed to the API call can only be bound to a single CPU thread at + * a time but is already bound to a CPU thread. + */ + public static final int CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216; + + /** + * This indicates that peer access is not supported across the given devices. + */ + public static final int CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217; + + /** + * This indicates that a PTX JIT compilation failed. + */ + public static final int CUDA_ERROR_INVALID_PTX = 218; + + /** + * This indicates an error with OpenGL or DirectX context. + */ + public static final int CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219; + + /** + * This indicates that an uncorrectable NVLink error was detected during the execution. + */ + public static final int CUDA_ERROR_NVLINK_UNCORRECTABLE = 220; + + /** + * This indicates that the PTX JIT compiler library was not found. 
+ */ + public static final int CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221; + + /** + * This indicates that the provided PTX was compiled with an unsupported toolchain. + */ + public static final int CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222; + + /** + * This indicates that the PTX JIT compilation was disabled. + */ + public static final int CUDA_ERROR_JIT_COMPILATION_DISABLED = 223; + + /** + * This indicates that the CUexecAffinityType passed to the API call is not supported by the active + * device. + */ + public static final int CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224; + + /** + * This indicates that the code to be compiled by the PTX JIT contains unsupported call to + * cudaDeviceSynchronize. + */ + public static final int CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225; + + /** + * This indicates that the device kernel source is invalid. This includes compilation/linker errors + * encountered in device code or user error. + */ + public static final int CUDA_ERROR_INVALID_SOURCE = 300; + + /** + * This indicates that the file specified was not found. + */ + public static final int CUDA_ERROR_FILE_NOT_FOUND = 301; + + /** + * This indicates that a link to a shared object failed to resolve. + */ + public static final int CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302; + + /** + * This indicates that initialization of a shared object failed. + */ + public static final int CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303; + + /** + * This indicates that an OS call failed. + */ + public static final int CUDA_ERROR_OPERATING_SYSTEM = 304; + + /** + * This indicates that a resource handle passed to the API call was not valid. Resource handles are + * opaque types like CUstream and CUevent. + */ + public static final int CUDA_ERROR_INVALID_HANDLE = 400; + + /** + * This indicates that a resource required by the API call is not in a valid state to perform the requested + * operation. 
+ */ + public static final int CUDA_ERROR_ILLEGAL_STATE = 401; + + /** + * This indicates an attempt was made to introspect an object in a way that would discard semantically + * important information. This is either due to the object using functionality newer than the API version + * used to introspect it or omission of optional return arguments. + */ + public static final int CUDA_ERROR_LOSSY_QUERY = 402; + + /** + * This indicates that a named symbol was not found. Examples of symbols are global/constant + * variable names, driver function names, texture names, and surface names. + */ + public static final int CUDA_ERROR_NOT_FOUND = 500; + + /** + * This indicates that asynchronous operations issued previously have not completed yet. This result + * is not actually an error, but must be indicated differently than CUDA_SUCCESS (which indicates + * completion). Calls that may return this value include cuEventQuery() and cuStreamQuery(). + */ + public static final int CUDA_ERROR_NOT_READY = 600; + + /** + * While executing a kernel, the device encountered a load or store instruction on an invalid memory + * address. This leaves the process in an inconsistent state and any further CUDA work will return the + * same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_ILLEGAL_ADDRESS = 700; + + /** + * This indicates that a launch did not occur because it did not have appropriate resources. This error + * usually indicates that the user has attempted to pass too many arguments to the device kernel, or + * the kernel launch specifies too many threads for the kernel's register count. Passing arguments of + * the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many + * arguments and can also result in this error. + */ + public static final int CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701; + + /** + * This indicates that the device kernel took too long to execute.
This can only occur if timeouts are + * enabled - see the device attribute CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for + * more information. This leaves the process in an inconsistent state and any further CUDA work will + * return the same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_LAUNCH_TIMEOUT = 702; + + /** + * This error indicates a kernel launch that uses an incompatible texturing mode. + */ + public static final int CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703; + + /** + * This error indicates that a call to cuCtxEnablePeerAccess() is trying to re-enable peer access to a + * context which has already had peer access to it enabled. + */ + public static final int CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704; + + /** + * This error indicates that cuCtxDisablePeerAccess() is trying to disable peer access which has not + * been enabled yet via cuCtxEnablePeerAccess(). + */ + public static final int CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705; + + /** + * This error indicates that the primary context for the specified device has already been initialized. + */ + public static final int CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708; + + /** + * This error indicates that the context current to the calling thread has been destroyed using + * cuCtxDestroy, or is a primary context which has not yet been initialized. + */ + public static final int CUDA_ERROR_CONTEXT_IS_DESTROYED = 709; + + /** + * A device-side assert triggered during kernel execution. The context cannot be used anymore, and + * must be destroyed. All existing device memory allocations from this context are invalid and must be + * reconstructed if the program is to continue using CUDA. + */ + public static final int CUDA_ERROR_ASSERT = 710; + + /** + * This error indicates that the hardware resources required to enable peer access have been exhausted + * for one or more of the devices passed to cuCtxEnablePeerAccess(). 
+ */ + public static final int CUDA_ERROR_TOO_MANY_PEERS = 711; + + /** + * This error indicates that the memory range passed to cuMemHostRegister() has already been + * registered. + */ + public static final int CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712; + + /** + * This error indicates that the pointer passed to cuMemHostUnregister() does not correspond to any + * currently registered memory region. + */ + public static final int CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713; + + /** + * While executing a kernel, the device encountered a stack error. This can be due to stack corruption + * or exceeding the stack size limit. This leaves the process in an inconsistent state and any further + * CUDA work will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + public static final int CUDA_ERROR_HARDWARE_STACK_ERROR = 714; + + /** + * While executing a kernel, the device encountered an illegal instruction. This leaves the process in an + * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, + * the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_ILLEGAL_INSTRUCTION = 715; + + /** + * While executing a kernel, the device encountered a load or store instruction on a memory address + * which is not aligned. This leaves the process in an inconsistent state and any further CUDA + * work will return the same error. To continue using CUDA, the process must be terminated and + * relaunched. + */ + public static final int CUDA_ERROR_MISALIGNED_ADDRESS = 716; + + /** + * While executing a kernel, the device encountered an instruction which can only operate on memory + * locations in certain address spaces (global, shared, or local), but was supplied a memory address + * not belonging to an allowed address space. This leaves the process in an inconsistent state and any + * further CUDA work will return the same error. 
To continue using CUDA, the process must be + * terminated and relaunched. + */ + public static final int CUDA_ERROR_INVALID_ADDRESS_SPACE = 717; + + /** + * While executing a kernel, the device program counter wrapped its address space. This leaves the + * process in an inconsistent state and any further CUDA work will return the same error. To continue + * using CUDA, the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_INVALID_PC = 718; + + /** + * An exception occurred on the device while executing a kernel. Common causes include + * dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common + * cases can be system specific - more information about these cases can be found in the system + * specific user guide. This leaves the process in an inconsistent state and any further CUDA work will + * return the same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_LAUNCH_FAILED = 719; + + /** + * This error indicates that the number of blocks launched per grid for a kernel that was launched + * via either cuLaunchCooperativeKernel or cuLaunchCooperativeKernelMultiDevice exceeds the + * maximum number of blocks as allowed by cuOccupancyMaxActiveBlocksPerMultiprocessor or + * cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as + * specified by the device attribute CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. + */ + public static final int CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720; + + /** + * This error indicates that the attempted operation is not permitted. + */ + public static final int CUDA_ERROR_NOT_PERMITTED = 800; + + /** + * This error indicates that the attempted operation is not supported on the current system or device. + */ + public static final int CUDA_ERROR_NOT_SUPPORTED = 801; + + /** + * This error indicates that the system is not yet ready to start any CUDA work. 
To continue using + * CUDA, verify the system configuration is in a valid state and all required driver daemons are + * actively running. More information about this error can be found in the system specific user guide. + */ + public static final int CUDA_ERROR_SYSTEM_NOT_READY = 802; + + /** + * This error indicates that there is a mismatch between the versions of the display driver and the + * CUDA driver. Refer to the compatibility documentation for supported versions. + */ + public static final int CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803; + + /** + * This error indicates that the system was upgraded to run with forward compatibility but the visible + * hardware detected by CUDA does not support this configuration. Refer to the compatibility + * documentation for the supported hardware matrix or ensure that only supported hardware is visible + * during initialization via the CUDA_VISIBLE_DEVICES environment variable. + */ + public static final int CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804; + + /** + * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS + * server. + */ + public static final int CUDA_ERROR_MPS_CONNECTION_FAILED = 805; + + /** + * This error indicates that the remote procedural call between the MPS server and the MPS client + * failed. + */ + public static final int CUDA_ERROR_MPS_RPC_FAILURE = 806; + + /** + * This error indicates that the MPS server is not ready to accept new MPS client requests. This error + * can be returned when the MPS server is in the process of recovering from a fatal failure. + */ + public static final int CUDA_ERROR_MPS_SERVER_NOT_READY = 807; + + /** + * This error indicates that the hardware resources required to create MPS client have been exhausted. + */ + public static final int CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808; + + /** + * This error indicates that the hardware resources required to support device connections have been + * exhausted.
+ */ + public static final int CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809; + + /** + * This error indicates that the MPS client has been terminated by the server. To continue using + * CUDA, the process must be terminated and relaunched. + */ + public static final int CUDA_ERROR_MPS_CLIENT_TERMINATED = 810; + + /** + * This error indicates that the module is using CUDA Dynamic Parallelism, but the current + * configuration, like MPS, does not support it. + */ + public static final int CUDA_ERROR_CDP_NOT_SUPPORTED = 811; + + /** + * This error indicates that a module contains an unsupported interaction between different versions of + * CUDA Dynamic Parallelism. + */ + public static final int CUDA_ERROR_CDP_VERSION_MISMATCH = 812; + + /** + * This error indicates that the operation is not permitted when the stream is capturing. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900; + + /** + * This error indicates that the current capture sequence on the stream has been invalidated due to a + * previous error. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901; + + /** + * This error indicates that the operation would have resulted in a merge of two independent capture + * sequences. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_MERGE = 902; + + /** + * This error indicates that the capture was not initiated in this stream. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903; + + /** + * This error indicates that the capture sequence contains a fork that was not joined to the primary + * stream. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904; + + /** + * This error indicates that a dependency would have been created which crosses the capture sequence + * boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary. 
+ */ + public static final int CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905; + + /** + * This error indicates a disallowed implicit dependency on a current capture sequence from + * cudaStreamLegacy. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906; + + /** + * This error indicates that the operation is not permitted on an event which was last recorded in a + * capturing stream. + */ + public static final int CUDA_ERROR_CAPTURED_EVENT = 907; + + /** + * A stream capture sequence not initiated with the CU_STREAM_CAPTURE_MODE_RELAXED + * argument to cuStreamBeginCapture was passed to cuStreamEndCapture in a different thread. + */ + public static final int CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908; + + /** + * This error indicates that the timeout specified for the wait operation has lapsed. + */ + public static final int CUDA_ERROR_TIMEOUT = 909; + + /** + * This error indicates that the graph update was not performed because it included changes which + * violated constraints specific to instantiated graph update. + */ + public static final int CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910; + + /** + * This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting + * for an external device's signal before consuming shared data, the external device signaled an error + * indicating that the data is not valid for consumption. This leaves the process in an inconsistent state + * and any further CUDA work will return the same error. To continue using CUDA, the process must + * be terminated and relaunched. + */ + public static final int CUDA_ERROR_EXTERNAL_DEVICE = 911; + + /** + * Indicates a kernel launch error due to cluster misconfiguration. + */ + public static final int CUDA_ERROR_INVALID_CLUSTER_SIZE = 912; + + /** + * Indicates a function handle is not loaded when calling an API that requires a loaded function.
+	 */
+	public static final int CUDA_ERROR_FUNCTION_NOT_LOADED = 913;
+
+	/**
+	 * This error indicates one or more resources passed in are not valid resource types for the operation.
+	 */
+	public static final int CUDA_ERROR_INVALID_RESOURCE_TYPE = 914;
+
+	/**
+	 * This error indicates one or more resources are insufficient or non-applicable for the operation.
+	 */
+	public static final int CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915;
+
+	/**
+	 * This indicates that an unknown internal error has occurred.
+	 */
+	public static final int CUDA_ERROR_UNKNOWN = 999;
+
+	/**
+	 * Translates a numeric result code into the name of the constant of this class that carries
+	 * that value. Every arm returns the constant's own identifier as a string.
+	 *
+	 * @param err the numeric result code to look up
+	 * @return the constant name for {@code err}, or the literal {@code "Invalid error"} when no
+	 *         arm of the switch matches
+	 */
+	public static String resultString(int err){
+		return switch (err) {
+			case CUDA_SUCCESS -> "CUDA_SUCCESS";
+			case CUDA_ERROR_INVALID_VALUE -> "CUDA_ERROR_INVALID_VALUE";
+			case CUDA_ERROR_OUT_OF_MEMORY -> "CUDA_ERROR_OUT_OF_MEMORY";
+			case CUDA_ERROR_NOT_INITIALIZED -> "CUDA_ERROR_NOT_INITIALIZED";
+			case CUDA_ERROR_DEINITIALIZED -> "CUDA_ERROR_DEINITIALIZED";
+			case CUDA_ERROR_PROFILER_DISABLED -> "CUDA_ERROR_PROFILER_DISABLED";
+			case CUDA_ERROR_PROFILER_NOT_INITIALIZED -> "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
+			case CUDA_ERROR_PROFILER_ALREADY_STARTED -> "CUDA_ERROR_PROFILER_ALREADY_STARTED";
+			case CUDA_ERROR_PROFILER_ALREADY_STOPPED -> "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
+			case CUDA_ERROR_STUB_LIBRARY -> "CUDA_ERROR_STUB_LIBRARY";
+			case CUDA_ERROR_DEVICE_UNAVAILABLE -> "CUDA_ERROR_DEVICE_UNAVAILABLE";
+			case CUDA_ERROR_NO_DEVICE -> "CUDA_ERROR_NO_DEVICE";
+			case CUDA_ERROR_INVALID_DEVICE -> "CUDA_ERROR_INVALID_DEVICE";
+			case CUDA_ERROR_DEVICE_NOT_LICENSED -> "CUDA_ERROR_DEVICE_NOT_LICENSED";
+			case CUDA_ERROR_INVALID_IMAGE -> "CUDA_ERROR_INVALID_IMAGE";
+			case CUDA_ERROR_INVALID_CONTEXT -> "CUDA_ERROR_INVALID_CONTEXT";
+			case CUDA_ERROR_CONTEXT_ALREADY_CURRENT -> "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
+			case CUDA_ERROR_MAP_FAILED -> "CUDA_ERROR_MAP_FAILED";
+			case CUDA_ERROR_UNMAP_FAILED -> "CUDA_ERROR_UNMAP_FAILED";
+			case CUDA_ERROR_ARRAY_IS_MAPPED -> "CUDA_ERROR_ARRAY_IS_MAPPED";
+			case CUDA_ERROR_ALREADY_MAPPED -> "CUDA_ERROR_ALREADY_MAPPED";
+			case CUDA_ERROR_NO_BINARY_FOR_GPU -> "CUDA_ERROR_NO_BINARY_FOR_GPU";
+			case CUDA_ERROR_ALREADY_ACQUIRED -> "CUDA_ERROR_ALREADY_ACQUIRED";
+			case CUDA_ERROR_NOT_MAPPED -> "CUDA_ERROR_NOT_MAPPED";
+			case CUDA_ERROR_NOT_MAPPED_AS_ARRAY -> "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
+			case CUDA_ERROR_NOT_MAPPED_AS_POINTER -> "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
+			case CUDA_ERROR_ECC_UNCORRECTABLE -> "CUDA_ERROR_ECC_UNCORRECTABLE";
+			case CUDA_ERROR_UNSUPPORTED_LIMIT -> "CUDA_ERROR_UNSUPPORTED_LIMIT";
+			case CUDA_ERROR_CONTEXT_ALREADY_IN_USE -> "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
+			case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED -> "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
+			case CUDA_ERROR_INVALID_PTX -> "CUDA_ERROR_INVALID_PTX";
+			case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT -> "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
+			case CUDA_ERROR_NVLINK_UNCORRECTABLE -> "CUDA_ERROR_NVLINK_UNCORRECTABLE";
+			case CUDA_ERROR_JIT_COMPILER_NOT_FOUND -> "CUDA_ERROR_JIT_COMPILER_NOT_FOUND";
+			case CUDA_ERROR_UNSUPPORTED_PTX_VERSION -> "CUDA_ERROR_UNSUPPORTED_PTX_VERSION";
+			case CUDA_ERROR_JIT_COMPILATION_DISABLED -> "CUDA_ERROR_JIT_COMPILATION_DISABLED";
+			case CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY -> "CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY";
+			case CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC -> "CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC";
+			case CUDA_ERROR_INVALID_SOURCE -> "CUDA_ERROR_INVALID_SOURCE";
+			case CUDA_ERROR_FILE_NOT_FOUND -> "CUDA_ERROR_FILE_NOT_FOUND";
+			case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND -> "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
+			case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED -> "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
+			case CUDA_ERROR_OPERATING_SYSTEM -> "CUDA_ERROR_OPERATING_SYSTEM";
+			case CUDA_ERROR_INVALID_HANDLE -> "CUDA_ERROR_INVALID_HANDLE";
+			case CUDA_ERROR_ILLEGAL_STATE -> "CUDA_ERROR_ILLEGAL_STATE";
+			case CUDA_ERROR_LOSSY_QUERY -> "CUDA_ERROR_LOSSY_QUERY";
+			case CUDA_ERROR_NOT_FOUND -> "CUDA_ERROR_NOT_FOUND";
+			case CUDA_ERROR_NOT_READY -> "CUDA_ERROR_NOT_READY";
+			case CUDA_ERROR_ILLEGAL_ADDRESS -> "CUDA_ERROR_ILLEGAL_ADDRESS";
+			case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES -> "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
+			case CUDA_ERROR_LAUNCH_TIMEOUT -> "CUDA_ERROR_LAUNCH_TIMEOUT";
+			case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING -> "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
+			case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED -> "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
+			case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED -> "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
+			case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE -> "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
+			case CUDA_ERROR_CONTEXT_IS_DESTROYED -> "CUDA_ERROR_CONTEXT_IS_DESTROYED";
+			case CUDA_ERROR_ASSERT -> "CUDA_ERROR_ASSERT";
+			case CUDA_ERROR_TOO_MANY_PEERS -> "CUDA_ERROR_TOO_MANY_PEERS";
+			case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED -> "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
+			case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED -> "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
+			case CUDA_ERROR_HARDWARE_STACK_ERROR -> "CUDA_ERROR_HARDWARE_STACK_ERROR";
+			case CUDA_ERROR_ILLEGAL_INSTRUCTION -> "CUDA_ERROR_ILLEGAL_INSTRUCTION";
+			case CUDA_ERROR_MISALIGNED_ADDRESS -> "CUDA_ERROR_MISALIGNED_ADDRESS";
+			case CUDA_ERROR_INVALID_ADDRESS_SPACE -> "CUDA_ERROR_INVALID_ADDRESS_SPACE";
+			case CUDA_ERROR_INVALID_PC -> "CUDA_ERROR_INVALID_PC";
+			case CUDA_ERROR_LAUNCH_FAILED -> "CUDA_ERROR_LAUNCH_FAILED";
+			case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE -> "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE";
+			case CUDA_ERROR_NOT_PERMITTED -> "CUDA_ERROR_NOT_PERMITTED";
+			case CUDA_ERROR_NOT_SUPPORTED -> "CUDA_ERROR_NOT_SUPPORTED";
+			case CUDA_ERROR_SYSTEM_NOT_READY -> "CUDA_ERROR_SYSTEM_NOT_READY";
+			case CUDA_ERROR_SYSTEM_DRIVER_MISMATCH -> "CUDA_ERROR_SYSTEM_DRIVER_MISMATCH";
+			case CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE -> "CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE";
+			case CUDA_ERROR_MPS_CONNECTION_FAILED -> "CUDA_ERROR_MPS_CONNECTION_FAILED";
+			case CUDA_ERROR_MPS_RPC_FAILURE -> "CUDA_ERROR_MPS_RPC_FAILURE";
+			case CUDA_ERROR_MPS_SERVER_NOT_READY -> "CUDA_ERROR_MPS_SERVER_NOT_READY";
+			case CUDA_ERROR_MPS_MAX_CLIENTS_REACHED -> "CUDA_ERROR_MPS_MAX_CLIENTS_REACHED";
+			case CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED -> "CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED";
+			case CUDA_ERROR_MPS_CLIENT_TERMINATED -> "CUDA_ERROR_MPS_CLIENT_TERMINATED";
+			case CUDA_ERROR_CDP_NOT_SUPPORTED -> "CUDA_ERROR_CDP_NOT_SUPPORTED";
+			case CUDA_ERROR_CDP_VERSION_MISMATCH -> "CUDA_ERROR_CDP_VERSION_MISMATCH";
+			case CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED -> "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED";
+			case CUDA_ERROR_STREAM_CAPTURE_INVALIDATED -> "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED";
+			case CUDA_ERROR_STREAM_CAPTURE_MERGE -> "CUDA_ERROR_STREAM_CAPTURE_MERGE";
+			case CUDA_ERROR_STREAM_CAPTURE_UNMATCHED -> "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED";
+			case CUDA_ERROR_STREAM_CAPTURE_UNJOINED -> "CUDA_ERROR_STREAM_CAPTURE_UNJOINED";
+			case CUDA_ERROR_STREAM_CAPTURE_ISOLATION -> "CUDA_ERROR_STREAM_CAPTURE_ISOLATION";
+			case CUDA_ERROR_STREAM_CAPTURE_IMPLICIT -> "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT";
+			case CUDA_ERROR_CAPTURED_EVENT -> "CUDA_ERROR_CAPTURED_EVENT";
+			case CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD -> "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD";
+			case CUDA_ERROR_TIMEOUT -> "CUDA_ERROR_TIMEOUT";
+			case CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE -> "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE";
+			case CUDA_ERROR_EXTERNAL_DEVICE -> "CUDA_ERROR_EXTERNAL_DEVICE";
+			case CUDA_ERROR_INVALID_CLUSTER_SIZE -> "CUDA_ERROR_INVALID_CLUSTER_SIZE";
+			case CUDA_ERROR_FUNCTION_NOT_LOADED -> "CUDA_ERROR_FUNCTION_NOT_LOADED";
+			case CUDA_ERROR_INVALID_RESOURCE_TYPE -> "CUDA_ERROR_INVALID_RESOURCE_TYPE";
+			case CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION -> "CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION";
+			case CUDA_ERROR_UNKNOWN -> "CUDA_ERROR_UNKNOWN";
+			default -> "Invalid error";
+		};
+	}
+
+	/**
+	 * Private because this class only holds constants and a static lookup.
+	 */
+	private CUresult() {
+		// prevent instantiation.
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUstream.java b/src/main/java/org/apache/sysds/cujava/driver/CUstream.java
new file mode 100644
index 00000000000..1b1b07a6ec3
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUstream.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+import org.apache.sysds.cujava.runtime.cudaStream_t;
+
+/**
+ * Stream handle type used by the driver bindings (see the {@code hStream} parameter of
+ * {@code CuJavaDriver.cuLaunchKernel}). All state lives in the {@link NativePointerObject}
+ * superclass; this class only adds constructors.
+ */
+public class CUstream extends NativePointerObject {
+
+	/**
+	 * Creates a handle through the superclass default constructor.
+	 */
+	public CUstream() {
+	}
+
+	/**
+	 * Creates a driver stream handle from a runtime stream handle by forwarding it to the
+	 * superclass constructor.
+	 * NOTE(review): presumably this makes both handles refer to the same native stream;
+	 * confirm against NativePointerObject.
+	 *
+	 * @param stream the runtime stream handle to wrap
+	 */
+	public CUstream(cudaStream_t stream) {
+		super(stream);
+	}
+
+	/**
+	 * Package-private constructor that forwards a raw {@code long} to the superclass.
+	 *
+	 * @param value raw value handed to {@link NativePointerObject}
+	 */
+	CUstream(long value) {
+		super(value);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java b/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java
new file mode 100644
index 00000000000..fcc495df005
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+/**
+ * Static Java entry points backed by the native library {@code cujava_driver}.
+ *
+ * Every public {@code cuXxx} method forwards its arguments unchanged to the private native
+ * method {@code cuXxxNative} and routes the returned status code through
+ * {@link #checkCudaResult(int)}. By default the status is simply returned; after
+ * {@link #setExceptionsEnabled(boolean) setExceptionsEnabled(true)} a status other than
+ * {@code CUresult.CUDA_SUCCESS} raises a {@link CudaException} instead.
+ */
+public class CuJavaDriver {
+
+	/** Base name handed to {@link CuJavaLibLoader#load(String)}. */
+	private static final String LIB_BASE = "cujava_driver";
+
+	/** When true, non-success status codes are turned into exceptions. */
+	private static boolean exceptionsEnabled = false;
+
+	static {
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	private CuJavaDriver() {
+		// static methods only
+	}
+
+	// ---------------------------------------------------------------- error handling
+
+	/**
+	 * Switches between returning status codes (default) and throwing on failure.
+	 *
+	 * @param enabled true to throw {@link CudaException} for every non-success status
+	 */
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	/**
+	 * Passes a status code through, or throws when exceptions are enabled and the code is not
+	 * {@code CUresult.CUDA_SUCCESS}. The exception message is {@code CUresult.resultString(result)}.
+	 */
+	private static int checkCudaResult(int result) {
+		if(!exceptionsEnabled || result == CUresult.CUDA_SUCCESS) {
+			return result;
+		}
+		throw new CudaException(CUresult.resultString(result));
+	}
+
+	// ---------------------------------------------------------------- initialization and devices
+
+	/** Forwards to the native {@code cuInit} binding and checks the status. */
+	public static int cuInit(int flags) {
+		return checkCudaResult(cuInitNative(flags));
+	}
+
+	private static native int cuInitNative(int flags);
+
+	/** Forwards to the native {@code cuDeviceGet} binding and checks the status. */
+	public static int cuDeviceGet(CUdevice device, int ordinal) {
+		return checkCudaResult(cuDeviceGetNative(device, ordinal));
+	}
+
+	private static native int cuDeviceGetNative(CUdevice device, int ordinal);
+
+	/** Forwards to the native {@code cuDeviceGetCount} binding and checks the status. */
+	public static int cuDeviceGetCount(int[] count) {
+		return checkCudaResult(cuDeviceGetCountNative(count));
+	}
+
+	private static native int cuDeviceGetCountNative(int[] count);
+
+	/** Forwards to the native {@code cuDeviceGetAttribute} binding and checks the status. */
+	public static int cuDeviceGetAttribute(int[] pi, int attrib, CUdevice dev) {
+		return checkCudaResult(cuDeviceGetAttributeNative(pi, attrib, dev));
+	}
+
+	private static native int cuDeviceGetAttributeNative(int[] pi, int attrib, CUdevice dev);
+
+	// ---------------------------------------------------------------- contexts
+
+	/** Forwards to the native {@code cuCtxCreate} binding and checks the status. */
+	public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev) {
+		return checkCudaResult(cuCtxCreateNative(pctx, flags, dev));
+	}
+
+	private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev);
+
+	/** Forwards to the native {@code cuCtxSynchronize} binding and checks the status. */
+	public static int cuCtxSynchronize() {
+		return checkCudaResult(cuCtxSynchronizeNative());
+	}
+
+	private static native int cuCtxSynchronizeNative();
+
+	/** Forwards to the native {@code cuCtxDestroy} binding and checks the status. */
+	public static int cuCtxDestroy(CUcontext ctx) {
+		return checkCudaResult(cuCtxDestroyNative(ctx));
+	}
+
+	private static native int cuCtxDestroyNative(CUcontext ctx);
+
+	// ---------------------------------------------------------------- modules and kernels
+
+	/**
+	 * Forwards to the native {@code cuModuleLoadDataEx} binding and checks the status.
+	 * When {@code numOptions} is 0, a null {@code options} is replaced by an empty array and a
+	 * null {@code optionValues} by a pointer to an empty array, so the native side never
+	 * receives null for them in that case. For any other {@code numOptions} both arguments are
+	 * passed through untouched.
+	 */
+	public static int cuModuleLoadDataEx(CUmodule phMod, Pointer p, int numOptions, int[] options,
+		Pointer optionValues) {
+		int[] effectiveOptions = options;
+		Pointer effectiveValues = optionValues;
+		if(numOptions == 0) {
+			if(effectiveOptions == null) {
+				effectiveOptions = new int[0];
+			}
+			if(effectiveValues == null) {
+				effectiveValues = Pointer.to(new int[0]);
+			}
+		}
+		return checkCudaResult(cuModuleLoadDataExNative(phMod, p, numOptions, effectiveOptions, effectiveValues));
+	}
+
+	private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int[] options,
+		Pointer optionValues);
+
+	/** Forwards to the native {@code cuModuleGetFunction} binding and checks the status. */
+	public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name) {
+		return checkCudaResult(cuModuleGetFunctionNative(hfunc, hmod, name));
+	}
+
+	private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name);
+
+	/** Forwards to the native {@code cuModuleUnload} binding and checks the status. */
+	public static int cuModuleUnload(CUmodule hmod) {
+		return checkCudaResult(cuModuleUnloadNative(hmod));
+	}
+
+	private static native int cuModuleUnloadNative(CUmodule hmod);
+
+	/** Forwards to the native {@code cuLaunchKernel} binding and checks the status. */
+	public static int cuLaunchKernel(CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX,
+		int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra) {
+		int status = cuLaunchKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
+			sharedMemBytes, hStream, kernelParams, extra);
+		return checkCudaResult(status);
+	}
+
+	private static native int cuLaunchKernelNative(CUfunction f, int gridDimX, int gridDimY, int gridDimZ,
+		int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams,
+		Pointer extra);
+
+	// ---------------------------------------------------------------- memory
+
+	/** Forwards to the native {@code cuMemAlloc} binding and checks the status. */
+	public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) {
+		return checkCudaResult(cuMemAllocNative(dptr, bytesize));
+	}
+
+	private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize);
+
+	/** Forwards to the native {@code cuMemcpyDtoH} binding and checks the status. */
+	public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount) {
+		return checkCudaResult(cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount));
+	}
+
+	private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount);
+
+	/** Forwards to the native {@code cuMemFree} binding and checks the status. */
+	public static int cuMemFree(CUdeviceptr dptr) {
+		return checkCudaResult(cuMemFreeNative(dptr));
+	}
+
+	private static native int cuMemFreeNative(CUdeviceptr dptr);
+}
diff --git a/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java b/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java
new file mode 100644
index 00000000000..0fbb4b014cd
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.interop;
+
+import java.lang.reflect.Field;
+
+/**
+ * Converts cuJava pointers into JCuda pointers so that an address held by cuJava can be
+ * passed to a JCuda API.
+ */
+public class JCudaAdapter {
+	private JCudaAdapter() {}
+
+	/**
+	 * Creates a new {@code jcuda.Pointer} that carries the same native address and byte offset
+	 * as the given cuJava pointer. The values are written reflectively into the fields
+	 * {@code jcuda.NativePointerObject.nativePointer} and {@code jcuda.Pointer.byteOffset}.
+	 *
+	 * NOTE(review): only these two long values are transferred. If a cuJava pointer can be
+	 * backed by a Java array (for example one obtained from {@code Pointer.to(int[])}), that
+	 * backing is presumably not visible to the resulting JCuda pointer; confirm before using
+	 * this with host-side pointers.
+	 *
+	 * @param p cuJava pointer to mirror; may be null
+	 * @return a new JCuda pointer, or null when {@code p} is null
+	 * @throws IllegalStateException if one of the JCuda fields cannot be found or written
+	 */
+	public static jcuda.Pointer toJCuda(org.apache.sysds.cujava.Pointer p) {
+		// A null pointer stays a null pointer instead of failing on p.getNativePointer().
+		if(p == null) {
+			return null;
+		}
+		try {
+			jcuda.Pointer q = new jcuda.Pointer();
+
+			// jcuda.NativePointerObject.nativePointer = cuJava nativePointer
+			writeLongField(jcuda.NativePointerObject.class, "nativePointer", q, p.getNativePointer());
+
+			// jcuda.Pointer.byteOffset = cuJava byteOffset
+			writeLongField(jcuda.Pointer.class, "byteOffset", q, p.getByteOffset());
+
+			return q;
+		} catch (ReflectiveOperationException e) {
+			throw new IllegalStateException("cuJava→JCuda pointer adaptation failed", e);
+		}
+	}
+
+	/**
+	 * Writes {@code value} into the long field {@code name} declared by {@code owner} on
+	 * {@code target}, making the field accessible first.
+	 */
+	private static void writeLongField(Class<?> owner, String name, Object target, long value)
+		throws ReflectiveOperationException {
+		Field field = owner.getDeclaredField(name);
+		field.setAccessible(true);
+		field.setLong(target, value);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java b/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java
new file mode 100644
index 00000000000..24f7246a943
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java
@@ -0,0
+1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+/**
+ * Static Java entry points backed by the native library {@code cujava_runtime}.
+ *
+ * Every public {@code cudaXxx} method forwards its arguments unchanged to the private native
+ * method {@code cudaXxxNative} and routes the returned status code through
+ * {@link #checkCudaError(int)}. By default the status is simply returned; after
+ * {@link #setExceptionsEnabled(boolean) setExceptionsEnabled(true)} a status other than
+ * {@code CudaError.cudaSuccess} raises a {@link CudaException} instead.
+ */
+public class CuJava {
+
+	/** Base name handed to {@link CuJavaLibLoader#load(String)}. */
+	private static final String LIB_BASE = "cujava_runtime";
+
+	/** When true, non-success status codes are turned into exceptions. */
+	private static boolean exceptionsEnabled = false;
+
+	/** Mirrors the CUDA runtime constant of the same name (value 1). */
+	public static final int cudaMemAttachGlobal = 1;
+
+	/** Mirrors the CUDA runtime constant of the same name (value 4). */
+	public static final int cudaDeviceScheduleBlockingSync = 4;
+
+	static {
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	private CuJava() {
+		// static methods only
+	}
+
+	// ---------------------------------------------------------------- error handling
+
+	/**
+	 * Switches between returning status codes (default) and throwing on failure.
+	 *
+	 * @param enabled true to throw {@link CudaException} for every non-success status
+	 */
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	/**
+	 * Passes a status code through, or throws when exceptions are enabled and the code is not
+	 * {@code CudaError.cudaSuccess}. The exception message is {@code CudaError.errorString(result)}.
+	 */
+	private static int checkCudaError(int result) {
+		if(!exceptionsEnabled || result == CudaError.cudaSuccess) {
+			return result;
+		}
+		throw new CudaException(CudaError.errorString(result));
+	}
+
+	// ---------------------------------------------------------------- devices
+
+	/** Forwards to the native {@code cudaGetDeviceCount} binding and checks the status. */
+	public static int cudaGetDeviceCount(int[] count) {
+		return checkCudaError(cudaGetDeviceCountNative(count));
+	}
+
+	private static native int cudaGetDeviceCountNative(int[] count);
+
+	/** Forwards to the native {@code cudaGetDevice} binding and checks the status. */
+	public static int cudaGetDevice(int[] device) {
+		return checkCudaError(cudaGetDeviceNative(device));
+	}
+
+	private static native int cudaGetDeviceNative(int[] device);
+
+	/** Forwards to the native {@code cudaSetDevice} binding and checks the status. */
+	public static int cudaSetDevice(int device) {
+		return checkCudaError(cudaSetDeviceNative(device));
+	}
+
+	private static native int cudaSetDeviceNative(int device);
+
+	/** Forwards to the native {@code cudaSetDeviceFlags} binding and checks the status. */
+	public static int cudaSetDeviceFlags(int flags) {
+		return checkCudaError(cudaSetDeviceFlagsNative(flags));
+	}
+
+	private static native int cudaSetDeviceFlagsNative(int flags);
+
+	/** Forwards to the native {@code cudaGetDeviceProperties} binding and checks the status. */
+	public static int cudaGetDeviceProperties(CudaDeviceProp prop, int device) {
+		return checkCudaError(cudaGetDevicePropertiesNative(prop, device));
+	}
+
+	private static native int cudaGetDevicePropertiesNative(CudaDeviceProp prop, int device);
+
+	/** Forwards to the native {@code cudaDeviceSynchronize} binding and checks the status. */
+	public static int cudaDeviceSynchronize() {
+		return checkCudaError(cudaDeviceSynchronizeNative());
+	}
+
+	private static native int cudaDeviceSynchronizeNative();
+
+	// ---------------------------------------------------------------- memory
+
+	/** Forwards to the native {@code cudaMalloc} binding and checks the status. */
+	public static int cudaMalloc(Pointer devPtr, long size) {
+		return checkCudaError(cudaMallocNative(devPtr, size));
+	}
+
+	private static native int cudaMallocNative(Pointer devPtr, long size);
+
+	/** Forwards to the native {@code cudaMallocManaged} binding and checks the status. */
+	public static int cudaMallocManaged(Pointer devPtr, long size, int flags) {
+		return checkCudaError(cudaMallocManagedNative(devPtr, size, flags));
+	}
+
+	private static native int cudaMallocManagedNative(Pointer devPtr, long size, int flags);
+
+	/** Forwards to the native {@code cudaFree} binding and checks the status. */
+	public static int cudaFree(Pointer devPtr) {
+		return checkCudaError(cudaFreeNative(devPtr));
+	}
+
+	private static native int cudaFreeNative(Pointer devPtr);
+
+	/** Forwards to the native {@code cudaMemcpy} binding and checks the status. */
+	public static int cudaMemcpy(Pointer dst, Pointer src, long count, int cudaMemcpyKind_kind) {
+		return checkCudaError(cudaMemcpyNative(dst, src, count, cudaMemcpyKind_kind));
+	}
+
+	private static native int cudaMemcpyNative(Pointer dst, Pointer src, long count, int cudaMemcpyKind_kind);
+
+	/** Forwards to the native {@code cudaMemset} binding and checks the status. */
+	public static int cudaMemset(Pointer mem, int c, long count) {
+		return checkCudaError(cudaMemsetNative(mem, c, count));
+	}
+
+	private static native int cudaMemsetNative(Pointer mem, int c, long count);
+
+	/** Forwards to the native {@code cudaMemGetInfo} binding and checks the status. */
+	public static int cudaMemGetInfo(long[] free, long[] total) {
+		return checkCudaError(cudaMemGetInfoNative(free, total));
+	}
+
+	private static native int cudaMemGetInfoNative(long[] free, long[] total);
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java
new file mode 100644
index 00000000000..18c5e1ea7bc
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java
@@ -0,0 +1,503 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+/**
+ * This class replicates the CUDA device properties (cudaDeviceProp).
+ * The descriptions are directly taken from the Documentation:
+ * https://docs.nvidia.com/cuda/archive/12.8.0/pdf/CUDA_Runtime_API.pdf
+ */
+public class CudaDeviceProp {
+
+	/**
+	 * The maximum value of cudaAccessPolicyWindow::num_bytes.
+ */ + public int accessPolicyMaxWindowSize; + + /** + * Number of asynchronous engines + */ + public int asyncEngineCount; + + /** + * Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer + */ + public int canMapHostMemory; + + /** + * Device can access host registered memory at the same virtual address as the CPU + */ + public int canUseHostPointerForRegisteredMem; + + /** + * @deprecated in CUDA 12 Clock frequency in kilohertz + */ + public int clockRate; + + /** + * Indicates device supports cluster launch + */ + public int clusterLaunch; + + /** + * @deprecated Compute mode (See cudaComputeMode) + */ + public int computeMode; + + /** + * Device supports Compute Preemption + */ + public int computePreemptionSupported; + + /** + * Device can possibly execute multiple kernels concurrently + */ + public int concurrentKernels; + + /** + * Device can coherently access managed memory concurrently with the CPU + */ + public int concurrentManagedAccess; + + /** + * Device supports launching cooperative kernels via cudaLaunchCooperativeKernel + */ + public int cooperativeLaunch; + + /** + * @deprecated cudaLaunchCooperativeKernelMultiDevice is deprecated. + */ + public int cooperativeMultiDeviceLaunch; + + /** + * 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays + */ + public int deferredMappingCudaArraySupported; + + /** + * Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. + */ + public int deviceOverlap; + + /** + * Host can directly access managed memory on the device without migration. 
+ */ + public int directManagedMemAccessFromHost; + + /** + * Device has ECC support enabled + */ + public int ECCEnabled; + + /** + * Device supports caching globals in L1 + */ + public int globalL1CacheSupported; + + /** + * Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum + */ + public int gpuDirectRDMAFlushWritesOptions; + + /** + * 1 if the device supports GPUDirect RDMA APIs, 0 otherwise + */ + public int gpuDirectRDMASupported; + + /** + * See the cudaGPUDirectRDMAWritesOrdering enum for numerical values + */ + public int gpuDirectRDMAWritesOrdering; + + /** + * Link between the device and the host supports native atomic operations + */ + public int hostNativeAtomicSupported; + + /** + * Device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped + * as read-only to the GPU + */ + public int hostRegisterReadOnlySupported; + + /** + * Device supports host memory registration via cudaHostRegister. + */ + public int hostRegisterSupported; + + /** + * Device is integrated as opposed to discrete + */ + public int integrated; + + /** + * Device supports IPC Events. + */ + public int ipcEventSupported; + + /** + * Device is on a multi-GPU board + */ + public int isMultiGpuBoard; + + /** + * @deprecated Specified whether there is a run time limit on kernels + */ + public int kernelExecTimeoutEnabled; + + /** + * Size of L2 cache in bytes + */ + public int l2CacheSize; + + /** + * Device supports caching locals in L1 + */ + public int localL1CacheSupported; + + /** + * 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms + */ + public byte[] luid = new byte[8]; + + /** + * LUID device node mask. 
Value is undefined on TCC and non-Windows platforms + */ + public int luidDeviceNodeMask; + + /** + * Major compute capability + */ + public int major; + + /** + * Device supports allocating managed memory on this system + */ + public int managedMemory; + + /** + * Maximum number of resident blocks per multiprocessor + */ + public int maxBlocksPerMultiProcessor; + + /** + * Maximum size of each dimension of a grid + */ + public int[] maxGridSize = new int[3]; + + /** + * Maximum 1D surface size + */ + public int maxSurface1D; + + /** + * Maximum 1D layered surface dimensions + */ + public int[] maxSurface1DLayered = new int[2]; + + /** + * Maximum 2D surface dimensions + */ + public int[] maxSurface2D = new int[2]; + + /** + * Maximum 2D layered surface dimensions + */ + public int[] maxSurface2DLayered = new int[3]; + + /** + * Maximum 3D surface dimensions + */ + public int[] maxSurface3D = new int[3]; + + /** + * Maximum Cubemap surface dimensions + */ + public int maxSurfaceCubemap; + + /** + * Maximum Cubemap layered surface dimensions + */ + public int[] maxSurfaceCubemapLayered = new int[2]; + + /** + * Maximum 1D texture size + */ + public int maxTexture1D; + + /** + * Maximum 1D layered texture dimensions + */ + public int[] maxTexture1DLayered = new int[2]; + + /** + * @deprecated Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead. 
+ */ + public int maxTexture1DLinear; + + /** + * Maximum 1D mipmapped texture size + */ + public int maxTexture1DMipmap; + + /** + * Maximum 2D texture dimensions + */ + public int[] maxTexture2D = new int[2]; + + /** + * Maximum 2D texture dimensions if texture gather operations have to be performed + */ + public int[] maxTexture2DGather = new int[2]; + + /** + * Maximum 2D layered texture dimensions + */ + public int[] maxTexture2DLayered = new int[3]; + + /** + * Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory + */ + public int[] maxTexture2DLinear = new int[3]; + + /** + * Maximum 2D mipmapped texture dimensions + */ + public int[] maxTexture2DMipmap = new int[2]; + + /** + * Maximum 3D texture dimensions + */ + public int[] maxTexture3D = new int[3]; + + /** + * Contains the maximum alternate 3D texture dimensions + */ + public int[] maxTexture3DAlt = new int[3]; + + /** + * Maximum Cubemap texture dimensions + */ + public int maxTextureCubemap; + + /** + * Maximum Cubemap layered texture dimensions + */ + public int[] maxTextureCubemapLayered = new int[2]; + + /** + * The maximum sizes of each dimension of a block; + */ + public int[] maxThreadsDim = new int[3]; + + /** + * The maximum number of threads per block; + */ + public int maxThreadsPerBlock; + + /** + * The number of maximum resident threads per multiprocessor. + */ + public int maxThreadsPerMultiProcessor; + + /** + * The memory bus width in bits + */ + public int memoryBusWidth; + + /** + * @deprecated The peak memory clock frequency in kilohertz. 
+ */ + public int memoryClockRate; + + /** + * 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise + */ + public int memoryPoolsSupported; + + /** + * Bitmask of handle types supported with mempool-based IPC + */ + public int memoryPoolSupportedHandleTypes; + + /** + * The maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through + * cudaMallocPitch(); + */ + public long memPitch; + + /** + * Minor compute capability + */ + public int minor; + + /** + * Unique identifier for a group of devices on the same multi-GPU board + */ + public int multiGpuBoardGroupID; + + /** + * Number of multiprocessors on device + */ + public int multiProcessorCount; + + /** + * An ASCII string identifying the device; + */ + public byte[] name = new byte[256]; + + /** + * Device supports coherently accessing pageable memory without calling cudaHostRegister on it + */ + public int pageableMemoryAccess; + + /** + * Device accesses pageable memory via the host's page tables + */ + public int pageableMemoryAccessUsesHostPageTables; + + /** + * PCI bus ID of the device + */ + public int pciBusID; + + /** + * PCI device ID of the device + */ + public int pciDeviceID; + + /** + * PCI domain ID of the device + */ + public int pciDomainID; + + /** + * Device's maximum l2 persisting lines capacity setting in bytes + */ + public int persistingL2CacheMaxSize; + + /** + * The maximum number of 32-bit registers available to a thread block; this number is shared by all thread blocks + * simultaneously resident on a multiprocessor; + */ + public int regsPerBlock; + + /** + * 32-bit registers available per multiprocessor + */ + public int regsPerMultiprocessor; + + /** + * Reserved for future use + */ + public int reserved; + + /** + * Shared memory reserved by CUDA driver per block in bytes + */ + public long reservedSharedMemPerBlock; + + /** + * The maximum amount of shared memory available to a thread block in 
bytes; this amount is shared by all thread + * blocks simultaneously resident on a multiprocessor; + */ + public long sharedMemPerBlock; + + /** + * Per device maximum shared memory per block usable by special opt in + */ + public long sharedMemPerBlockOptin; + + /** + * Shared memory available per multiprocessor in bytes + */ + public long sharedMemPerMultiprocessor; + + /** + * @deprecated Ratio of single precision performance (in floating-point operations per second) to double precision + * performance + */ + public int singleToDoublePrecisionPerfRatio; + + /** + * 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise + */ + public int sparseCudaArraySupported; + + /** + * Is 1 if the device supports stream priorities, or 0 if it is not supported + */ + public int streamPrioritiesSupported; + + /** + * Alignment requirements for surfaces + */ + public long surfaceAlignment; + + /** + * 1 if device is a Tesla device using TCC driver, 0 otherwise + */ + public int tccDriver; + + /** + * The alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an + * offset applied to texture fetches; + */ + public long textureAlignment; + + /** + * Pitch alignment requirement for texture references bound to pitched memory + */ + public long texturePitchAlignment; + + /** + * External timeline semaphore interop is supported on the device + */ + public int timelineSemaphoreInteropSupported; + + /** + * The total amount of constant memory available on the device in bytes; + */ + public long totalConstMem; + + /** + * The total amount of global memory available on the device in bytes; + */ + public long totalGlobalMem; + + /** + * 1 if the device shares a unified address space with the host and 0 otherwise. 
+ */ + public int unifiedAddressing; + + /** + * Indicates device supports unified pointers + */ + public int unifiedFunctionPointers; + + /** + * The warp size in threads; + */ + public int warpSize; + + // Uninitialized CudaDeviceProp object + public CudaDeviceProp() { + } +} diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java new file mode 100644 index 00000000000..d8c6fab7d29 --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java @@ -0,0 +1,996 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.runtime; + +/** + * This class replicates the Cuda error types from the CUDA runtime API. + * The descriptions are directly taken from the Documentation: + * https://docs.nvidia.com/cuda/archive/12.8.0/pdf/CUDA_Runtime_API.pdf + */ +public class CudaError { + + /** + * The API call returned with no errors. 
In the case of query calls, this also means that the operation being + * queried is complete + */ + public static final int cudaSuccess = 0; + + /** + * This indicates that one or more of the parameters passed to the API call is not within an acceptable range of + * values. + */ + public static final int cudaErrorInvalidValue = 1; + + /** + * The API call failed because it was unable to allocate enough memory or other resources to perform the requested + * operation + */ + public static final int cudaErrorMemoryAllocation = 2; + + /** + * The API call failed because the CUDA driver and runtime could not be initialized. + */ + public static final int cudaErrorInitializationError = 3; + + /** + * This indicates that a CUDA Runtime API call cannot be executed because it is being called during process shut + * down, at a point in time after CUDA driver has been unloaded. + */ + public static final int cudaErrorCudartUnloading = 4; + + /** + * This indicates profiler is not initialized for this run. This can happen when the application is running with + * external profiling tools like visual profiler. + */ + public static final int cudaErrorProfilerDisabled = 5; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the + * profiling via cudaProfilerStart or cudaProfilerStop without initialization. + */ + public static final int cudaErrorProfilerNotInitialized = 6; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStart() when + * profiling is already enabled. + */ + public static final int cudaErrorProfilerAlreadyStarted = 7; + + /** + * @deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStop() when + * profiling is already disabled. 
+ */ + public static final int cudaErrorProfilerAlreadyStopped = 8; + + /** + * This indicates that a kernel launch is requesting resources that can never be satisfied by the current device. + * Requesting more shared memory per block than the device supports will trigger this error, as will requesting too + * many threads or blocks. See cudaDeviceProp for more device limitations. + */ + public static final int cudaErrorInvalidConfiguration = 9; + + /** + * This indicates that one or more of the pitch-related parameters passed to the API call is not within the + * acceptable range for pitch. + */ + public static final int cudaErrorInvalidPitchValue = 12; + + /** + * This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier. + */ + public static final int cudaErrorInvalidSymbol = 13; + + /** + * @deprecated + * This indicates that at least one host pointer passed to the API call is not a valid host pointer. This error + * return is deprecated as of CUDA 10.1. + */ + public static final int cudaErrorInvalidHostPointer = 16; + + /** + * @deprecated + * This indicates that at least one device pointer passed to the API call is not a valid device pointer. This error + * return is deprecated as of CUDA 10.1. + */ + public static final int cudaErrorInvalidDevicePointer = 17; + + /** + * This indicates that the texture passed to the API call is not a valid texture. + */ + public static final int cudaErrorInvalidTexture = 18; + + /** + * This indicates that the texture binding is not valid. This occurs if you call cudaGetTextureAlignmentOffset() + * with an unbound texture. + */ + public static final int cudaErrorInvalidTextureBinding = 19; + + /** + * This indicates that the channel descriptor passed to the API call is not valid. This occurs if the format is not + * one of the formats specified by cudaChannelFormatKind, or if one of the dimensions is invalid. 
+ */ + public static final int cudaErrorInvalidChannelDescriptor = 20; + + /** + * This indicates that the direction of the memcpy passed to the API call is not one of the types specified by + * cudaMemcpyKind. + */ + public static final int cudaErrorInvalidMemcpyDirection = 21; + + /** + * @deprecated + * This indicated that the user has taken the address of a constant variable, which was forbidden up until the CUDA + * 3.1 release. This error return is deprecated as of CUDA 3.1. Variables in constant memory may now have their + * address taken by the runtime via cudaGetSymbolAddress(). + */ + public static final int cudaErrorAddressOfConstant = 22; + + /** + * @deprecated + * This indicated that a texture fetch was not able to be performed. This was previously used for device emulation + * of texture operations. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the + * CUDA 3.1 release. + */ + public static final int cudaErrorTextureFetchFailed = 23; + + /** + * @deprecated + * This indicated that a texture was not bound for access. This was previously used for device emulation of texture + * operations. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 + * release + */ + public static final int cudaErrorTextureNotBound = 24; + + /** + * @deprecated + * This indicated that a synchronization operation had failed. This was previously used for some device emulation + * functions. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 + * release. + */ + public static final int cudaErrorSynchronizationError = 25; + + /** + * This indicates that a non-float texture was being accessed with linear filtering. This is not supported by CUDA. + */ + public static final int cudaErrorInvalidFilterSetting = 26; + + /** + * This indicates that an attempt was made to read an unsupported data type as a normalized float. 
This is not + * supported by CUDA. + */ + public static final int cudaErrorInvalidNormSetting = 27; + + /** + * @deprecated + * Mixing of device and device emulation code was not allowed. This error return is deprecated as of CUDA 3.1. + * Device emulation mode was removed with the CUDA 3.1 release. + */ + public static final int cudaErrorMixedDeviceExecution = 28; + + /** + * @deprecated + * This indicates that the API call is not yet implemented. Production releases of CUDA will never return this + * error. This error return is deprecated as of CUDA 4.1. + */ + public static final int cudaErrorNotYetImplemented = 31; + + /** + * This indicated that an emulated device pointer exceeded the 32-bit address range. Deprecated This error return is + * deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release. + */ + public static final int cudaErrorMemoryValueTooLarge = 32; + + /** + * This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with + * the stub rather than a real driver loaded will result in CUDA API returning this error. + */ + public static final int cudaErrorStubLibrary = 34; + + /** + * This indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library. This is not a + * supported configuration. Users should install an updated NVIDIA display driver to allow the application to run + */ + public static final int cudaErrorInsufficientDriver = 35; + + /** + * This indicates that the API call requires a newer CUDA driver than the one currently installed. Users should + * install an updated NVIDIA CUDA driver to allow the API call to succeed. + */ + public static final int cudaErrorCallRequiresNewerDriver = 36; + + /** + * This indicates that the surface passed to the API call is not a valid surface. 
+ */ + public static final int cudaErrorInvalidSurface = 37; + + /** + * This indicates that multiple global or constant variables (across separate CUDA source files in the application) + * share the same string name. + */ + public static final int cudaErrorDuplicateVariableName = 43; + + /** + * This indicates that multiple textures (across separate CUDA source files in the application) share the same + * string name. + */ + public static final int cudaErrorDuplicateTextureName = 44; + + /** + * This indicates that multiple surfaces (across separate CUDA source files in the application) share the same + * string name. + */ + public static final int cudaErrorDuplicateSurfaceName = 45; + + /** + * This indicates that all CUDA devices are busy or unavailable at the current time. Devices are often + * busy/unavailable due to use of cudaComputeModeProhibited, cudaComputeModeExclusiveProcess, or when long-running + * CUDA kernels have filled up the GPU and are blocking new work from starting. They can also be unavailable due to + * memory constraints on a device that already has active CUDA work being performed. + */ + public static final int cudaErrorDevicesUnavailable = 46; + + /** + * This indicates that the current context is not compatible with the CUDA Runtime. This can only occur if you + * are using CUDA Runtime/Driver interoperability and have created an existing Driver context using the driver API. + * The Driver context may be incompatible either because the Driver context was created using an older version of + * the API, because the Runtime API call expects a primary driver context and the Driver context is not primary, or + * because the Driver context has been destroyed. Please see "Interactions with the CUDA Driver API" for more + * information. 
+ */ + public static final int cudaErrorIncompatibleDriverContext = 49; + + /** + * The device function being invoked (usually via cudaLaunchKernel()) was not previously configured via the + * cudaConfigureCall() function. + */ + public static final int cudaErrorMissingConfiguration = 52; + + /** + * @deprecated + * This indicated that a previous kernel launch failed. This was previously used for device emulation of kernel + * launches. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 + * release. + */ + public static final int cudaErrorPriorLaunchFailure = 53; + + /** + * This error indicates that a device runtime grid launch did not occur because the depth of the child grid would + * exceed the maximum supported number of nested grid launches. + */ + public static final int cudaErrorLaunchMaxDepthExceeded = 65; + + /** + * This error indicates that a grid launch did not occur because the kernel uses file-scoped textures which are + * unsupported by the device runtime. Kernels launched via the device runtime only support textures created with the + * Texture Object API's. + */ + public static final int cudaErrorLaunchFileScopedTex = 66; + + /** + * This error indicates that a grid launch did not occur because the kernel uses file-scoped surfaces which are + * unsupported by the device runtime. Kernels launched via the device runtime only support surfaces created with the + * Surface Object API's. + */ + public static final int cudaErrorLaunchFileScopedSurf = 67; + + /** + * This error indicates that a call to cudaDeviceSynchronize made from the device runtime failed because the call + * was made at grid depth greater than either the default (2 levels of grids) or user specified device limit + * cudaLimitDevRuntimeSyncDepth. 
To be able to synchronize on launched grids at a greater depth successfully, the + * maximum nested depth at which cudaDeviceSynchronize will be called must be specified with the + * cudaLimitDevRuntimeSyncDepth limit to the cudaDeviceSetLimit api before the host-side launch of a kernel using + * the device runtime. Keep in mind that additional levels of sync depth require the runtime to reserve large + * amounts of device memory that cannot be used for user allocations. Note that cudaDeviceSynchronize made from + * device runtime is only supported on devices of compute capability less than 9.0. + */ + public static final int cudaErrorSyncDepthExceeded = 68; + + /** + * This error indicates that a device runtime grid launch failed because the launch would exceed the limit + * cudaLimitDevRuntimePendingLaunchCount. For this launch to proceed successfully, cudaDeviceSetLimit must be called + * to set the cudaLimitDevRuntimePendingLaunchCount to be higher than the upper bound of outstanding launches that + * can be issued to the device runtime. Keep in mind that raising the limit of pending device runtime launches will + * require the runtime to reserve device memory that cannot be used for user allocations. + */ + public static final int cudaErrorLaunchPendingCountExceeded = 69; + + /** + * The requested device function does not exist or is not compiled for the proper device architecture. + */ + public static final int cudaErrorInvalidDeviceFunction = 98; + + /** + * This indicates that no CUDA-capable devices were detected by the installed CUDA driver. + */ + public static final int cudaErrorNoDevice = 100; + + /** + * This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that + * the action requested is invalid for the specified device. + */ + public static final int cudaErrorInvalidDevice = 101; + + /** + * This indicates that the device doesn't have a valid Grid License. 
+ */ + public static final int cudaErrorDeviceNotLicensed = 102; + + /** + * By default, the CUDA runtime may perform a minimal set of self-tests, as well as CUDA driver tests, to establish + * the validity of both. Introduced in CUDA 11.2, this error return indicates that at least one of these tests has + * failed and the validity of either the runtime or the driver could not be established. + */ + public static final int cudaErrorSoftwareValidityNotEstablished = 103; + + /** + * This indicates an internal startup failure in the CUDA runtime. + */ + public static final int cudaErrorStartupFailure = 127; + + /** + * This indicates that the device kernel image is invalid. + */ + public static final int cudaErrorInvalidKernelImage = 200; + + /** + * This most frequently indicates that there is no context bound to the current thread. This can also be returned if + * the context passed to an API call is not a valid handle (such as a context that has had cuCtxDestroy() invoked on + * it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). + * See cuCtxGetApiVersion() for more details. + */ + public static final int cudaErrorDeviceUninitialized = 201; + + /** + * This indicates that the buffer object could not be mapped. + */ + public static final int cudaErrorMapBufferObjectFailed = 205; + + /** + * This indicates that the buffer object could not be unmapped. + */ + public static final int cudaErrorUnmapBufferObjectFailed = 206; + + /** + * This indicates that the specified array is currently mapped and thus cannot be destroyed. + */ + public static final int cudaErrorArrayIsMapped = 207; + + /** + * This indicates that the resource is already mapped. + */ + public static final int cudaErrorAlreadyMapped = 208; + + /** + * This indicates that there is no kernel image available that is suitable for the device. 
This can occur when a + * user specifies code generation options for a particular CUDA source file that do not include the corresponding + * device configuration. + */ + public static final int cudaErrorNoKernelImageForDevice = 209; + + /** + * This indicates that a resource has already been acquired. + */ + public static final int cudaErrorAlreadyAcquired = 210; + + /** + * This indicates that a resource is not mapped. + */ + public static final int cudaErrorNotMapped = 211; + + /** + * This indicates that a mapped resource is not available for access as an array. + */ + public static final int cudaErrorNotMappedAsArray = 212; + + /** + * This indicates that a mapped resource is not available for access as a pointer. + */ + public static final int cudaErrorNotMappedAsPointer = 213; + + /** + * This indicates that an uncorrectable ECC error was detected during execution. + */ + public static final int cudaErrorECCUncorrectable = 214; + + /** + * This indicates that the cudaLimit passed to the API call is not supported by the active device. + */ + public static final int cudaErrorUnsupportedLimit = 215; + + /** + * This indicates that a call tried to access an exclusive-thread device that is already in use by a different + * thread. + */ + public static final int cudaErrorDeviceAlreadyInUse = 216; + + /** + * This error indicates that P2P access is not supported across the given devices. + */ + public static final int cudaErrorPeerAccessUnsupported = 217; + + /** + * A PTX compilation failed. The runtime may fall back to compiling PTX if an application does not contain a + * suitable binary for the current device. + */ + public static final int cudaErrorInvalidPtx = 218; + + /** + * This indicates an error with the OpenGL or DirectX context. + */ + public static final int cudaErrorInvalidGraphicsContext = 219; + + /** + * This indicates that an uncorrectable NVLink error was detected during the execution. 
+ */ + public static final int cudaErrorNvlinkUncorrectable = 220; + + /** + * This indicates that the PTX JIT compiler library was not found. The JIT Compiler library is used for PTX + * compilation. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for + * the current device. + */ + public static final int cudaErrorJitCompilerNotFound = 221; + + /** + * This indicates that the provided PTX was compiled with an unsupported toolchain. The most common reason for this, + * is the PTX was generated by a compiler newer than what is supported by the CUDA driver and PTX JIT compiler. + */ + public static final int cudaErrorUnsupportedPtxVersion = 222; + + /** + * This indicates that the JIT compilation was disabled. The JIT compilation compiles PTX. The runtime may fall back + * to compiling PTX if an application does not contain a suitable binary for the current device. + */ + public static final int cudaErrorJitCompilationDisabled = 223; + + /** + * This indicates that the provided execution affinity is not supported by the device. + */ + public static final int cudaErrorUnsupportedExecAffinity = 224; + + /** + * This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize. + */ + public static final int cudaErrorUnsupportedDevSideSync = 225; + + /** + * This indicates that an exception occurred on the device that is now contained by the GPU's error containment + * capability. Common causes are - a. Certain types of invalid accesses of peer GPU memory over nvlink b. Certain + * classes of hardware errors This leaves the process in an inconsistent state and any further CUDA work will return + * the same error. To continue using CUDA, the process must be terminated and relaunched + */ + public static final int cudaErrorContained = 226; + + /** + * This indicates that the device kernel source is invalid. 
+ */ + public static final int cudaErrorInvalidSource = 300; + + /** + * This indicates that the file specified was not found. + */ + public static final int cudaErrorFileNotFound = 301; + + /** + * This indicates that a link to a shared object failed to resolve. + */ + public static final int cudaErrorSharedObjectSymbolNotFound = 302; + + /** + * This indicates that initialization of a shared object failed. + */ + public static final int cudaErrorSharedObjectInitFailed = 303; + + /** + * This error indicates that an OS call failed. + */ + public static final int cudaErrorOperatingSystem = 304; + + /** + * This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types + * like cudaStream_t and cudaEvent_t. + */ + public static final int cudaErrorInvalidResourceHandle = 400; + + /** + * This indicates that a resource required by the API call is not in a valid state to perform the requested + * operation. + */ + public static final int cudaErrorIllegalState = 401; + + /** + * This indicates an attempt was made to introspect an object in a way that would discard semantically important + * information. This is either due to the object using functionality newer than the API version used to introspect it + * or omission of optional return arguments. + */ + public static final int cudaErrorLossyQuery = 402; + + /** + * This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver + * function names, texture names, and surface names. + */ + public static final int cudaErrorSymbolNotFound = 500; + + /** + * This indicates that asynchronous operations issued previously have not completed yet. This result is not actually + * an error, but must be indicated differently than cudaSuccess (which indicates completion). Calls that may return + * this value include cudaEventQuery() and cudaStreamQuery(). 
+ */ + public static final int cudaErrorNotReady = 600; + + /** + * The device encountered a load or store instruction on an invalid memory address. This leaves the process in an + * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must + * be terminated and relaunched. + */ + public static final int cudaErrorIllegalAddress = 700; + + /** + * This indicates that a launch did not occur because it did not have appropriate resources. Although this error is + * similar to cudaErrorInvalidConfiguration, this error usually indicates that the user has attempted to pass too + * many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register + * count. + */ + public static final int cudaErrorLaunchOutOfResources = 701; + + /** + * This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see + * the device property kernelExecTimeoutEnabled for more information. This leaves the process in an inconsistent + * state and any further CUDA work will return the same error. To continue using CUDA, the process must be + * terminated and relaunched. + */ + public static final int cudaErrorLaunchTimeout = 702; + + /** + * This error indicates a kernel launch that uses an incompatible texturing mode. + */ + public static final int cudaErrorLaunchIncompatibleTexturing = 703; + + /** + * This error indicates that a call to cudaDeviceEnablePeerAccess() is trying to re-enable peer addressing on from a + * context which has already had peer addressing enabled. 
+ */ + public static final int cudaErrorPeerAccessAlreadyEnabled = 704; + + /** + * This error indicates that cudaDeviceDisablePeerAccess() is trying to disable peer addressing which has not been + * enabled yet via cudaDeviceEnablePeerAccess() + */ + public static final int cudaErrorPeerAccessNotEnabled = 705; + + /** + * This indicates that the user has called cudaSetValidDevices(), cudaSetDeviceFlags(), cudaD3D9SetDirect3DDevice(), + * cudaD3D10SetDirect3DDevice, cudaD3D11SetDirect3DDevice(), or cudaVDPAUSetVDPAUDevice() after initializing the + * CUDA runtime by calling non-device management operations (allocating memory and launching kernels are examples of + * non-device management operations). This error can also be returned if using runtime/driver interoperability and + * there is an existing CUcontext active on the host thread. + */ + public static final int cudaErrorSetOnActiveProcess = 708; + + /** + * This error indicates that the context current to the calling thread has been destroyed using cuCtxDestroy, or is + * a primary context which has not yet been initialized. + */ + public static final int cudaErrorContextIsDestroyed = 709; + + /** + * An assert triggered in device code during kernel execution. The device cannot be used again. All existing + * allocations are invalid. To continue using CUDA, the process must be terminated and relaunched + */ + public static final int cudaErrorAssert = 710; + + /** + * This error indicates that the hardware resources required to enable peer access have been exhausted for one or + * more of the devices passed to cudaEnablePeerAccess(). + */ + public static final int cudaErrorTooManyPeers = 711; + + /** + * This error indicates that the memory range passed to cudaHostRegister() has already been registered. 
+ */ + public static final int cudaErrorHostMemoryAlreadyRegistered = 712; + + /** + * This error indicates that the pointer passed to cudaHostUnregister() does not correspond to any currently + * registered memory region. + */ + public static final int cudaErrorHostMemoryNotRegistered = 713; + + /** + * Device encountered an error in the call stack during kernel execution, possibly due to stack corruption or + * exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will + * return the same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int cudaErrorHardwareStackError = 714; + + /** + * The device encountered an illegal instruction during kernel execution This leaves the process in an inconsistent + * state and any further CUDA work will return the same error. To continue using CUDA, the process must be + * terminated and relaunched. + */ + public static final int cudaErrorIllegalInstruction = 715; + + /** + * The device encountered a load or store instruction on a memory address which is not aligned. This leaves the + * process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, + * the process must be terminated and relaunched. + */ + public static final int cudaErrorMisalignedAddress = 716; + + /** + * While executing a kernel, the device encountered an instruction which can only operate on memory locations in + * certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed + * address space. This leaves the process in an inconsistent state and any further CUDA work will return the same + * error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int cudaErrorInvalidAddressSpace = 717; + + /** + * The device encountered an invalid program counter. 
This leaves the process in an inconsistent state and any + * further CUDA work will return the same error. To continue using CUDA, the process must be terminated and + * relaunched. + */ + public static final int cudaErrorInvalidPc = 718; + + /** + * An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid + * device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more + * information about these cases can be found in the system specific user guide. This leaves the process in an + * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must + * be terminated and relaunched. + */ + public static final int cudaErrorLaunchFailure = 719; + + /** + * This error indicates that the number of blocks launched per grid for a kernel that was launched via either + * cudaLaunchCooperativeKernel or cudaLaunchCooperativeKernelMultiDevice exceeds the maximum number of blocks as + * allowed by cudaOccupancyMaxActiveBlocksPerMultiprocessor or + * cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as specified by the + * device attribute cudaDevAttrMultiProcessorCount. + */ + public static final int cudaErrorCooperativeLaunchTooLarge = 720; + + /** + * An exception occurred on the device while exiting a kernel using tensor memory: the tensor memory was not + * completely deallocated. This leaves the process in an inconsistent state and any further CUDA work will return + * the same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int cudaErrorTensorMemoryLeak = 721; + + /** + * This error indicates the attempted operation is not permitted. + */ + public static final int cudaErrorNotPermitted = 800; + + /** + * This error indicates the attempted operation is not supported on the current system or device. 
+ */ + public static final int cudaErrorNotSupported = 801; + + /** + * This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the + * system configuration is in a valid state and all required driver daemons are actively running. More information + * about this error can be found in the system specific user guide. + */ + public static final int cudaErrorSystemNotReady = 802; + + /** + * This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. + * Refer to the compatibility documentation for supported versions. + */ + public static final int cudaErrorSystemDriverMismatch = 803; + + /** + * This error indicates that the system was upgraded to run with forward compatibility but the visible hardware + * detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported + * hardware matrix or ensure that only supported hardware is visible during initialization via the + * CUDA_VISIBLE_DEVICES environment variable. + */ + public static final int cudaErrorCompatNotSupportedOnDevice = 804; + + /** + * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server. + */ + public static final int cudaErrorMpsConnectionFailed = 805; + + /** + * This error indicates that the remote procedural call between the MPS server and the MPS client failed. + */ + public static final int cudaErrorMpsRpcFailure = 806; + + /** + * This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be + * returned when the MPS server is in the process of recovering from a fatal failure. + */ + public static final int cudaErrorMpsServerNotReady = 807; + + /** + * This error indicates that the hardware resources required to create MPS client have been exhausted. 
+ */ + public static final int cudaErrorMpsMaxClientsReached = 808; + + /** + * This error indicates the hardware resources required to device connections have been exhausted. + */ + public static final int cudaErrorMpsMaxConnectionsReached = 809; + + /** + * This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process + * must be terminated and relaunched. + */ + public static final int cudaErrorMpsClientTerminated = 810; + + /** + * This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like + * MPS, does not support it. + */ + public static final int cudaErrorCdpNotSupported = 811; + + /** + * This error indicates, that the program contains an unsupported interaction between different versions of CUDA + * Dynamic Parallelism. + */ + public static final int cudaErrorCdpVersionMismatch = 812; + + /** + * The operation is not permitted when the stream is capturing. + */ + public static final int cudaErrorStreamCaptureUnsupported = 900; + + /** + * The current capture sequence on the stream has been invalidated due to a previous error. + */ + public static final int cudaErrorStreamCaptureInvalidated = 901; + + /** + * The operation would have resulted in a merge of two independent capture sequences. + */ + public static final int cudaErrorStreamCaptureMerge = 902; + + /** + * The capture was not initiated in this stream. + */ + public static final int cudaErrorStreamCaptureUnmatched = 903; + + /** + * The capture sequence contains a fork that was not joined to the primary stream. + */ + public static final int cudaErrorStreamCaptureUnjoined = 904; + + /** + * A dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream + * ordering dependencies are allowed to cross the boundary. 
+ */ + public static final int cudaErrorStreamCaptureIsolation = 905; + + /** + * The operation would have resulted in a disallowed implicit dependency on a current capture sequence from + * cudaStreamLegacy. + */ + public static final int cudaErrorStreamCaptureImplicit = 906; + + /** + * The operation is not permitted on an event which was last recorded in a capturing stream. + */ + public static final int cudaErrorCapturedEvent = 907; + + /** + * A stream capture sequence not initiated with the cudaStreamCaptureModeRelaxed argument to cudaStreamBeginCapture + * was passed to cudaStreamEndCapture in a different thread. + */ + public static final int cudaErrorStreamCaptureWrongThread = 908; + + /** + * This indicates that the wait operation has timed out. + */ + public static final int cudaErrorTimeout = 909; + + /** + * This error indicates that the graph update was not performed because it included changes which violated + * constraints specific to instantiated graph update. + */ + public static final int cudaErrorGraphExecUpdateFailure = 910; + + /** + * This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external + * device's signal before consuming shared data, the external device signaled an error indicating that the data is + * not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return + * the same error. To continue using CUDA, the process must be terminated and relaunched. + */ + public static final int cudaErrorExternalDevice = 911; + + /** + * This indicates that a kernel launch error has occurred due to cluster misconfiguration. + */ + public static final int cudaErrorInvalidClusterSize = 912; + + /** + * Indiciates a function handle is not loaded when calling an API that requires a loaded function. 
+ */ + public static final int cudaErrorFunctionNotLoaded = 913; + + /** + * This error indicates one or more resources passed in are not valid resource types for the operation. + */ + public static final int cudaErrorInvalidResourceType = 914; + + /** + * This error indicates one or more resources are insufficient or non-applicable for the operation. + */ + public static final int cudaErrorInvalidResourceConfiguration = 915; + + /** + * This indicates that an unknown internal error has occurred. + */ + public static final int cudaErrorUnknown = 999; + + public static final int cudaErrorApiFailureBase = 10000; + + /** + * Returns the string representation of the passes error code. + */ + public static String errorString(int err){ + return switch(err){ + case cudaSuccess -> "cudaSuccess"; + case cudaErrorInvalidValue -> "cudaErrorInvalidValue"; + case cudaErrorMemoryAllocation -> "cudaErrorMemoryAllocation"; + case cudaErrorInitializationError -> "cudaErrorInitializationError"; + case cudaErrorCudartUnloading -> "cudaErrorCudartUnloading"; + case cudaErrorProfilerDisabled -> "cudaErrorProfilerDisabled"; + case cudaErrorProfilerNotInitialized -> "cudaErrorProfilerNotInitialized"; + case cudaErrorProfilerAlreadyStarted -> "cudaErrorProfilerAlreadyStarted"; + case cudaErrorProfilerAlreadyStopped -> "cudaErrorProfilerAlreadyStopped"; + case cudaErrorInvalidConfiguration -> "cudaErrorInvalidConfiguration"; + case cudaErrorInvalidPitchValue -> "cudaErrorInvalidPitchValue"; + case cudaErrorInvalidSymbol -> "cudaErrorInvalidSymbol"; + case cudaErrorInvalidHostPointer -> "cudaErrorInvalidHostPointer"; + case cudaErrorInvalidDevicePointer -> "cudaErrorInvalidDevicePointer"; + case cudaErrorInvalidTexture -> "cudaErrorInvalidTexture"; + case cudaErrorInvalidTextureBinding -> "cudaErrorInvalidTextureBinding"; + case cudaErrorInvalidChannelDescriptor -> "cudaErrorInvalidChannelDescriptor"; + case cudaErrorInvalidMemcpyDirection -> "cudaErrorInvalidMemcpyDirection"; + case 
cudaErrorAddressOfConstant -> "cudaErrorAddressOfConstant"; + case cudaErrorTextureFetchFailed -> "cudaErrorTextureFetchFailed"; + case cudaErrorTextureNotBound -> "cudaErrorTextureNotBound"; + case cudaErrorSynchronizationError -> "cudaErrorSynchronizationError"; + case cudaErrorInvalidFilterSetting -> "cudaErrorInvalidFilterSetting"; + case cudaErrorInvalidNormSetting -> "cudaErrorInvalidNormSetting"; + case cudaErrorMixedDeviceExecution -> "cudaErrorMixedDeviceExecution"; + case cudaErrorNotYetImplemented -> "cudaErrorNotYetImplemented"; + case cudaErrorMemoryValueTooLarge -> "cudaErrorMemoryValueTooLarge"; + case cudaErrorStubLibrary -> "cudaErrorStubLibrary"; + case cudaErrorInsufficientDriver -> "cudaErrorInsufficientDriver"; + case cudaErrorCallRequiresNewerDriver -> "cudaErrorCallRequiresNewerDriver"; + case cudaErrorInvalidSurface -> "cudaErrorInvalidSurface"; + case cudaErrorDuplicateVariableName -> "cudaErrorDuplicateVariableName"; + case cudaErrorDuplicateTextureName -> "cudaErrorDuplicateTextureName"; + case cudaErrorDuplicateSurfaceName -> "cudaErrorDuplicateSurfaceName"; + case cudaErrorDevicesUnavailable -> "cudaErrorDevicesUnavailable"; + case cudaErrorIncompatibleDriverContext -> "cudaErrorIncompatibleDriverContext"; + case cudaErrorMissingConfiguration -> "cudaErrorMissingConfiguration"; + case cudaErrorPriorLaunchFailure -> "cudaErrorPriorLaunchFailure"; + case cudaErrorLaunchMaxDepthExceeded -> "cudaErrorLaunchMaxDepthExceeded"; + case cudaErrorLaunchFileScopedTex -> "cudaErrorLaunchFileScopedTex"; + case cudaErrorLaunchFileScopedSurf -> "cudaErrorLaunchFileScopedSurf"; + case cudaErrorSyncDepthExceeded -> "cudaErrorSyncDepthExceeded"; + case cudaErrorLaunchPendingCountExceeded -> "cudaErrorLaunchPendingCountExceeded"; + case cudaErrorInvalidDeviceFunction -> "cudaErrorInvalidDeviceFunction"; + case cudaErrorNoDevice -> "cudaErrorNoDevice"; + case cudaErrorInvalidDevice -> "cudaErrorInvalidDevice"; + case cudaErrorDeviceNotLicensed -> 
"cudaErrorDeviceNotLicensed"; + case cudaErrorSoftwareValidityNotEstablished -> "cudaErrorSoftwareValidityNotEstablished"; + case cudaErrorStartupFailure -> "cudaErrorStartupFailure"; + case cudaErrorInvalidKernelImage -> "cudaErrorInvalidKernelImage"; + case cudaErrorDeviceUninitialized -> "cudaErrorDeviceUninitialized"; + case cudaErrorMapBufferObjectFailed -> "cudaErrorMapBufferObjectFailed"; + case cudaErrorUnmapBufferObjectFailed -> "cudaErrorUnmapBufferObjectFailed"; + case cudaErrorArrayIsMapped -> "cudaErrorArrayIsMapped"; + case cudaErrorAlreadyMapped -> "cudaErrorAlreadyMapped"; + case cudaErrorNoKernelImageForDevice -> "cudaErrorNoKernelImageForDevice"; + case cudaErrorAlreadyAcquired -> "cudaErrorAlreadyAcquired"; + case cudaErrorNotMapped -> "cudaErrorNotMapped"; + case cudaErrorNotMappedAsArray -> "cudaErrorNotMappedAsArray"; + case cudaErrorNotMappedAsPointer -> "cudaErrorNotMappedAsPointer"; + case cudaErrorECCUncorrectable -> "cudaErrorECCUncorrectable"; + case cudaErrorUnsupportedLimit -> "cudaErrorUnsupportedLimit"; + case cudaErrorDeviceAlreadyInUse -> "cudaErrorDeviceAlreadyInUse"; + case cudaErrorPeerAccessUnsupported -> "cudaErrorPeerAccessUnsupported"; + case cudaErrorInvalidPtx -> "cudaErrorInvalidPtx"; + case cudaErrorInvalidGraphicsContext -> "cudaErrorInvalidGraphicsContext"; + case cudaErrorNvlinkUncorrectable -> "cudaErrorNvlinkUncorrectable"; + case cudaErrorJitCompilerNotFound -> "cudaErrorJitCompilerNotFound"; + case cudaErrorUnsupportedPtxVersion -> "cudaErrorUnsupportedPtxVersion"; + case cudaErrorJitCompilationDisabled -> "cudaErrorJitCompilationDisabled"; + case cudaErrorUnsupportedExecAffinity -> "cudaErrorUnsupportedExecAffinity"; + case cudaErrorUnsupportedDevSideSync -> "cudaErrorUnsupportedDevSideSync"; + case cudaErrorContained -> "cudaErrorContained"; + case cudaErrorInvalidSource -> "cudaErrorInvalidSource"; + case cudaErrorFileNotFound -> "cudaErrorFileNotFound"; + case cudaErrorSharedObjectSymbolNotFound -> 
"cudaErrorSharedObjectSymbolNotFound"; + case cudaErrorSharedObjectInitFailed -> "cudaErrorSharedObjectInitFailed"; + case cudaErrorOperatingSystem -> "cudaErrorOperatingSystem"; + case cudaErrorInvalidResourceHandle -> "cudaErrorInvalidResourceHandle"; + case cudaErrorIllegalState -> "cudaErrorIllegalState"; + case cudaErrorLossyQuery -> "cudaErrorLossyQuery"; + case cudaErrorSymbolNotFound -> "cudaErrorSymbolNotFound"; + case cudaErrorNotReady -> "cudaErrorNotReady"; + case cudaErrorIllegalAddress -> "cudaErrorIllegalAddress"; + case cudaErrorLaunchOutOfResources -> "cudaErrorLaunchOutOfResources"; + case cudaErrorLaunchTimeout -> "cudaErrorLaunchTimeout"; + case cudaErrorLaunchIncompatibleTexturing -> "cudaErrorLaunchIncompatibleTexturing"; + case cudaErrorPeerAccessAlreadyEnabled -> "cudaErrorPeerAccessAlreadyEnabled"; + case cudaErrorPeerAccessNotEnabled -> "cudaErrorPeerAccessNotEnabled"; + case cudaErrorSetOnActiveProcess -> "cudaErrorSetOnActiveProcess"; + case cudaErrorContextIsDestroyed -> "cudaErrorContextIsDestroyed"; + case cudaErrorAssert -> "cudaErrorAssert"; + case cudaErrorTooManyPeers -> "cudaErrorTooManyPeers"; + case cudaErrorHostMemoryAlreadyRegistered -> "cudaErrorHostMemoryAlreadyRegistered"; + case cudaErrorHostMemoryNotRegistered -> "cudaErrorHostMemoryNotRegistered"; + case cudaErrorHardwareStackError -> "cudaErrorHardwareStackError"; + case cudaErrorIllegalInstruction -> "cudaErrorIllegalInstruction"; + case cudaErrorMisalignedAddress -> "cudaErrorMisalignedAddress"; + case cudaErrorInvalidAddressSpace -> "cudaErrorInvalidAddressSpace"; + case cudaErrorInvalidPc -> "cudaErrorInvalidPc"; + case cudaErrorLaunchFailure -> "cudaErrorLaunchFailure"; + case cudaErrorCooperativeLaunchTooLarge -> "cudaErrorCooperativeLaunchTooLarge"; + case cudaErrorTensorMemoryLeak -> "cudaErrorTensorMemoryLeak"; + case cudaErrorNotPermitted -> "cudaErrorNotPermitted"; + case cudaErrorNotSupported -> "cudaErrorNotSupported"; + case cudaErrorSystemNotReady -> 
"cudaErrorSystemNotReady"; + case cudaErrorSystemDriverMismatch -> "cudaErrorSystemDriverMismatch"; + case cudaErrorCompatNotSupportedOnDevice -> "cudaErrorCompatNotSupportedOnDevice"; + case cudaErrorMpsConnectionFailed -> "cudaErrorMpsConnectionFailed"; + case cudaErrorMpsRpcFailure -> "cudaErrorMpsRpcFailure"; + case cudaErrorMpsServerNotReady -> "cudaErrorMpsServerNotReady"; + case cudaErrorMpsMaxClientsReached -> "cudaErrorMpsMaxClientsReached"; + case cudaErrorMpsMaxConnectionsReached -> "cudaErrorMpsMaxConnectionsReached"; + case cudaErrorMpsClientTerminated -> "cudaErrorMpsClientTerminated"; + case cudaErrorCdpNotSupported -> "cudaErrorCdpNotSupported"; + case cudaErrorCdpVersionMismatch -> "cudaErrorCdpVersionMismatch"; + case cudaErrorStreamCaptureUnsupported -> "cudaErrorStreamCaptureUnsupported"; + case cudaErrorStreamCaptureInvalidated -> "cudaErrorStreamCaptureInvalidated"; + case cudaErrorStreamCaptureMerge -> "cudaErrorStreamCaptureMerge"; + case cudaErrorStreamCaptureUnmatched -> "cudaErrorStreamCaptureUnmatched"; + case cudaErrorStreamCaptureUnjoined -> "cudaErrorStreamCaptureUnjoined"; + case cudaErrorStreamCaptureIsolation -> "cudaErrorStreamCaptureIsolation"; + case cudaErrorStreamCaptureImplicit -> "cudaErrorStreamCaptureImplicit"; + case cudaErrorCapturedEvent -> "cudaErrorCapturedEvent"; + case cudaErrorStreamCaptureWrongThread -> "cudaErrorStreamCaptureWrongThread"; + case cudaErrorTimeout -> "cudaErrorTimeout"; + case cudaErrorGraphExecUpdateFailure -> "cudaErrorGraphExecUpdateFailure"; + case cudaErrorExternalDevice -> "cudaErrorExternalDevice"; + case cudaErrorInvalidClusterSize -> "cudaErrorInvalidClusterSize"; + case cudaErrorFunctionNotLoaded -> "cudaErrorFunctionNotLoaded"; + case cudaErrorInvalidResourceType -> "cudaErrorInvalidResourceType"; + case cudaErrorInvalidResourceConfiguration -> "cudaErrorInvalidResourceConfiguration"; + case cudaErrorUnknown -> "cudaErrorUnknown"; + case cudaErrorApiFailureBase -> 
"cudaErrorApiFailureBase"; + default -> "Invalid error"; + }; + } + + private CudaError() { + // prevent instantiation. + } + +} diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java new file mode 100644 index 00000000000..900cc153e9a --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.runtime; + +public class CudaMemcpyKind { + + /** + * Host -> Host + */ + public static final int cudaMemcpyHostToHost = 0; + + /** + * Host -> Device + */ + public static final int cudaMemcpyHostToDevice = 1; + + /** + * Device -> Host + */ + public static final int cudaMemcpyDeviceToHost = 2; + + /** + * Device -> Device + */ + public static final int cudaMemcpyDeviceToDevice = 3; + + /** + * Autodetect the copy direction (host↔device or device↔device) based on the source and destination pointers. + * Requires Unified Virtual Addressing (UVA). + */ + public static final int cudaMemcpyDefault = 4; + + private CudaMemcpyKind() { + // Private constructor to prevent instantiation. 
+ } +} diff --git a/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java b/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java new file mode 100644 index 00000000000..60a6f6e2abe --- /dev/null +++ b/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.cujava.runtime; + +import org.apache.sysds.cujava.NativePointerObject; +import org.apache.sysds.cujava.driver.CUstream; + +public class cudaStream_t extends NativePointerObject { + + public cudaStream_t() { + } + + public cudaStream_t(CUstream stream) { + super(stream); + } + + cudaStream_t(long value) { + super(value); + } +}