diff --git a/src/main/cpp/jni/CMakeLists.txt b/src/main/cpp/jni/CMakeLists.txt
new file mode 100644
index 00000000000..268e124b6bd
--- /dev/null
+++ b/src/main/cpp/jni/CMakeLists.txt
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+project(cujava_jni LANGUAGES CXX)
+
+# Build the subprojects, in the order listed. common/ defines the
+# CuJavaCommonJNI static library; the remaining directories are configured
+# by their own CMakeLists.txt files.
+foreach(cujava_subproject IN ITEMS common runtime driver cusparse cublas)
+    add_subdirectory(${cujava_subproject})
+endforeach()
diff --git a/src/main/cpp/jni/build_cujava_libs.sh b/src/main/cpp/jni/build_cujava_libs.sh
new file mode 100755
index 00000000000..4ceaab2373f
--- /dev/null
+++ b/src/main/cpp/jni/build_cujava_libs.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+set -euo pipefail
+
+# Configures and builds the CuJava JNI libraries with CMake.
+# Usage (from src/main/cpp/jni):
+#   [BUILD_TYPE=Release] ./build_cujava_libs.sh [BUILD_DIR]   # defaults: ./build, Release
+
+BUILD_DIR="${1:-build}"
+BUILD_TYPE="${BUILD_TYPE:-Release}"
+
+echo "==> Configuring (BUILD_DIR=$BUILD_DIR, BUILD_TYPE=$BUILD_TYPE)"
+cmake -S . -B "$BUILD_DIR" -DCMAKE_BUILD_TYPE="$BUILD_TYPE"
+
+echo "==> Building"
+cmake --build "$BUILD_DIR" --config "$BUILD_TYPE" -j
+# Report the directory that the listing below actually inspects (relative to this directory).
+echo "==> Done. Artifacts should be in ../lib"
+ls -l ../lib/libcujava_runtime.so || true
diff --git a/src/main/cpp/jni/common/CMakeLists.txt b/src/main/cpp/jni/common/CMakeLists.txt
new file mode 100644
index 00000000000..1450c394802
--- /dev/null
+++ b/src/main/cpp/jni/common/CMakeLists.txt
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaCommonJNI LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+
+add_library(CuJavaCommonJNI STATIC
+    cujava_logger.cpp
+    cujava_jni_utils.cpp
+    cujava_pointer_utils.cpp
+)
+
+# PIC because this static lib is linked into shared libs.
+# CXX_STANDARD_REQUIRED makes configuration fail instead of silently decaying below C++11.
+set_target_properties(CuJavaCommonJNI PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON
+    POSITION_INDEPENDENT_CODE ON
+    ARCHIVE_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    LIBRARY_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    RUNTIME_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+)
+
+# PUBLIC so that consumers inherit the JNI headers and the headers in common/.
+target_include_directories(CuJavaCommonJNI
+    PUBLIC ${JNI_INCLUDE_DIRS} "${CMAKE_CURRENT_SOURCE_DIR}"
+)
+
+# Propagate JNI to consumers (runtime, etc.)
+target_link_libraries(CuJavaCommonJNI
+    PUBLIC ${JNI_LIBRARIES}
+)
+
diff --git a/src/main/cpp/jni/common/cujava_jni_utils.cpp b/src/main/cpp/jni/common/cujava_jni_utils.cpp
new file mode 100644
index 00000000000..e6e64c632fb
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_jni_utils.cpp
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "cujava_jni_utils.hpp"
+#include "cujava_logger.hpp"
+
+// Cached method ID of java.lang.String#getBytes(); resolved by initJNIUtils and
+// used by toNativeCString.
+jmethodID String_getBytes = nullptr;
+
+/** Resolves the cached IDs of this module; returns JNI_VERSION_1_4 on success, JNI_ERR otherwise. */
+int initJNIUtils(JNIEnv *env) {
+    jclass stringClass = nullptr;
+
+    // java.lang.String#getBytes()[B -- the method lookup only runs if the class lookup succeeded
+    const bool resolved = init(env, stringClass, "java/lang/String")
+        && init(env, stringClass, String_getBytes, "getBytes", "()[B");
+    return resolved ? JNI_VERSION_1_4 : JNI_ERR;
+}
+
+/** Looks up the class `name` with FindClass and stores the resulting reference in `cls`.
+ *  Returns true on success; a failed lookup is logged and reported as false. */
+bool init(JNIEnv *env, jclass& cls, const char *name) {
+    cls = env->FindClass(name);
+    if (cls != nullptr) return true;
+    // FindClass yielded null: report which class was requested
+    Logger::log(LOG_ERROR, "Failed to access class '%s'\n", name);
+    return false;
+}
+
+/** Looks up `className` and stores a global reference to it in `globalCls`.
+ *  Returns false (after logging) if the class or the global reference is unavailable. */
+bool initGlobal(JNIEnv *env, jclass &globalCls, const char *className) {
+    jclass cls = nullptr;
+    if (!init(env, cls, className)) return false;
+    globalCls = (jclass)env->NewGlobalRef(cls);
+    env->DeleteLocalRef(cls); // only the global reference is handed out; do not keep the local one alive
+    if (globalCls != nullptr) return true;
+    Logger::log(LOG_ERROR, "Failed to create reference to class %s\n", className);
+    return false;
+}
+
+/** Resolves instance field `name` with JNI signature `signature` on `cls` into `field`; false (logged) if absent. */
+bool init(JNIEnv *env, jclass cls, jfieldID& field, const char *name, const char *signature) {
+    field = env->GetFieldID(cls, name, signature);
+    const bool resolved = (field != nullptr);
+    if (!resolved) {
+        Logger::log(LOG_ERROR, "Failed to access field '%s' with signature '%s'\n", name, signature);
+    }
+    return resolved;
+}
+
+/** Resolves the instance method `name` with JNI signature `signature` on `cls` via
+ *  GetMethodID and stores the ID in `method`. Returns false, after logging, on failure. */
+bool init(JNIEnv *env, jclass cls, jmethodID& method, const char *name, const char *signature) {
+    method = env->GetMethodID(cls, name, signature);
+    if (method != nullptr) return true;
+    // lookup failed: log the name/signature pair that was requested
+    Logger::log(LOG_ERROR, "Failed to access method '%s' with signature '%s'\n", name, signature);
+    return false;
+}
+
+/** Global class + no-args constructor, convenient helper: stores a global reference to
+ *  `className` in `globalCls` and the ID of its "()V" constructor in `constructor`. */
+bool init(JNIEnv *env, jclass &globalCls, jmethodID &constructor, const char *className) {
+    jclass cls = nullptr;
+    if (!init(env, cls, className)) return false;
+    if (!init(env, cls, constructor, "<init>", "()V")) return false;
+
+    globalCls = (jclass)env->NewGlobalRef(cls);
+    env->DeleteLocalRef(cls); // the local reference is no longer needed once the global one was requested
+    if (globalCls != nullptr) return true;
+    Logger::log(LOG_ERROR, "Failed to create reference to class %s\n", className);
+    return false;
+}
+
+/** Resolves the `long nativePointer` field (signature "J") of `className` into `field`.
+ *  Returns false, after logging, if the class or the field cannot be found. */
+bool initNativePointer(JNIEnv *env, jfieldID& field, const char *className) {
+    jclass owner = env->FindClass(className);
+    if (owner != nullptr) return init(env, owner, field, "nativePointer", "J");
+    // class lookup failed
+    Logger::log(LOG_ERROR, "Failed to access class %s\n", className);
+    return false;
+}
+
+/** Raises the Java exception class `name` (slash-separated FQN) with `msg`, or "" if `msg` is null.
+ *  Nothing is raised by this function itself if the class lookup yields null. */
+void ThrowByName(JNIEnv *env, const char *name, const char *msg) {
+    jclass exceptionClass = env->FindClass(name);
+    if (exceptionClass == nullptr) return;
+    env->ThrowNew(exceptionClass, msg != nullptr ? msg : "");
+    env->DeleteLocalRef(exceptionClass);
+}
+
+/** Writes `value` into element `index` of the Java long[] `ja`. A null array is a no-op that
+ *  reports success; an out-of-range index raises ArrayIndexOutOfBoundsException and returns
+ *  false. False is also returned if the array elements cannot be obtained. */
+bool set(JNIEnv *env, jlongArray ja, int index, jlong value) {
+    if (ja == nullptr) return true;
+    const jsize length = env->GetArrayLength(ja);
+    const bool inBounds = (index >= 0) && (index < length);
+    if (!inBounds) {
+        ThrowByName(env, "java/lang/ArrayIndexOutOfBoundsException",
+                    "Array index out of bounds");
+        return false;
+    }
+    jlong *elements = static_cast<jlong*>(env->GetPrimitiveArrayCritical(ja, nullptr));
+    if (elements == nullptr) return false;
+    elements[index] = value;
+    env->ReleasePrimitiveArrayCritical(ja, elements, 0);
+    return true;
+}
+
+/** Writes `value` into element `index` of the Java int[] `ja`. A null array is a no-op that
+ *  reports success; an out-of-range index raises ArrayIndexOutOfBoundsException and returns
+ *  false. False is also returned if the array elements cannot be obtained. */
+bool set(JNIEnv *env, jintArray ja, int index, jint value) {
+    if (ja == nullptr) return true;
+
+    const jsize length = env->GetArrayLength(ja);
+    const bool inBounds = (index >= 0) && (index < length);
+    if (!inBounds) {
+        ThrowByName(env, "java/lang/ArrayIndexOutOfBoundsException",
+            "Array index out of bounds");
+        return false;
+    }
+    jint *elements = static_cast<jint*>(env->GetPrimitiveArrayCritical(ja, nullptr));
+    if (elements == nullptr) return false;
+    elements[index] = value;
+    env->ReleasePrimitiveArrayCritical(ja, elements, 0);
+    return true;
+}
+
+/** Helpers for setting cudaDeviceProperties. setFieldBytes copies `n` bytes from `src` into the
+ *  byte[] held in field `fid` of `obj`; a null or too-short array is replaced by a new one of length `n`. */
+bool setFieldBytes(JNIEnv* env, jobject obj, jfieldID fid, const jbyte* src, jsize n) {
+    jbyteArray target = (jbyteArray)env->GetObjectField(obj, fid);
+    if (target == nullptr || env->GetArrayLength(target) < n) {
+        target = env->NewByteArray(n);
+        if (target == nullptr) return false;
+        env->SetObjectField(obj, fid, target);
+    }
+    env->SetByteArrayRegion(target, 0, n, src);
+    return !env->ExceptionCheck();
+}
+
+/** Copies `n` ints from `src` into the int[] in field `fid` of `obj`, replacing a null or too-short array first. */
+bool setFieldInts(JNIEnv* env, jobject obj, jfieldID fid, const jint* src, jsize n) {
+    jintArray target = (jintArray)env->GetObjectField(obj, fid);
+    if (target == nullptr || env->GetArrayLength(target) < n) {
+        target = env->NewIntArray(n);
+        if (target == nullptr) return false;
+        env->SetObjectField(obj, fid, target);
+    }
+    env->SetIntArrayRegion(target, 0, n, src);
+    return !env->ExceptionCheck();
+}
+
+/** Overwrites every element of the int[] in field `fid` of `obj` with 0; a null or empty array is left alone. */
+bool zeroFieldInts(JNIEnv* env, jobject obj, jfieldID fid) {
+    jintArray target = (jintArray)env->GetObjectField(obj, fid);
+    const jsize count = (target == nullptr) ? 0 : env->GetArrayLength(target);
+    if (count <= 0) return true;
+    jint* zeroBlock = new (std::nothrow) jint[count](); // value-initialized, i.e. all zero
+    if (zeroBlock == nullptr) {
+        ThrowByName(env, "java/lang/OutOfMemoryError", "Out of memory zeroing int array");
+        return false;
+    }
+    env->SetIntArrayRegion(target, 0, count, zeroBlock);
+    delete[] zeroBlock;
+    return !env->ExceptionCheck();
+}
+
+
+/** Copies the bytes of `js` (as returned by String#getBytes()) into a new[]-allocated, NUL-terminated
+ *  buffer owned by the caller. Returns nullptr for a null `js` or on failure; `length`, if given, gets the byte count. */
+char* toNativeCString(JNIEnv* env, jstring js, int* length) {
+    if (js == nullptr) return nullptr;
+    if (env->EnsureLocalCapacity(2) < 0) {
+        ThrowByName(env, "java/lang/OutOfMemoryError",
+                    "Out of memory during string reference creation");
+        return nullptr;
+    }
+    jbyteArray bytes = (jbyteArray)env->CallObjectMethod(js, String_getBytes);
+    if (env->ExceptionCheck() || bytes == nullptr) {
+        return nullptr;
+    }
+    jint len = env->GetArrayLength(bytes);
+    if (length) *length = (int)len;
+
+    // A plain `new` throws std::bad_alloc instead of returning null, which would skip the error
+    // path below and unwind into the JNI caller; use the nothrow form like the rest of this file.
+    char* out = new (std::nothrow) char[len + 1];
+    if (out == nullptr) {
+        ThrowByName(env, "java/lang/OutOfMemoryError",
+                    "Out of memory during string creation");
+        env->DeleteLocalRef(bytes);
+        return nullptr;
+    }
+    env->GetByteArrayRegion(bytes, 0, len, (jbyte*)out);
+    out[len] = '\0';
+    env->DeleteLocalRef(bytes);
+    return out;
+}
+
+
+/** Allocates a size_t array with as many elements as `javaArr` and stores it in `nativeArr`.
+ *  With `copyFromJava` the Java values are cast element-wise into it; otherwise the contents are
+ *  left uninitialized. A null `javaArr` yields a null `nativeArr` and success. On failure
+ *  `nativeArr` is null and false is returned (OutOfMemoryError is raised for a failed allocation). */
+bool allocNativeArrayFromJLongs(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool copyFromJava) {
+    nativeArr = nullptr; // every path except full success leaves the output null
+    if (javaArr == nullptr) return true;
+
+    const jsize count = env->GetArrayLength(javaArr);
+    size_t* converted = new (std::nothrow) size_t[(size_t)count];
+    if (converted == nullptr) {
+        ThrowByName(env, "java/lang/OutOfMemoryError", "Out of memory during array creation");
+        return false;
+    }
+    if (copyFromJava) {
+        jlong* source = static_cast<jlong*>(env->GetPrimitiveArrayCritical(javaArr, nullptr));
+        if (source == nullptr) {
+            delete[] converted;
+            return false;
+        }
+        for (jsize i = 0; i < count; ++i) converted[i] = (size_t)source[i];
+        env->ReleasePrimitiveArrayCritical(javaArr, source, JNI_ABORT); // input-only
+    }
+    nativeArr = converted;
+    return true;
+}
+
+/** Frees `nativeArr` (always, and resets it to null) after optionally copying its values back.
+ *  The copy runs only if `copyToJava` is set and both arrays are non-null; it writes as many
+ *  elements as `javaArr` holds. Returns false only when the Java elements could not be obtained. */
+bool commitAndFreeNativeArrayToJLongs(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool copyToJava) {
+    bool ok = true;
+    if (javaArr != nullptr && copyToJava && nativeArr != nullptr) {
+        const jsize count = env->GetArrayLength(javaArr);
+        jlong* target = static_cast<jlong*>(env->GetPrimitiveArrayCritical(javaArr, nullptr));
+        ok = (target != nullptr);
+        if (ok) {
+            for (jsize i = 0; i < count; ++i) target[i] = (jlong)nativeArr[i];
+            env->ReleasePrimitiveArrayCritical(javaArr, target, 0); // commit
+        }
+    }
+    delete[] nativeArr;
+    nativeArr = nullptr;
+    return ok;
+}
+
+// Back-compat wrappers: older names that forward all arguments unchanged.
+bool initNative(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool fill) {
+    return allocNativeArrayFromJLongs(env, javaArr, nativeArr, fill); // `fill` is passed as copyFromJava
+}
+bool releaseNative(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool writeBack) {
+    return commitAndFreeNativeArrayToJLongs(env, nativeArr, javaArr, writeBack); // `writeBack` is passed as copyToJava
+}
diff --git a/src/main/cpp/jni/common/cujava_jni_utils.hpp b/src/main/cpp/jni/common/cujava_jni_utils.hpp
new file mode 100644
index 00000000000..179ba706a0c
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_jni_utils.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_JNI_UTILS_HPP
+#define CUJAVA_JNI_UTILS_HPP
+
+#include <jni.h>
+#include <new>
+
+bool init(JNIEnv* env, jclass& cls, const char* name); // FindClass(name) -> cls
+bool initGlobal(JNIEnv* env, jclass& globalCls, const char* className); // FindClass + NewGlobalRef -> globalCls
+bool init(JNIEnv* env, jclass cls, jfieldID& field, const char* name, const char* signature); // GetFieldID -> field
+bool init(JNIEnv* env, jclass cls, jmethodID& method, const char* name, const char* signature); // GetMethodID -> method
+bool init(JNIEnv* env, jclass& globalCls, jmethodID& constructor, const char* className); // global class ref + no-arg constructor
+bool initNativePointer(JNIEnv* env, jfieldID& field, const char* className); // field "nativePointer" (long)
+bool set(JNIEnv *env, jlongArray ja, int index, jlong value); // ja[index] = value; a null array counts as success
+bool set(JNIEnv *env, jintArray ja, int index, jint value); // same for int[]
+bool setFieldBytes(JNIEnv* env, jobject obj, jfieldID fid, const jbyte* src, jsize n); // copy n bytes into the byte[] field
+bool setFieldInts (JNIEnv* env, jobject obj, jfieldID fid, const jint*  src, jsize n); // copy n ints into the int[] field
+bool zeroFieldInts(JNIEnv* env, jobject obj, jfieldID fid); // fill the int[] field with zeros
+char* toNativeCString(JNIEnv* env, jstring js, int* length = nullptr); // new[]-allocated, NUL-terminated copy of js.getBytes()
+bool allocNativeArrayFromJLongs(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool copyFromJava); // long[] -> new size_t[]
+bool commitAndFreeNativeArrayToJLongs(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool copyToJava); // optional copy back, then delete[]
+bool initNative(JNIEnv* env, jlongArray javaArr, size_t*& nativeArr, bool fill); // forwards to allocNativeArrayFromJLongs
+bool releaseNative(JNIEnv* env, size_t*& nativeArr, jlongArray javaArr, bool writeBack); // forwards to commitAndFreeNativeArrayToJLongs
+// All bool-returning functions above return false on failure; most failures are also logged
+// or raised as a Java exception by the implementation in cujava_jni_utils.cpp.
+// ---- Exceptions ----
+void ThrowByName(JNIEnv* env, const char* name, const char* msg); // name uses slashes (java/lang/...); a null msg becomes ""
+
+// ---- Module init (optional; keep if called from JNI_OnLoad) ----
+int initJNIUtils(JNIEnv* env); // resolves String_getBytes; JNI_VERSION_1_4 on success, JNI_ERR on failure
+
+// ---- Cached IDs (minimal) ----
+extern jmethodID String_getBytes; // ()[B
+
+#endif // CUJAVA_JNI_UTILS_HPP
+
diff --git a/src/main/cpp/jni/common/cujava_logger.cpp b/src/main/cpp/jni/common/cujava_logger.cpp
new file mode 100644
index 00000000000..367f68df62d
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_logger.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "cujava_logger.hpp"
+#include <cstdarg>
+#include <cstdio>
+
+LogLevel Logger::currentLogLevel = LOG_ERROR;
+
+/** printf-style logging to stdout, flushed per message; levels above the current threshold are dropped. */
+void Logger::log(LogLevel level, const char *message, ...)
+{
+    if (level > Logger::currentLogLevel) return;
+    va_list formatArgs;
+    va_start(formatArgs, message);
+    vfprintf(stdout, message, formatArgs);
+    va_end(formatArgs);
+    fflush(stdout);
+}
+
+/** Replaces the threshold that Logger::log compares message levels against. */
+void Logger::setLogLevel(LogLevel level)
+{
+    currentLogLevel = level;
+}
diff --git a/src/main/cpp/jni/common/cujava_logger.hpp b/src/main/cpp/jni/common/cujava_logger.hpp
new file mode 100644
index 00000000000..58a3d8ed859
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_logger.hpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#ifndef CUJAVA_LOGGER_HPP
+#define CUJAVA_LOGGER_HPP
+
+#include <cstdarg>
+#include <cstdio>
+
+enum LogLevel {LOG_QUIET, LOG_ERROR, LOG_WARNING, LOG_INFO, LOG_DEBUG, LOG_TRACE, LOG_DEBUGTRACE}; // ascending verbosity
+// Logger with static members only: one threshold, and a message is printed when its level is <= that threshold.
+class Logger {
+public:
+    static void log(LogLevel level, const char* message, ...); // printf-style format + arguments
+    static void setLogLevel(LogLevel level);                   // replaces the threshold
+private:
+    static LogLevel currentLogLevel;                           // initialized to LOG_ERROR in cujava_logger.cpp
+};
+
+#endif
diff --git a/src/main/cpp/jni/common/cujava_pointer_utils.cpp b/src/main/cpp/jni/common/cujava_pointer_utils.cpp
new file mode 100644
index 00000000000..3a8480406f2
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_pointer_utils.cpp
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include <jni.h>
+#include <cstdint>
+#include "cujava_logger.hpp"
+#include "cujava_jni_utils.hpp"
+#include "cujava_pointer_utils.hpp"
+
+// ---- cached IDs / classes, all nullptr until initPointerUtils() runs (definitions; headers should declare them 'extern') ----
+jmethodID Object_getClass = nullptr;            // ()Ljava/lang/Class;
+jmethodID Class_getComponentType = nullptr;     // ()Ljava/lang/Class;
+jmethodID Class_newInstance = nullptr;          // ()Ljava/lang/Object;
+// java.nio.Buffer methods
+jmethodID Buffer_isDirect = nullptr;            // ()Z
+jmethodID Buffer_hasArray = nullptr;            // ()Z
+jmethodID Buffer_array    = nullptr;            // ()Ljava/lang/Object;
+// org.apache.sysds.cujava.NativePointerObject
+jfieldID NativePointerObject_nativePointer = nullptr; // long
+// org.apache.sysds.cujava.Pointer
+jclass   Pointer_class    = nullptr;            // org.apache.sysds.cujava.Pointer (global ref)
+jfieldID Pointer_buffer   = nullptr;            // Ljava/nio/Buffer;
+jfieldID Pointer_pointers = nullptr;            // [Lorg/apache/sysds/cujava/NativePointerObject;
+jfieldID Pointer_byteOffset = nullptr;          // long
+// no-argument constructor of org.apache.sysds.cujava.Pointer
+jmethodID Pointer_constructor = nullptr;        // ()V
+
+// -----------------------------------------------------------------------------
+// Resolves and caches the field/method IDs and the Pointer class used by the
+// Pointer/Buffer plumbing. Stops at the first failed lookup and returns JNI_ERR.
+// -----------------------------------------------------------------------------
+int initPointerUtils(JNIEnv *env) {
+    jclass lookup = nullptr;
+
+    // java.lang.Object#getClass()
+    if (!init(env, lookup, "java/lang/Object")
+        || !init(env, lookup, Object_getClass, "getClass", "()Ljava/lang/Class;")) return JNI_ERR;
+
+    // java.lang.Class methods we may need later (kept to match JCuda shape)
+    if (!init(env, lookup, "java/lang/Class")
+        || !init(env, lookup, Class_getComponentType, "getComponentType", "()Ljava/lang/Class;")
+        || !init(env, lookup, Class_newInstance,      "newInstance",      "()Ljava/lang/Object;")) return JNI_ERR;
+
+    // java.nio.Buffer: isDirect/hasArray/array
+    if (!init(env, lookup, "java/nio/Buffer")
+        || !init(env, lookup, Buffer_isDirect, "isDirect", "()Z")
+        || !init(env, lookup, Buffer_hasArray, "hasArray", "()Z")
+        || !init(env, lookup, Buffer_array,    "array",    "()Ljava/lang/Object;")) return JNI_ERR;
+
+    // org.apache.sysds.cujava.NativePointerObject.nativePointer (long)
+    if (!init(env, lookup, "org/apache/sysds/cujava/NativePointerObject")
+        || !init(env, lookup, NativePointerObject_nativePointer, "nativePointer", "J")) return JNI_ERR;
+
+    // org.apache.sysds.cujava.Pointer: keep a global reference, then resolve its members
+    if (!init(env, lookup, "org/apache/sysds/cujava/Pointer")) return JNI_ERR;
+    Pointer_class = (jclass)env->NewGlobalRef(lookup);
+    if (Pointer_class == nullptr
+        || !init(env, lookup, Pointer_buffer,     "buffer",     "Ljava/nio/Buffer;")
+        || !init(env, lookup, Pointer_pointers,   "pointers",   "[Lorg/apache/sysds/cujava/NativePointerObject;")
+        || !init(env, lookup, Pointer_byteOffset, "byteOffset", "J")
+        || !init(env, lookup, Pointer_constructor, "<init>", "()V")) return JNI_ERR;
+
+    return JNI_VERSION_1_4;
+}
+
+// -----------------------------------------------------------------------------
+// Helper: validate newly created PointerData. A null `pointerData` raises
+// OutOfMemoryError; otherwise PointerData::init is run and the object is
+// deleted if that fails. Returns the usable object, or nullptr on failure.
+// -----------------------------------------------------------------------------
+static PointerData* validatePointerData(JNIEnv *env, jobject nativePointerObject, PointerData *pointerData) {
+    if (pointerData != nullptr) {
+        if (pointerData->init(env, nativePointerObject)) return pointerData;
+        delete pointerData; // initialization failed; the object is not handed out
+        return nullptr;
+    }
+    ThrowByName(env, "java/lang/OutOfMemoryError",
+        "Out of memory while creating pointer data");
+    return nullptr;
+}
+
+// -----------------------------------------------------------------------------
+// Factory: create a PointerData matching the Java-side object
+// (mirrors JCuda: Pointer array -> PointersArrayPointerData,
+//  Buffer direct -> DirectBufferPointerData,
+//  Buffer with array -> ArrayBufferPointerData,
+//  else Pointer(nativePointer+byteOffset) -> NativePointerData,
+//  else non-Pointer/NULL -> NativePointerObjectPointerData)
+//
+// Returns nullptr on failure: allocation failure or failed PointerData::init
+// (both handled by validatePointerData), a pending exception after one of the
+// Buffer queries, or a buffer that is neither direct nor array-backed
+// (IllegalArgumentException). releasePointerData deletes the returned object.
+// -----------------------------------------------------------------------------
+
+// Allocates a T with the non-throwing form of `new` and hands it to
+// validatePointerData. A plain `new` would throw std::bad_alloc, bypassing the
+// OutOfMemoryError path there and unwinding through the JNI caller.
+// `kind` is the type name printed in the trace message.
+template <typename T>
+static PointerData* createPointerData(JNIEnv *env, jobject nativePointerObject, const char *kind) {
+    Logger::log(LOG_DEBUGTRACE, "Initializing %s\n", kind);
+    return validatePointerData(env, nativePointerObject, new (std::nothrow) T());
+}
+
+PointerData* initPointerData(JNIEnv *env, jobject nativePointerObject) {
+    Logger::log(LOG_DEBUGTRACE, "Initializing pointer data for Java NativePointerObject %p\n", nativePointerObject);
+
+    // NULL -> NativePointerObjectPointerData
+    if (nativePointerObject == nullptr) {
+        return createPointerData<NativePointerObjectPointerData>(env, nativePointerObject, "NativePointerObjectPointerData");
+    }
+
+    // If not an instance of Pointer -> NativePointerObjectPointerData
+    if (!env->IsInstanceOf(nativePointerObject, Pointer_class)) {
+        return createPointerData<NativePointerObjectPointerData>(env, nativePointerObject, "NativePointerObjectPointerData");
+    }
+
+    // If Pointer.pointers != null -> PointersArrayPointerData
+    jobjectArray pointersArray = (jobjectArray)env->GetObjectField(nativePointerObject, Pointer_pointers);
+    if (pointersArray != nullptr) {
+        return createPointerData<PointersArrayPointerData>(env, nativePointerObject, "PointersArrayPointerData");
+    }
+
+    // If Pointer.buffer != null -> Buffer paths
+    jobject buffer = env->GetObjectField(nativePointerObject, Pointer_buffer);
+    if (buffer != nullptr) {
+        // Direct buffer?
+        jboolean isDirect = env->CallBooleanMethod(buffer, Buffer_isDirect);
+        if (env->ExceptionCheck()) return nullptr;
+        if (isDirect == JNI_TRUE) {
+            return createPointerData<DirectBufferPointerData>(env, nativePointerObject, "DirectBufferPointerData");
+        }
+        // Backed by primitive array?
+        jboolean hasArray = env->CallBooleanMethod(buffer, Buffer_hasArray);
+        if (env->ExceptionCheck()) return nullptr;
+        if (hasArray == JNI_TRUE) {
+            return createPointerData<ArrayBufferPointerData>(env, nativePointerObject, "ArrayBufferPointerData");
+        }
+        // Neither direct nor array-backed -> error (should have been checked in Java)
+        Logger::log(LOG_ERROR, "Buffer is neither direct nor has an array\n");
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+                    "Buffer is neither direct nor has an array");
+        return nullptr;
+    }
+    // Plain Pointer: nativePointer + byteOffset
+    return createPointerData<NativePointerData>(env, nativePointerObject, "NativePointerData");
+}
+
+// Release helper: calls PointerData::release, then deletes the object and nulls the pointer.
+// If release() reports failure, false is returned and the object is neither deleted nor reset.
+bool releasePointerData(JNIEnv *env, PointerData* &pointerData, jint mode) {
+    const bool released = (pointerData == nullptr) || pointerData->release(env, mode);
+    if (released) {
+        delete pointerData; // deleting a null pointer is a no-op
+        pointerData = nullptr;
+    }
+    return released;
+}
+
+// -----------------------------------------------------------------------------
+// Misc helpers
+// -----------------------------------------------------------------------------
+/** True only if `buffer` is non-null and Buffer#isDirect() returns true without leaving an exception pending. */
+bool isDirectByteBuffer(JNIEnv *env, jobject buffer) {
+    if (buffer == nullptr) return false;
+    const jboolean direct = env->CallBooleanMethod(buffer, Buffer_isDirect);
+    return !env->ExceptionCheck() && (direct == JNI_TRUE);
+}
+
+/** True if `object` has a non-zero nativePointer field, or if it is a Pointer whose
+ *  buffer field holds a direct buffer. A null `object`, and a non-Pointer with a zero
+ *  nativePointer, yield false. */
+bool isPointerBackedByNativeMemory(JNIEnv *env, jobject object) {
+    if (object == nullptr) return false;
+
+    if (env->GetLongField(object, NativePointerObject_nativePointer) != 0) return true;
+
+    // Without a native address, only a Pointer wrapping a direct buffer qualifies.
+    if (!env->IsInstanceOf(object, Pointer_class)) return false;
+    jobject wrappedBuffer = env->GetObjectField(object, Pointer_buffer);
+    return isDirectByteBuffer(env, wrappedBuffer);
+}
+
+/** Stores `pointer` in the nativePointer field of `nativePointerObject`; a null object is ignored. */
+void setNativePointerValue(JNIEnv *env, jobject nativePointerObject, jlong pointer) {
+    if (nativePointerObject != nullptr) env->SetLongField(nativePointerObject, NativePointerObject_nativePointer, pointer);
+}
+
+/** Reads the nativePointer field of `nativePointerObject` as a native address; nullptr for a null object. */
+void* getNativePointerValue(JNIEnv *env, jobject nativePointerObject) {
+    if (nativePointerObject == nullptr) return nullptr;
+    return reinterpret_cast<void*>(static_cast<uintptr_t>(env->GetLongField(nativePointerObject, NativePointerObject_nativePointer)));
+}
+
+void setPointer(JNIEnv *env, jobject pointerObject, jlong pointer) {
+    if (pointerObject == nullptr) return; // a null object is ignored
+    setNativePointerValue(env, pointerObject, pointer);      // nativePointer = pointer
+    env->SetLongField(pointerObject, Pointer_byteOffset, 0); // and the byte offset is reset to 0
+}
+
+/** Returns nativePointer + byteOffset of `pointerObject` as a native address; nullptr for a null object. */
+void* getPointer(JNIEnv *env, jobject pointerObject) {
+    if (pointerObject == nullptr) return nullptr;
+    const jlong base   = env->GetLongField(pointerObject, NativePointerObject_nativePointer);
+    const jlong offset = env->GetLongField(pointerObject, Pointer_byteOffset);
+    return reinterpret_cast<void*>(static_cast<uintptr_t>(base + offset));
+}
diff --git a/src/main/cpp/jni/common/cujava_pointer_utils.hpp b/src/main/cpp/jni/common/cujava_pointer_utils.hpp
new file mode 100644
index 00000000000..9e23b84ffb0
--- /dev/null
+++ b/src/main/cpp/jni/common/cujava_pointer_utils.hpp
@@ -0,0 +1,499 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_POINTER_UTILS_HPP
+#define CUJAVA_POINTER_UTILS_HPP
+
+#include <jni.h>
+#include <new>
+#include "cujava_jni_utils.hpp"
+#include "cujava_logger.hpp"
+
+// -----------------------------------------------------------------------------
+// Init + helpers
+// -----------------------------------------------------------------------------
+// Initializes the cached JNI IDs / classes declared below. Callers in this
+// patch treat a return value of JNI_ERR as failure.
+int   initPointerUtils(JNIEnv* env);
+
+class PointerData;
+
+// Creates the PointerData representation for a Java pointer object.
+// Callers treat a NULL result as failure.
+PointerData* initPointerData(JNIEnv* env, jobject nativePointerObject);
+// Releases a PointerData obtained from initPointerData; 'mode' is forwarded
+// to the release logic (e.g. JNI_ABORT). Callers treat 'false' as failure.
+// NOTE(review): presumably also deletes the object and resets the reference
+// passed in 'pointerData' - confirm against the implementation.
+bool         releasePointerData(JNIEnv* env, PointerData*& pointerData, jint mode = 0);
+
+// Write / read the nativePointer field of a NativePointerObject.
+void  setNativePointerValue(JNIEnv* env, jobject nativePointerObject, jlong pointer);
+void* getNativePointerValue(JNIEnv* env, jobject nativePointerObject);
+
+// Write / read the address of a Pointer (nativePointer plus byteOffset).
+void  setPointer(JNIEnv* env, jobject pointerObject, jlong pointer);
+void* getPointer(JNIEnv* env, jobject pointerObject);
+
+// Queries about the memory behind a buffer / pointer object.
+bool  isDirectByteBuffer(JNIEnv* env, jobject buffer);
+bool  isPointerBackedByNativeMemory(JNIEnv* env, jobject object);
+
+// -----------------------------------------------------------------------------
+// Cached JNI IDs / classes (initialized in initPointerUtils)
+// -----------------------------------------------------------------------------
+extern jmethodID Buffer_isDirect;   // ()Z
+extern jmethodID Buffer_hasArray;   // ()Z
+extern jmethodID Buffer_array;      // ()Ljava/lang/Object;
+
+extern jfieldID  NativePointerObject_nativePointer; // long
+
+extern jclass    Pointer_class;     // Global ref: org.apache.sysds.cujava.Pointer
+extern jfieldID  Pointer_buffer;    // Ljava/nio/Buffer;
+extern jfieldID  Pointer_pointers;  // [Lorg/apache/sysds/cujava/NativePointerObject;
+extern jfieldID  Pointer_byteOffset;// long
+
+extern jmethodID Pointer_constructor; // ()V
+
+extern jmethodID Object_getClass;          // ()Ljava/lang/Class;
+extern jmethodID Class_getComponentType;   // ()Ljava/lang/Class;
+extern jmethodID Class_newInstance;        // ()Ljava/lang/Object;
+
+// -----------------------------------------------------------------------------
+// PointerData hierarchy
+// -----------------------------------------------------------------------------
+
+/**
+ * Virtual base class for all possible representations of pointers.
+ */
+class PointerData
+{
+public:
+    virtual ~PointerData() {}
+
+    // Captures whatever state is needed from the given Java object.
+    // Returns false on failure.
+    virtual bool  init(JNIEnv* env, jobject object) = 0;
+    // Releases the resources acquired by init(); 'mode' is a JNI release
+    // mode (callers in this patch pass 0 or JNI_ABORT). Returns false on failure.
+    virtual bool  release(JNIEnv* env, jint mode = 0) = 0;
+
+    // Returns the native address represented by this object. Some
+    // implementations acquire the address lazily on the first call.
+    virtual void* getPointer(JNIEnv* env) = 0;
+    // Gives back an address obtained via getPointer(), if the implementation
+    // needs that; a no-op for implementations that hold nothing.
+    virtual void  releasePointer(JNIEnv* env, jint mode = 0) = 0;
+
+    /**
+     * For pointers inside pointer arrays that may be updated by native code:
+     * write the new native address back into the Java object, if supported.
+     */
+    virtual bool  setNewNativePointerValue(JNIEnv* env, jlong nativePointerValue) = 0;
+};
+
+
+/**
+ * Backed by a Java NativePointerObject that is NOT a Pointer instance.
+ * Stores only the nativePointer value.
+ */
+class NativePointerObjectPointerData : public PointerData
+{
+private:
+    jobject javaObjectRef; // global reference to the Java object; NULL if none was given
+    jlong   address;       // value of the object's nativePointer field
+
+public:
+    NativePointerObjectPointerData() : javaObjectRef(NULL), address(0) {}
+    ~NativePointerObjectPointerData() {}
+
+    /**
+     * Takes a global reference to 'object' and caches its nativePointer
+     * value. A NULL object is accepted and leaves the address at zero.
+     * Returns false with a pending Java exception on failure.
+     */
+    bool init(JNIEnv* env, jobject object)
+    {
+        if (object == NULL)
+        {
+            Logger::log(LOG_DEBUGTRACE, "Initialized  NativePointerObjectPointerData %p\n", (void*)address);
+            return true;
+        }
+
+        javaObjectRef = env->NewGlobalRef(object);
+        if (javaObjectRef == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while creating global reference for pointer data");
+            return false;
+        }
+
+        address = env->GetLongField(object, NativePointerObject_nativePointer);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        Logger::log(LOG_DEBUGTRACE, "Initialized  NativePointerObjectPointerData %p\n", (void*)address);
+        return true;
+    }
+
+    /**
+     * Writes the (possibly updated) address back into the Java object and
+     * drops the global reference. Always returns true.
+     */
+    bool release(JNIEnv* env, jint = 0)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Releasing    NativePointerObjectPointerData %p\n", (void*)address);
+        if (javaObjectRef == NULL)
+        {
+            return true;
+        }
+        env->SetLongField(javaObjectRef, NativePointerObject_nativePointer, address);
+        env->DeleteGlobalRef(javaObjectRef);
+        return true;
+    }
+
+    /** Returns the cached address. */
+    void* getPointer(JNIEnv*)
+    {
+        return (void*)address;
+    }
+
+    /** Nothing is held per pointer access, so there is nothing to release. */
+    void releasePointer(JNIEnv*, jint = 0)
+    {
+    }
+
+    /** Replaces the cached address; it reaches the Java object in release(). */
+    bool setNewNativePointerValue(JNIEnv*, jlong nativePointerValue)
+    {
+        address = nativePointerValue;
+        return true;
+    }
+};
+
+
+/**
+ * Backed by a Java Pointer (nativePointer + byteOffset).
+ */
+class NativePointerData : public PointerData
+{
+private:
+    jobject javaPointerRef; // global reference to the Java Pointer
+    jlong   baseAddress;    // value of the nativePointer field
+    jlong   offsetBytes;    // value of the byteOffset field
+
+public:
+    NativePointerData() : javaPointerRef(NULL), baseAddress(0), offsetBytes(0) {}
+    ~NativePointerData() {}
+
+    /**
+     * Takes a global reference to 'object' and caches its nativePointer and
+     * byteOffset fields. Returns false with a pending Java exception on failure.
+     */
+    bool init(JNIEnv* env, jobject object)
+    {
+        javaPointerRef = env->NewGlobalRef(object);
+        if (javaPointerRef == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while creating global reference for pointer data");
+            return false;
+        }
+
+        baseAddress = env->GetLongField(object, NativePointerObject_nativePointer);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        offsetBytes = env->GetLongField(object, Pointer_byteOffset);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        Logger::log(LOG_DEBUGTRACE, "Initialized  NativePointerData              %p\n", (void*)baseAddress);
+        return true;
+    }
+
+    /**
+     * Writes the cached address and offset back into the Java Pointer and
+     * drops the global reference. Always returns true.
+     */
+    bool release(JNIEnv* env, jint = 0)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Releasing    NativePointerData              %p\n", (void*)baseAddress);
+        env->SetLongField(javaPointerRef, NativePointerObject_nativePointer, baseAddress);
+        env->SetLongField(javaPointerRef, Pointer_byteOffset, offsetBytes);
+        env->DeleteGlobalRef(javaPointerRef);
+        return true;
+    }
+
+    /** Returns the cached address advanced by the cached byte offset. */
+    void* getPointer(JNIEnv*)
+    {
+        char* base = (char*)baseAddress;
+        return (void*)(base + offsetBytes);
+    }
+
+    /** Nothing is held per pointer access, so there is nothing to release. */
+    void releasePointer(JNIEnv*, jint = 0)
+    {
+    }
+
+    /** Replaces the cached address and resets the offset to zero. */
+    bool setNewNativePointerValue(JNIEnv*, jlong nativePointerValue)
+    {
+        baseAddress = nativePointerValue;
+        offsetBytes = 0;
+        return true;
+    }
+};
+
+
+/**
+ * Backed by a Java Pointer that points to an array of NativePointerObjects.
+ */
+class PointersArrayPointerData : public PointerData
+{
+private:
+    jobject      nativePointerObject; // global ref to the Java Pointer
+    PointerData** arrayPointerDatas;  // parallel to Java array; NULL where the Java entry is null
+    void*        startPointer;        // native array of void* (one per element)
+    jlong        byteOffset;
+    bool         localPointersInitialized;
+
+    /**
+     * Fills the native void* array with the address of each element's
+     * PointerData (NULL for null elements). Done lazily, on the first
+     * getPointer() or in release().
+     */
+    void initLocalPointers(JNIEnv* env)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Initializing PointersArrayPointerData local pointers\n");
+        jobjectArray pointersArray = (jobjectArray)env->GetObjectField(
+            nativePointerObject, Pointer_pointers);
+        long size = (long)env->GetArrayLength(pointersArray);
+        void** localPointer = (void**)startPointer;
+        for (int i = 0; i < size; i++)
+        {
+            if (arrayPointerDatas[i] != NULL)
+                localPointer[i] = arrayPointerDatas[i]->getPointer(env);
+            else
+                localPointer[i] = NULL;
+        }
+        localPointersInitialized = true;
+        Logger::log(LOG_DEBUGTRACE, "Initialized  PointersArrayPointerData local pointers\n");
+    }
+
+public:
+    PointersArrayPointerData()
+    : nativePointerObject(NULL),
+      arrayPointerDatas(NULL),
+      startPointer(NULL),
+      byteOffset(0),
+      localPointersInitialized(false) {}
+
+    ~PointersArrayPointerData() {}
+
+    /**
+     * Takes a global reference to the Java Pointer, allocates the native
+     * pointer array and creates one PointerData per non-null array element.
+     *
+     * Returns false with a pending Java exception on failure.
+     * NOTE(review): buffers allocated before a failure are not freed here;
+     * whether the caller cleans them up is not visible from this class - confirm.
+     */
+    bool init(JNIEnv* env, jobject object)
+    {
+        nativePointerObject = env->NewGlobalRef(object);
+        if (nativePointerObject == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while creating global reference for pointer data");
+            return false;
+        }
+
+        jobjectArray pointersArray = (jobjectArray)env->GetObjectField(object, Pointer_pointers);
+        long size = (long)env->GetArrayLength(pointersArray);
+
+        // std::nothrow: plain new[] would throw std::bad_alloc (which must not
+        // escape into the JVM) and would make the NULL check below dead code.
+        // The trailing () zero-initializes every slot.
+        void** localPointer = new (std::nothrow) void*[size]();
+        if (localPointer == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while initializing pointer array");
+            return false;
+        }
+        startPointer = (void*)localPointer;
+
+        // Zero-initialized so that slots not reached because of an early
+        // failure below never hold indeterminate pointers.
+        arrayPointerDatas = new (std::nothrow) PointerData*[size]();
+        if (arrayPointerDatas == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while initializing pointer data array");
+            return false;
+        }
+
+        for (int i = 0; i < size; i++)
+        {
+            jobject p = env->GetObjectArrayElement(pointersArray, i);
+            if (env->ExceptionCheck()) return false;
+
+            if (p != NULL)
+            {
+                PointerData* apd = initPointerData(env, p);
+                // Drop the per-element local reference right away so that large
+                // arrays cannot exhaust the local reference table. The PointerData
+                // classes in this header keep global references (or none) instead.
+                env->DeleteLocalRef(p);
+                if (apd == NULL) return false;
+                arrayPointerDatas[i] = apd;
+            }
+            else
+            {
+                arrayPointerDatas[i] = NULL;
+            }
+        }
+
+        byteOffset = env->GetLongField(object, Pointer_byteOffset);
+        if (env->ExceptionCheck()) return false;
+
+        Logger::log(LOG_DEBUGTRACE, "Initialized  PointersArrayPointerData       %p\n", startPointer);
+        return true;
+    }
+
+    /**
+     * Unless 'mode' is JNI_ABORT, writes addresses that native code changed in
+     * the native array back into the corresponding element PointerData. Then
+     * releases every element, frees the native arrays and drops the global
+     * reference.
+     *
+     * Returns false with a pending Java exception if an element cannot be
+     * updated or released, or if native code stored an address into a slot
+     * whose Java entry is null.
+     */
+    bool release(JNIEnv* env, jint mode = 0)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Releasing    PointersArrayPointerData       %p\n", startPointer);
+
+        if (!localPointersInitialized) initLocalPointers(env);
+
+        jobjectArray pointersArray = (jobjectArray)env->GetObjectField(
+            nativePointerObject, Pointer_pointers);
+        long size = (long)env->GetArrayLength(pointersArray);
+
+        void** localPointer = (void**)startPointer;
+        if (mode != JNI_ABORT)
+        {
+            for (int i = 0; i < size; i++)
+            {
+                jobject p = env->GetObjectArrayElement(pointersArray, i);
+                if (env->ExceptionCheck()) return false;
+
+                // Only the null-ness of the entry matters below, so the local
+                // reference can be dropped immediately.
+                const bool hasJavaEntry = (p != NULL);
+                if (hasJavaEntry) env->DeleteLocalRef(p);
+
+                if (hasJavaEntry)
+                {
+                    void* oldLocalPointer = arrayPointerDatas[i]->getPointer(env);
+
+                    Logger::log(LOG_DEBUGTRACE, "About to write back pointer %d in PointersArrayPointerData\n", i);
+                    Logger::log(LOG_DEBUGTRACE, "Old local pointer was %p\n", oldLocalPointer);
+                    Logger::log(LOG_DEBUGTRACE, "New local pointer is  %p\n", localPointer[i]);
+
+                    if (localPointer[i] != oldLocalPointer)
+                    {
+                        Logger::log(LOG_DEBUGTRACE, "In pointer %d setting value %p\n", i, localPointer[i]);
+                        bool updated = arrayPointerDatas[i]->setNewNativePointerValue(env, (jlong)localPointer[i]);
+                        if (!updated) return false; // pending IllegalArgumentException
+                    }
+                }
+                else if (localPointer[i] != NULL)
+                {
+                    ThrowByName(env, "java/lang/NullPointerException",
+                                "Pointer points to an array containing a 'null' entry");
+                    return false;
+                }
+            }
+        }
+
+        if (arrayPointerDatas != NULL)
+        {
+            for (int i = 0; i < size; i++)
+            {
+                if (arrayPointerDatas[i] != NULL)
+                {
+                    if (!releasePointerData(env, arrayPointerDatas[i], mode)) return false;
+                }
+            }
+            delete[] arrayPointerDatas;
+        }
+        delete[] localPointer;
+
+        env->DeleteGlobalRef(nativePointerObject);
+        return true;
+    }
+
+    /**
+     * Returns the address of the native pointer array advanced by the byte
+     * offset, filling the array on first use.
+     */
+    void* getPointer(JNIEnv* env)
+    {
+        if (!localPointersInitialized) initLocalPointers(env);
+        return (void*)(((char*)startPointer) + byteOffset);
+    }
+
+    /** Nothing is held per pointer access, so there is nothing to release. */
+    void  releasePointer(JNIEnv*, jint = 0) {}
+
+    /** Not supported: throws IllegalArgumentException and returns false. */
+    bool setNewNativePointerValue(JNIEnv* env, jlong)
+    {
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+            "Pointer to an array of pointers may not be overwritten");
+        return false;
+    }
+};
+
+
+/**
+ * Backed by a direct java.nio.Buffer.
+ */
+class DirectBufferPointerData : public PointerData
+{
+private:
+    void* bufferBase;   // address reported by GetDirectBufferAddress
+    jlong offsetBytes;  // value of the Pointer's byteOffset field
+
+public:
+    DirectBufferPointerData() : bufferBase(NULL), offsetBytes(0) {}
+    ~DirectBufferPointerData() {}
+
+    /**
+     * Resolves the address of the buffer held by the given Pointer and caches
+     * the Pointer's byte offset. Returns false with a pending Java exception
+     * when the address cannot be obtained or the offset cannot be read.
+     */
+    bool init(JNIEnv* env, jobject object)
+    {
+        jobject backingBuffer = env->GetObjectField(object, Pointer_buffer);
+        bufferBase = env->GetDirectBufferAddress(backingBuffer);
+        if (bufferBase == 0)
+        {
+            ThrowByName(env, "java/lang/IllegalArgumentException",
+                "Failed to obtain direct buffer address");
+            return false;
+        }
+
+        offsetBytes = env->GetLongField(object, Pointer_byteOffset);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        Logger::log(LOG_DEBUGTRACE, "Initialized  DirectBufferPointerData        %p\n", bufferBase);
+        return true;
+    }
+
+    /** Holds no resources; only logs. Always returns true. */
+    bool release(JNIEnv*, jint = 0)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Releasing    DirectBufferPointerData        %p\n", bufferBase);
+        return true;
+    }
+
+    /** Returns the buffer address advanced by the cached byte offset. */
+    void* getPointer(JNIEnv*)
+    {
+        char* base = (char*)bufferBase;
+        return (void*)(base + offsetBytes);
+    }
+
+    /** Nothing is held per pointer access, so there is nothing to release. */
+    void releasePointer(JNIEnv*, jint = 0)
+    {
+    }
+
+    /** Not supported: throws IllegalArgumentException and returns false. */
+    bool setNewNativePointerValue(JNIEnv* env, jlong)
+    {
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+            "Pointer to a direct buffer may not be overwritten");
+        return false;
+    }
+};
+
+
+/**
+ * Backed by a primitive-array-backed Buffer (e.g., ByteBuffer.wrap(...)).
+ */
+class ArrayBufferPointerData : public PointerData
+{
+private:
+    jarray   arrayRef;     // global reference to the primitive array behind the buffer
+    void*    elements;     // address from GetPrimitiveArrayCritical; NULL while not acquired
+    jboolean copied;       // isCopy flag reported by GetPrimitiveArrayCritical
+    jlong    offsetBytes;  // value of the Pointer's byteOffset field
+
+public:
+    ArrayBufferPointerData()
+    : arrayRef(NULL), elements(NULL), copied(JNI_FALSE), offsetBytes(0) {}
+    ~ArrayBufferPointerData() {}
+
+    /**
+     * Fetches the array behind the Pointer's buffer (via Buffer_array), keeps
+     * a global reference to it and caches the byte offset. The array contents
+     * are not accessed yet; that happens in getPointer().
+     * Returns false with a pending Java exception on failure.
+     */
+    bool init(JNIEnv* env, jobject object)
+    {
+        jobject backingBuffer = env->GetObjectField(object, Pointer_buffer);
+        jobject backingArray  = env->CallObjectMethod(backingBuffer, Buffer_array);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        arrayRef = (jarray)env->NewGlobalRef(backingArray);
+        if (arrayRef == NULL)
+        {
+            ThrowByName(env, "java/lang/OutOfMemoryError",
+                "Out of memory while creating array reference");
+            return false;
+        }
+
+        offsetBytes = env->GetLongField(object, Pointer_byteOffset);
+        if (env->ExceptionCheck())
+        {
+            return false;
+        }
+
+        Logger::log(LOG_DEBUGTRACE, "Initialized  ArrayBufferPointerData         %p (deferred)\n", elements);
+        return true;
+    }
+
+    /**
+     * Gives back the array contents (if acquired) using 'mode' and drops the
+     * global reference. Always returns true.
+     */
+    bool release(JNIEnv* env, jint mode = 0)
+    {
+        Logger::log(LOG_DEBUGTRACE, "Releasing    ArrayBufferPointerData         %p\n", elements);
+        releasePointer(env, mode);
+        env->DeleteGlobalRef(arrayRef);
+        return true;
+    }
+
+    /**
+     * Acquires the array contents on first use and returns their address
+     * advanced by the byte offset. Returns NULL if the acquisition fails.
+     */
+    void* getPointer(JNIEnv* env)
+    {
+        if (elements != NULL)
+        {
+            return (void*)(((char*)elements) + offsetBytes);
+        }
+
+        Logger::log(LOG_DEBUGTRACE, "Initializing ArrayBufferPointerData critical\n");
+        copied = JNI_FALSE;
+        elements = env->GetPrimitiveArrayCritical(arrayRef, &copied);
+        if (elements == NULL)
+        {
+            return NULL;
+        }
+        Logger::log(LOG_DEBUGTRACE, "Initialized  ArrayBufferPointerData         %p (isCopy %d)\n", elements, (int)copied);
+        return (void*)(((char*)elements) + offsetBytes);
+    }
+
+    /**
+     * Gives back the array contents if they are currently acquired. 'mode' is
+     * only honoured when the JVM handed out a copy; otherwise JNI_ABORT is used.
+     */
+    void releasePointer(JNIEnv* env, jint mode = 0)
+    {
+        if (elements == NULL)
+        {
+            return;
+        }
+        Logger::log(LOG_DEBUGTRACE, "Releasing    ArrayBufferPointerData critical\n");
+        const jint effectiveMode = copied ? mode : JNI_ABORT;
+        env->ReleasePrimitiveArrayCritical(arrayRef, elements, effectiveMode);
+        elements = NULL;
+    }
+
+    /** Not supported: throws IllegalArgumentException and returns false. */
+    bool setNewNativePointerValue(JNIEnv* env, jlong)
+    {
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+            "Pointer to an array may not be overwritten");
+        return false;
+    }
+};
+
+#endif // CUJAVA_POINTER_UTILS_HPP
diff --git a/src/main/cpp/jni/cublas/CMakeLists.txt b/src/main/cpp/jni/cublas/CMakeLists.txt
new file mode 100644
index 00000000000..02ca2e5dd37
--- /dev/null
+++ b/src/main/cpp/jni/cublas/CMakeLists.txt
@@ -0,0 +1,60 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaCublas LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED)  # provides CUDA::cublas and CUDA::cudart
+
+# JNI bindings for cuBLAS, loaded from Java as "cujava_cublas".
+add_library(CuJavaCublas SHARED
+    cujava_cublas.cpp
+)
+
+# The sources use C++11 features (e.g. nullptr); CXX_STANDARD_REQUIRED makes
+# configuration fail instead of silently falling back to an older standard.
+# NOTE(review): the output directories point into the source tree.
+set_target_properties(CuJavaCublas PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON
+    OUTPUT_NAME cujava_cublas                      # -> libcujava_cublas.so
+    LIBRARY_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    RUNTIME_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    ARCHIVE_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+)
+
+target_include_directories(CuJavaCublas
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        "${CMAKE_CURRENT_SOURCE_DIR}"            # headers in cublas/
+        "${CMAKE_CURRENT_SOURCE_DIR}/../common"  # shared JNI helper headers
+)
+
+# Link cuBLAS v2 plus the CUDA runtime, which this package calls directly.
+target_link_libraries(CuJavaCublas
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cublas
+        CUDA::cudart         # needed for cudaDeviceSynchronize()
+        ${JNI_LIBRARIES}
+)
+
+
+
diff --git a/src/main/cpp/jni/cublas/cujava_cublas.cpp b/src/main/cpp/jni/cublas/cujava_cublas.cpp
new file mode 100644
index 00000000000..a2e448af56e
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas.cpp
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include "cujava_cublas.hpp"
+#include "cujava_cublas_common.hpp"
+
+/**
+ * Guard for jobject parameters of the JNI entry points in this file.
+ *
+ * If 'obj' is null, throws java.lang.NullPointerException (via ThrowByName)
+ * with the message "Parameter '<name>' is null for <method>" and makes the
+ * ENCLOSING function return CUJAVA_CUBLAS_INTERNAL_ERROR.
+ *
+ * 'name' and 'method' must be string literals: the message is built by
+ * adjacent string literal concatenation. The do/while(0) wrapper lets the
+ * macro be used like a single statement.
+ */
+#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method)                           \
+    do {                                                                          \
+        if ((obj) == nullptr) {                                                   \
+            ThrowByName((env), "java/lang/NullPointerException",                  \
+                        "Parameter '" name "' is null for " method);              \
+            return CUJAVA_CUBLAS_INTERNAL_ERROR;                                  \
+        }                                                                         \
+    } while (0)
+
+
+
+/**
+ * Library load hook: obtains the JNIEnv for JNI 1.4 and initializes the
+ * shared JNI and pointer utilities, in that order.
+ *
+ * Returns JNI_VERSION_1_4 on success and JNI_ERR if the environment cannot
+ * be obtained or either initialization step reports JNI_ERR.
+ */
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    JNIEnv *env = nullptr;
+    const jint envStatus = jvm->GetEnv((void **)&env, JNI_VERSION_1_4);
+    if (envStatus != JNI_OK) {
+        return JNI_ERR;
+    }
+
+    // Short-circuit evaluation keeps the original order: pointer utilities
+    // are only initialized when the JNI utilities came up successfully.
+    const bool utilsReady = initJNIUtils(env) != JNI_ERR
+                         && initPointerUtils(env) != JNI_ERR;
+    return utilsReady ? JNI_VERSION_1_4 : JNI_ERR;
+}
+
+
+
+/**
+ * Library unload hook. Intentionally does nothing.
+ */
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) {
+    (void)vm;
+    (void)reserved;
+}
+
+
+
+/**
+ * JNI entry point for cublasCreate.
+ *
+ * Creates a cuBLAS handle and stores it in the nativePointer field of the
+ * given 'handle' object. Returns the cuBLAS status as jint, or
+ * CUJAVA_CUBLAS_INTERNAL_ERROR (with a pending NullPointerException) when
+ * 'handle' is null.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasCreateNative(JNIEnv *env, jclass cls, jobject handle) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasCreate");
+
+    Logger::log(LOG_TRACE, "Executing cublasCreate(handle=%p)\n", handle);
+
+    // Start from a null handle: if cublasCreate fails without writing its
+    // output argument, the Java object receives 0 instead of an
+    // indeterminate value.
+    cublasHandle_t handle_native = nullptr;
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasCreate(&handle_native);
+    setNativePointerValue(env, handle, (jlong)handle_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/**
+ * JNI entry point for cublasDestroy.
+ *
+ * Reads the native handle from the given 'handle' object and passes it to
+ * cublasDestroy. Returns the cuBLAS status as jint, or
+ * CUJAVA_CUBLAS_INTERNAL_ERROR (with a pending NullPointerException) when
+ * 'handle' is null.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDestroyNative(JNIEnv *env, jclass cls, jobject handle) {
+
+    // Reject null jobject parameters up front
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDestroy");
+
+    Logger::log(LOG_TRACE, "Executing cublasDestroy(handle=%p)\n", handle);
+
+    // Translate the Java handle object into the native handle
+    cublasHandle_t handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+
+    // Cublas API call; its status is handed back to Java unchanged
+    const cublasStatus_t status = cublasDestroy(handle_native);
+    return (jint)status;
+}
+
+
+
+/**
+ * JNI entry point for cublasDgeam.
+ *
+ * 'A', 'B' and 'C' are read as plain Pointer addresses; 'alpha' and 'beta' go
+ * through PointerData, so they may also be backed by Java buffers/arrays.
+ *
+ * Returns the cuBLAS status as jint, or CUJAVA_CUBLAS_INTERNAL_ERROR when a
+ * jobject parameter is null or pointer data cannot be set up / released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgeamNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jobject alpha, jobject A,
+     jint lda, jobject beta, jobject B, jint ldb, jobject C, jint ldc) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgeam");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgeam");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgeam");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgeam");
+    CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDgeam");
+    CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDgeam");
+
+    Logger::log(LOG_TRACE, "Executing cublasDgeam(handle=%p, transa=%d, transb=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, beta=%p, B=%p, ldb=%d, C=%p, ldc=%d)\n",
+        handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+
+    // Everything that needs ordinary JNI calls is done first, before any
+    // array-backed scalar is pinned by PointerData::getPointer().
+    cublasHandle_t handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+    cublasOperation_t transa_native = (cublasOperation_t)transa;
+    cublasOperation_t transb_native = (cublasOperation_t)transb;
+    int m_native = (int)m;
+    int n_native = (int)n;
+    double * A_native = (double *)getPointer(env, A);
+    int lda_native = (int)lda;
+    double * B_native = (double *)getPointer(env, B);
+    int ldb_native = (int)ldb;
+    double * C_native = (double *)getPointer(env, C);
+    int ldc_native = (int)ldc;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak alpha. Its release makes JNI calls, so the exception
+        // raised for beta is set aside while releasing and re-raised afterwards.
+        jthrowable pending = env->ExceptionOccurred();
+        env->ExceptionClear();
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        if (pending != nullptr) {
+            env->Throw(pending);
+        }
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+
+    // Resolve the scalar addresses last: for array-backed data this may enter
+    // a JNI critical region, in which no other JNI functions may be called.
+    double * alpha_native = (double *)alpha_pointerData->getPointer(env);
+    double * beta_native = (double *)beta_pointerData->getPointer(env);
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDgeam(handle_native, transa_native, transb_native, m_native, n_native, alpha_native,
+        A_native, lda_native, beta_native, B_native, ldb_native, C_native, ldc_native);
+
+    // Give back both addresses first (leaving any critical region), then
+    // release both PointerData objects even if the first release fails.
+    alpha_pointerData->releasePointer(env, JNI_ABORT);
+    beta_pointerData->releasePointer(env, JNI_ABORT);
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/**
+ * JNI entry point for cublasDdot.
+ *
+ * 'x' and 'y' are read as plain Pointer addresses; 'result' goes through
+ * PointerData, so it may also be backed by a Java buffer/array.
+ *
+ * Returns the cuBLAS status as jint, or CUJAVA_CUBLAS_INTERNAL_ERROR when a
+ * jobject parameter is null or pointer data cannot be set up / released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDdotNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject x, jint incx, jobject y, jint incy, jobject result) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDdot");
+    CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDdot");
+    CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDdot");
+    CUJAVA_REQUIRE_NONNULL(env, result, "result", "cublasDdot");
+
+    Logger::log(LOG_TRACE, "Executing cublasDdot(handle=%p, n=%d, x=%p, incx=%d, y=%p, incy=%d, result=%p)\n",
+        handle, n, x, incx, y, incy, result);
+
+    // Declare native variables
+    cublasHandle_t handle_native;
+    int n_native = 0;
+    double * x_native = nullptr;
+    int incx_native = 0;
+    double * y_native = nullptr;
+    int incy_native = 0;
+    double * result_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+    n_native = (int)n;
+    x_native = (double *)getPointer(env, x);
+    incx_native = (int)incx;
+    y_native = (double *)getPointer(env, y);
+    incy_native = (int)incy;
+
+    // Decide now whether a synchronization is needed after the call. The check
+    // makes JNI calls (including a Java method invocation), which must not
+    // happen once getPointer() below may have entered a JNI critical region
+    // for an array-backed result.
+    const bool resultIsNative = isPointerBackedByNativeMemory(env, result);
+
+    PointerData *result_pointerData = initPointerData(env, result);
+    if (result_pointerData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    result_native = (double *)result_pointerData->getPointer(env);
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDdot(handle_native, n_native, x_native, incx_native, y_native, incy_native, result_native);
+
+    // NOTE(review): presumably ensures the result has been written before it
+    // is handed back to Java-managed memory - confirm.
+    if (!resultIsNative) {
+        cudaDeviceSynchronize();                        // CUDA runtime call; requires linking cudart
+    }
+    if (!releasePointerData(env, result_pointerData, 0)) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/**
+ * JNI entry point for cublasDgemv.
+ *
+ * 'A', 'x' and 'y' are read as plain Pointer addresses; 'alpha' and 'beta' go
+ * through PointerData, so they may also be backed by Java buffers/arrays.
+ *
+ * Returns the cuBLAS status as jint, or CUJAVA_CUBLAS_INTERNAL_ERROR when a
+ * jobject parameter is null or pointer data cannot be set up / released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemvNative
+    (JNIEnv *env, jclass cls, jobject handle, jint trans, jint m, jint n, jobject alpha, jobject A, jint lda,
+     jobject x, jint incx, jobject beta, jobject y, jint incy) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgemv");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgemv");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgemv");
+    CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDgemv");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgemv");
+    CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDgemv");
+
+    Logger::log(LOG_TRACE, "Executing cublasDgemv(handle=%p, trans=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, x=%p, incx=%d, beta=%p, y=%p, incy=%d)\n",
+        handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
+
+    // Everything that needs ordinary JNI calls is done first, before any
+    // array-backed scalar is pinned by PointerData::getPointer().
+    cublasHandle_t handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+    cublasOperation_t trans_native = (cublasOperation_t)trans;
+    int m_native = (int)m;
+    int n_native = (int)n;
+    double * A_native = (double *)getPointer(env, A);
+    int lda_native = (int)lda;
+    double * x_native = (double *)getPointer(env, x);
+    int incx_native = (int)incx;
+    double * y_native = (double *)getPointer(env, y);
+    int incy_native = (int)incy;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak alpha. Its release makes JNI calls, so the exception
+        // raised for beta is set aside while releasing and re-raised afterwards.
+        jthrowable pending = env->ExceptionOccurred();
+        env->ExceptionClear();
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        if (pending != nullptr) {
+            env->Throw(pending);
+        }
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+
+    // Resolve the scalar addresses last: for array-backed data this may enter
+    // a JNI critical region, in which no other JNI functions may be called.
+    double * alpha_native = (double *)alpha_pointerData->getPointer(env);
+    double * beta_native = (double *)beta_pointerData->getPointer(env);
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDgemv(handle_native, trans_native, m_native, n_native, alpha_native, A_native,
+        lda_native, x_native, incx_native, beta_native, y_native, incy_native);
+
+    // Give back both addresses first (leaving any critical region), then
+    // release both PointerData objects even if the first release fails.
+    alpha_pointerData->releasePointer(env, JNI_ABORT);
+    beta_pointerData->releasePointer(env, JNI_ABORT);
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/**
+ * JNI entry point for cublasDgemm.
+ *
+ * 'A', 'B' and 'C' are read as plain Pointer addresses; 'alpha' and 'beta' go
+ * through PointerData, so they may also be backed by Java buffers/arrays.
+ *
+ * Returns the cuBLAS status as jint, or CUJAVA_CUBLAS_INTERNAL_ERROR when a
+ * jobject parameter is null or pointer data cannot be set up / released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jint k, jobject alpha,
+     jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDgemm");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDgemm");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDgemm");
+    CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDgemm");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDgemm");
+    CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDgemm");
+
+    Logger::log(LOG_TRACE, "Executing cublasDgemm(handle=%p, transa=%d, transb=%d, m=%d, n=%d, k=%d, alpha=%p, A=%p, lda=%d, B=%p, ldb=%d, beta=%p, C=%p, ldc=%d)\n",
+        handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+
+    // Everything that needs ordinary JNI calls is done first, before any
+    // array-backed scalar is pinned by PointerData::getPointer().
+    cublasHandle_t handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+    cublasOperation_t transa_native = (cublasOperation_t)transa;
+    cublasOperation_t transb_native = (cublasOperation_t)transb;
+    int m_native = (int)m;
+    int n_native = (int)n;
+    int k_native = (int)k;
+    double * A_native = (double *)getPointer(env, A);
+    int lda_native = (int)lda;
+    double * B_native = (double *)getPointer(env, B);
+    int ldb_native = (int)ldb;
+    double * C_native = (double *)getPointer(env, C);
+    int ldc_native = (int)ldc;
+
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak alpha. Its release makes JNI calls, so the exception
+        // raised for beta is set aside while releasing and re-raised afterwards.
+        jthrowable pending = env->ExceptionOccurred();
+        env->ExceptionClear();
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        if (pending != nullptr) {
+            env->Throw(pending);
+        }
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+
+    // Resolve the scalar addresses last: for array-backed data this may enter
+    // a JNI critical region, in which no other JNI functions may be called.
+    double * alpha_native = (double *)alpha_pointerData->getPointer(env);
+    double * beta_native = (double *)beta_pointerData->getPointer(env);
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDgemm(handle_native, transa_native, transb_native, m_native, n_native, k_native,
+        alpha_native, A_native, lda_native, B_native, ldb_native, beta_native, C_native, ldc_native);
+
+    // Give back both addresses first (leaving any critical region), then
+    // release both PointerData objects even if the first release fails.
+    alpha_pointerData->releasePointer(env, JNI_ABORT);
+    beta_pointerData->releasePointer(env, JNI_ABORT);
+    const bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+/*
+ * JNI entry point backing CuJavaCublas.cublasDsyrkNative; forwards to cublasDsyrk.
+ *
+ * alpha and beta are accessed through PointerData objects that are released
+ * with JNI_ABORT on every path once acquired (inputs only, no commit requested).
+ *
+ * Returns the cublasStatus_t as a jint, or CUJAVA_CUBLAS_INTERNAL_ERROR if the
+ * alpha/beta pointer data cannot be initialized or released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDsyrkNative
+    (JNIEnv *env, jclass cls, jobject handle, jint uplo, jint trans, jint n, jint k, jobject alpha,
+     jobject A, jint lda, jobject beta, jobject C, jint ldc) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDsyrk");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDsyrk");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDsyrk");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cublasDsyrk");
+    CUJAVA_REQUIRE_NONNULL(env, C, "C", "cublasDsyrk");
+
+    Logger::log(LOG_TRACE, "Executing cublasDsyrk(handle=%p, uplo=%d, trans=%d, n=%d, k=%d, alpha=%p, A=%p, lda=%d, beta=%p, C=%p, ldc=%d)\n",
+        handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
+
+    // Copy Java inputs into native locals (same JNI call order as before)
+    cublasHandle_t handle_native = (cublasHandle_t)getNativePointerValue(env, handle);
+    cublasFillMode_t uplo_native = (cublasFillMode_t)uplo;
+    cublasOperation_t trans_native = (cublasOperation_t)trans;
+    int n_native = (int)n;
+    int k_native = (int)k;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    double *alpha_native = (double *)alpha_pointerData->getPointer(env);
+    double *A_native = (double *)getPointer(env, A);
+    int lda_native = (int)lda;
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: release it so it does not leak on this error path
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    double *beta_native = (double *)beta_pointerData->getPointer(env);
+    double *C_native = (double *)getPointer(env, C);
+    int ldc_native = (int)ldc;
+
+    // Cublas API call
+    cublasStatus_t jniResult_native = cublasDsyrk(handle_native, uplo_native, trans_native, n_native, k_native,
+        alpha_native, A_native, lda_native, beta_native, C_native, ldc_native);
+
+    // Release both scalars before inspecting either result, so that beta is
+    // still released when the release of alpha reports a failure.
+    bool alphaReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    bool betaReleased = releasePointerData(env, beta_pointerData, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    return (jint)jniResult_native;
+}
+
+
+
+/*
+ * JNI entry point backing CuJavaCublas.cublasDaxpyNative; forwards to cublasDaxpy.
+ *
+ * The cuBLAS handle is read from the Java wrapper with getNativePointerValue,
+ * x and y are resolved with getPointer, and the alpha scalar is accessed
+ * through a PointerData object that is released with JNI_ABORT after the call.
+ *
+ * Returns the cublasStatus_t of the cuBLAS call as a jint, or
+ * CUJAVA_CUBLAS_INTERNAL_ERROR if the alpha pointer data cannot be
+ * initialized or released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDaxpyNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy) {
+
+    // Reject null object arguments before touching any of them
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDaxpy");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDaxpy");
+    CUJAVA_REQUIRE_NONNULL(env, x, "x", "cublasDaxpy");
+    CUJAVA_REQUIRE_NONNULL(env, y, "y", "cublasDaxpy");
+
+    Logger::log(LOG_TRACE, "Executing cublasDaxpy(handle=%p, n=%d, alpha=%p, x=%p, incx=%d, y=%p, incy=%d)\n",
+        handle, n, alpha, x, incx, y, incy);
+
+    // Translate the Java arguments into their native equivalents
+    const cublasHandle_t blasHandle = (cublasHandle_t)getNativePointerValue(env, handle);
+    const int count = (int)n;
+    PointerData *alphaData = initPointerData(env, alpha);
+    if (alphaData == nullptr) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    double *alphaPtr = (double *)alphaData->getPointer(env);
+    double *xPtr = (double *)getPointer(env, x);
+    const int strideX = (int)incx;
+    double *yPtr = (double *)getPointer(env, y);
+    const int strideY = (int)incy;
+
+    // Hand everything over to cuBLAS
+    const cublasStatus_t status = cublasDaxpy(blasHandle, count, alphaPtr, xPtr, strideX, yPtr, strideY);
+
+    // alpha is an input only, so it is released without a commit
+    if (!releasePointerData(env, alphaData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    return (jint)status;
+}
+
+
+
+/*
+ * JNI entry point backing CuJavaCublas.cublasDtrsmNative; forwards to cublasDtrsm.
+ *
+ * The integer side/uplo/trans/diag arguments are cast to the matching cuBLAS
+ * enums, A and B are resolved with getPointer, and the alpha scalar is accessed
+ * through a PointerData object that is released with JNI_ABORT after the call.
+ *
+ * Returns the cublasStatus_t of the cuBLAS call as a jint, or
+ * CUJAVA_CUBLAS_INTERNAL_ERROR if the alpha pointer data cannot be
+ * initialized or released.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDtrsmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint side, jint uplo, jint trans, jint diag, jint m,
+     jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb) {
+
+    // Reject null object arguments before touching any of them
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cublasDtrsm");
+    CUJAVA_REQUIRE_NONNULL(env, B, "B", "cublasDtrsm");
+
+    Logger::log(LOG_TRACE, "Executing cublasDtrsm(handle=%p, side=%d, uplo=%d, trans=%d, diag=%d, m=%d, n=%d, alpha=%p, A=%p, lda=%d, B=%p, ldb=%d)\n",
+        handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+
+    // Unwrap the cuBLAS handle and turn the integer mode flags into cuBLAS enums
+    const cublasHandle_t blasHandle = (cublasHandle_t)getNativePointerValue(env, handle);
+    const cublasSideMode_t sideMode = (cublasSideMode_t)side;
+    const cublasFillMode_t fillMode = (cublasFillMode_t)uplo;
+    const cublasOperation_t operation = (cublasOperation_t)trans;
+    const cublasDiagType_t diagType = (cublasDiagType_t)diag;
+
+    const int rows = (int)m;
+    const int cols = (int)n;
+
+    PointerData *alphaData = initPointerData(env, alpha);
+    if (alphaData == nullptr) {
+        return CUJAVA_CUBLAS_INTERNAL_ERROR;
+    }
+    double *alphaPtr = (double *)alphaData->getPointer(env);
+
+    double *aPtr = (double *)getPointer(env, A);
+    const int leadingDimA = (int)lda;
+    double *bPtr = (double *)getPointer(env, B);
+    const int leadingDimB = (int)ldb;
+
+    // Hand everything over to cuBLAS
+    const cublasStatus_t status = cublasDtrsm(blasHandle, sideMode, fillMode, operation, diagType,
+        rows, cols, alphaPtr, aPtr, leadingDimA, bPtr, leadingDimB);
+
+    // alpha is an input only, so it is released without a commit
+    if (!releasePointerData(env, alphaData, JNI_ABORT)) return CUJAVA_CUBLAS_INTERNAL_ERROR;
+
+    return (jint)status;
+}
diff --git a/src/main/cpp/jni/cublas/cujava_cublas.hpp b/src/main/cpp/jni/cublas/cujava_cublas.hpp
new file mode 100644
index 00000000000..39523d78f6f
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas.hpp
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <jni.h>
+
+#ifndef _Included_org_apache_sysds_cujava_cublas_CuJavaCublas
+#define _Included_org_apache_sysds_cujava_cublas_CuJavaCublas
+#ifdef __cplusplus
+extern "C" {  /* C linkage: the exported names stay unmangled for JNI symbol lookup */
+#endif
+
+
+/*
+ * Class:  org.apache.sysds.cujava.cublas.CuJavaCublas
+ * Methods:
+ *  - cublasCreate
+ *  - cublasDestroy
+ *  - cublasDgeam
+ *  - cublasDdot
+ *  - cublasDgemv
+ *  - cublasDgemm
+ *  - cublasDsyrk
+ *  - cublasDaxpy
+ *  - cublasDtrsm
+ */
+
+/* Exported names follow the JNI scheme Java_<package>_<class>_<method>; every function returns a jint status. */
+/* Native counterpart of CuJavaCublas.cublasCreateNative. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasCreateNative(JNIEnv *env, jclass cls, jobject handle);
+
+/* Native counterpart of CuJavaCublas.cublasDestroyNative. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDestroyNative(JNIEnv *env, jclass cls, jobject handle);
+
+/* Native counterpart of CuJavaCublas.cublasDgeamNative. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgeamNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jobject alpha, jobject A,
+     jint lda, jobject beta, jobject B, jint ldb, jobject C, jint ldc);
+
+/* Native counterpart of CuJavaCublas.cublasDdotNative. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDdotNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject x, jint incx, jobject y, jint incy, jobject result);
+
+/* Forwards to cublasDgemv and returns its status as a jint. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemvNative
+    (JNIEnv *env, jclass cls, jobject handle, jint trans, jint m, jint n, jobject alpha, jobject A, jint lda,
+     jobject x, jint incx, jobject beta, jobject y, jint incy);
+
+/* Forwards to cublasDgemm and returns its status as a jint. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDgemmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint transa, jint transb, jint m, jint n, jint k, jobject alpha,
+     jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc);
+
+/* Forwards to cublasDsyrk and returns its status as a jint. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDsyrkNative
+    (JNIEnv *env, jclass cls, jobject handle, jint uplo, jint trans, jint n, jint k, jobject alpha,
+     jobject A, jint lda, jobject beta, jobject C, jint ldc);
+
+/* Forwards to cublasDaxpy and returns its status as a jint. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDaxpyNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy);
+
+/* Forwards to cublasDtrsm and returns its status as a jint. */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cublas_CuJavaCublas_cublasDtrsmNative
+    (JNIEnv *env, jclass cls, jobject handle, jint side, jint uplo, jint trans, jint diag, jint m,
+     jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _Included_org_apache_sysds_cujava_cublas_CuJavaCublas */
diff --git a/src/main/cpp/jni/cublas/cujava_cublas_common.hpp b/src/main/cpp/jni/cublas/cujava_cublas_common.hpp
new file mode 100644
index 00000000000..80950c84697
--- /dev/null
+++ b/src/main/cpp/jni/cublas/cujava_cublas_common.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#ifndef CUJAVA_CUBLAS_COMMON_HPP
+#define CUJAVA_CUBLAS_COMMON_HPP
+
+#include <jni.h>
+#include <cublas_v2.h>      // cuBLAS v1 is deprecated
+#include <cuda_runtime.h>
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+
+#define CUJAVA_CUBLAS_INTERNAL_ERROR (-1)  // wrapper-level failure code returned by the cuBLAS JNI functions, e.g. when pointer data cannot be initialized or released
+
+#endif // CUJAVA_CUBLAS_COMMON_HPP
+
diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn.cpp b/src/main/cpp/jni/cudnn/cujava_cudnn.cpp
new file mode 100644
index 00000000000..7b5b8aba71c
--- /dev/null
+++ b/src/main/cpp/jni/cudnn/cujava_cudnn.cpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+// TODO: Implement the JNI wrapper for cuDNN
diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn.hpp b/src/main/cpp/jni/cudnn/cujava_cudnn.hpp
new file mode 100644
index 00000000000..042f3ce1f39
--- /dev/null
+++ b/src/main/cpp/jni/cudnn/cujava_cudnn.hpp
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
diff --git a/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp b/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp
new file mode 100644
index 00000000000..042f3ce1f39
--- /dev/null
+++ b/src/main/cpp/jni/cudnn/cujava_cudnn_common.hpp
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver.cpp b/src/main/cpp/jni/cusolver/cujava_cusolver.cpp
new file mode 100644
index 00000000000..5194fceec27
--- /dev/null
+++ b/src/main/cpp/jni/cusolver/cujava_cusolver.cpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+// TODO: Implement the JNI wrapper for cuSolver
diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver.hpp b/src/main/cpp/jni/cusolver/cujava_cusolver.hpp
new file mode 100644
index 00000000000..042f3ce1f39
--- /dev/null
+++ b/src/main/cpp/jni/cusolver/cujava_cusolver.hpp
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
diff --git a/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp b/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp
new file mode 100644
index 00000000000..042f3ce1f39
--- /dev/null
+++ b/src/main/cpp/jni/cusolver/cujava_cusolver_common.hpp
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
diff --git a/src/main/cpp/jni/cusparse/CMakeLists.txt b/src/main/cpp/jni/cusparse/CMakeLists.txt
new file mode 100644
index 00000000000..2b728bd0da4
--- /dev/null
+++ b/src/main/cpp/jni/cusparse/CMakeLists.txt
@@ -0,0 +1,57 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+cmake_minimum_required(VERSION 3.18)
+project(CuJavaCusparse LANGUAGES CXX)  # builds the cuSPARSE JNI bridge as a shared library
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED)  # provides the CUDA::cusparse and CUDA::cudart targets
+
+add_library(CuJavaCusparse SHARED
+    cujava_cusparse.cpp
+)
+
+set_target_properties(CuJavaCusparse PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON                         # fail at configure time instead of silently decaying to an older standard
+    OUTPUT_NAME cujava_cusparse                      # -> libcujava_cusparse.so
+    LIBRARY_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    RUNTIME_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    ARCHIVE_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+)
+
+target_include_directories(CuJavaCusparse
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        "${CMAKE_CURRENT_SOURCE_DIR}"            # headers in cusparse/
+        "${CMAKE_CURRENT_SOURCE_DIR}/../common"  # shared logger / JNI / pointer utility headers
+)
+
+target_link_libraries(CuJavaCusparse
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cusparse
+        CUDA::cudart         # needed for cudaDeviceSynchronize()
+        ${JNI_LIBRARIES}
+)
+
diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse.cpp b/src/main/cpp/jni/cusparse/cujava_cusparse.cpp
new file mode 100644
index 00000000000..97ec11a7dd3
--- /dev/null
+++ b/src/main/cpp/jni/cusparse/cujava_cusparse.cpp
@@ -0,0 +1,1586 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include "cujava_cusparse.hpp"
+#include "cujava_cusparse_common.hpp"
+
+#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method)                           \
+    do { /* null-argument guard; do/while(0) makes it usable as one statement */  \
+        if ((obj) == nullptr) {                                                   \
+            ThrowByName((env), "java/lang/NullPointerException",                  \
+                        "Parameter '" name "' is null for " method); /* name and method must be string literals: they are concatenated */ \
+            return CUJAVA_CUSPARSE_INTERNAL_ERROR; /* returns from the function that uses the macro */ \
+        }                                                                         \
+    } while (0)
+
+
+// Library load hook: obtains a JNIEnv and initializes the shared JNI and pointer helpers.
+// Returns JNI_VERSION_1_4 on success and JNI_ERR as soon as any step fails.
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    JNIEnv *jniEnv = nullptr;
+    const jint envStatus = jvm->GetEnv((void **)&jniEnv, JNI_VERSION_1_4);
+    if (envStatus != JNI_OK) {
+        return JNI_ERR;
+    }
+
+    const bool helpersReady = initJNIUtils(jniEnv) != JNI_ERR && initPointerUtils(jniEnv) != JNI_ERR;
+    return helpersReady ? JNI_VERSION_1_4 : JNI_ERR;
+}
+
+
+
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) { // library unload hook
+} // empty body: no cleanup is performed when the library is unloaded
+
+
+
+// JNI entry point for cusparseSpGEMM_copy: unwraps the handle and descriptors, accesses alpha/beta through
+// PointerData and returns the cusparseStatus_t as a jint (CUJAVA_CUSPARSE_INTERNAL_ERROR on a wrapper-level failure).
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1copyNative
+  (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB,jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC,
+   jint computeType, jint alg, jobject spgemmDescr) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_copy");
+    CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_copy");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_copy\n");
+    // Copy Java inputs into native locals
+    cusparseHandle_t h = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseOperation_t aOp = (cusparseOperation_t)opA;
+    cusparseOperation_t bOp = (cusparseOperation_t)opB;
+    PointerData* alphaPD = initPointerData(env, alpha); if (!alphaPD) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    void* alphaPtr = alphaPD->getPointer(env);
+    cusparseConstSpMatDescr_t A = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseConstSpMatDescr_t B = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB);
+    PointerData* betaPD = initPointerData(env, beta);  if (!betaPD)  { releasePointerData(env, alphaPD, JNI_ABORT); return CUJAVA_CUSPARSE_INTERNAL_ERROR; }
+    void* betaPtr = betaPD->getPointer(env);
+    cusparseSpMatDescr_t C = (cusparseSpMatDescr_t)getNativePointerValue(env, matC);
+    cudaDataType ct = (cudaDataType)computeType;
+    cusparseSpGEMMAlg_t al = (cusparseSpGEMMAlg_t)alg;
+    cusparseSpGEMMDescr_t D  = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr);
+
+    // Cusparse API call
+    cusparseStatus_t st = cusparseSpGEMM_copy(h, aOp, bOp, alphaPtr, A, B, betaPtr, C, ct, al, D);
+    // alpha/beta are inputs -> no commit; both are released before checking so beta is not leaked if alpha's release fails
+    const bool alphaReleased = releasePointerData(env, alphaPD, JNI_ABORT);
+    const bool betaReleased = releasePointerData(env, betaPD, JNI_ABORT);
+    if (!alphaReleased || !betaReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    return (jint)st;
+}
+
+
+/*
+ * JNI entry point for cusparseGetMatIndexBase: unwraps the matrix descriptor held by descrA and returns
+ * the cusparseIndexBase_t reported by cuSPARSE as a jint (CUJAVA_CUSPARSE_INTERNAL_ERROR if descrA is null).
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseGetMatIndexBaseNative(JNIEnv *env, jclass cls, jobject descrA) {
+
+    // Reject a null descriptor wrapper up front
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseGetMatIndexBase");
+
+    Logger::log(LOG_TRACE, "Executing cusparseGetMatIndexBase(descrA=%p)\n", descrA);
+
+    // Unwrap the native descriptor handle stored in the Java object
+    const cusparseMatDescr_t descriptor = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+
+    // Query cuSPARSE and hand the enum value back as a plain jint
+    const cusparseIndexBase_t indexBase = cusparseGetMatIndexBase(descriptor);
+
+    return (jint)indexBase;
+}
+
+
+/*
+ * JNI entry point for cusparseCreateCsr: creates a CSR sparse-matrix descriptor
+ * and stores the resulting native handle in the Java spMatDescr wrapper.
+ *
+ * rows/cols/nnz are passed on as 64-bit values, the three array arguments are
+ * resolved with getPointer, and the index/value type ids are cast to the
+ * corresponding cuSPARSE/CUDA enums.
+ *
+ * Returns the cusparseStatus_t as a jint, or CUJAVA_CUSPARSE_INTERNAL_ERROR if spMatDescr is null.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateCsrNative
+  (JNIEnv *env, jclass cls, jobject spMatDescr, jlong rows, jlong cols, jlong nnz, jobject csrRowOffsets,
+   jobject csrColInd, jobject csrValues, jint csrRowOffsetsType, jint csrColIndType, jint idxBase, jint valueType) {
+
+    // Validate: only the output wrapper is checked for null here.
+    // NOTE(review): csrRowOffsets/csrColInd/csrValues are not null-checked; presumably null is permitted, confirm.
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseCreateCsr");
+
+    // Log message (jlong values are cast to long long so the %lld conversions match on every platform)
+    Logger::log(LOG_TRACE, "Executing cusparseCreateCsr(spMatDescr=%p, rows=%lld, cols=%lld, nnz=%lld, csrRowOffsets=%p, csrColInd=%p, csrValues=%p, csrRowOffsetsType=%d, csrColIndType=%d, idxBase=%d, valueType=%d)\n",
+        spMatDescr, (long long)rows, (long long)cols, (long long)nnz, csrRowOffsets, csrColInd, csrValues,
+        csrRowOffsetsType, csrColIndType, idxBase, valueType);
+
+    // Copy Java inputs into native locals
+    int64_t rows_native = (int64_t)rows;
+    int64_t cols_native = (int64_t)cols;
+    int64_t nnz_native = (int64_t)nnz;
+    void *csrRowOffsets_native = (void *)getPointer(env, csrRowOffsets);
+    void *csrColInd_native = (void *)getPointer(env, csrColInd);
+    void *csrValues_native = (void *)getPointer(env, csrValues);
+    cusparseIndexType_t csrRowOffsetsType_native = (cusparseIndexType_t)csrRowOffsetsType;
+    cusparseIndexType_t csrColIndType_native = (cusparseIndexType_t)csrColIndType;
+    cusparseIndexBase_t idxBase_native = (cusparseIndexBase_t)idxBase;
+    cudaDataType valueType_native = (cudaDataType)valueType;
+
+    // Cusparse API call. The output handle starts as nullptr so that an indeterminate
+    // value is never written into the Java wrapper if the call does not set it.
+    cusparseSpMatDescr_t spMatDescr_native = nullptr;
+    cusparseStatus_t jniResult_native = cusparseCreateCsr(&spMatDescr_native, rows_native, cols_native, nnz_native, csrRowOffsets_native,
+        csrColInd_native, csrValues_native, csrRowOffsetsType_native, csrColIndType_native, idxBase_native, valueType_native);
+    setNativePointerValue(env, spMatDescr, (jlong)spMatDescr_native);
+
+    return (jint)jniResult_native;
+}
+
+
+
+/*
+ * JNI entry point for cusparseCreateDnVec: creates a dense-vector descriptor over `values`
+ * and stores the native handle in the Java dnVecDescr wrapper. Returns the cusparseStatus_t
+ * as a jint, or CUJAVA_CUSPARSE_INTERNAL_ERROR if dnVecDescr or values is null.
+ */
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnVecNative
+    (JNIEnv *env, jclass cls, jobject dnVecDescr, jlong size, jobject values, jint valueType) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, dnVecDescr, "dnVecDescr", "cusparseCreateDnVec");
+    CUJAVA_REQUIRE_NONNULL(env, values, "values", "cusparseCreateDnVec");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCreateDnVec(dnVecDescr=%p, size=%lld, values=%p, valueType=%d)\n",
+        dnVecDescr, (long long)size, values, valueType);  // jlong cast so that %lld matches on every platform
+
+    // Copy Java inputs into native locals
+    int64_t size_native = (int64_t)size;
+    void *values_native = (void *)getPointer(env, values);
+    cudaDataType valueType_native = (cudaDataType)valueType;
+
+    // Cusparse API call. The output handle starts as nullptr so that an indeterminate
+    // value is never written into the Java wrapper if the call does not set it.
+    cusparseDnVecDescr_t dnVecDescr_native = nullptr;
+    cusparseStatus_t jniResult_native = cusparseCreateDnVec(&dnVecDescr_native, size_native, values_native, valueType_native);
+    setNativePointerValue(env, dnVecDescr, (jlong)dnVecDescr_native);
+
+    return (jint)jniResult_native;
+}
+
+
+
+// JNI binding for cusparseSpMV_bufferSize.
+//
+// Converts the Java arguments to their native counterparts, calls
+// cusparseSpMV_bufferSize and returns its cusparseStatus_t as a jint.
+// alpha and beta are accessed through PointerData objects that are released
+// with JNI_ABORT; bufferSize is a jlongArray bridged through
+// initNative/releaseNative.
+//
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR when an argument conversion or a
+// release step fails. Resources that were already acquired are released on
+// those paths as well, so a late failure does not leak earlier acquisitions.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMV_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, jobject vecY, jint computeType, jint alg, jlongArray bufferSize) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, vecX, "vecX", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, vecY, "vecY", "cusparseSpMV_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSpMV_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMV_bufferSize(handle=%p, opA=%d, alpha=%p, matA=%p, vecX=%p, beta=%p, vecY=%p, computeType=%d, alg=%d, bufferSize=%p)\n",
+        handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, bufferSize);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstDnVecDescr_t vecX_native;
+    void * beta_native = nullptr;
+    cusparseDnVecDescr_t vecY_native;
+    cudaDataType computeType_native;
+    cusparseSpMVAlg_t alg_native;
+    size_t * bufferSize_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    vecX_native = (cusparseConstDnVecDescr_t)getNativePointerValue(env, vecX);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak the alpha data that was already acquired
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    vecY_native = (cusparseDnVecDescr_t)getNativePointerValue(env, vecY);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpMVAlg_t)alg;
+    if (!initNative(env, bufferSize, bufferSize_native, true)) {
+        // Give back both scalar wrappers before bailing out
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMV_bufferSize(handle_native, opA_native, alpha_native, matA_native,
+        vecX_native, beta_native, vecY_native, computeType_native, alg_native, bufferSize_native);
+
+    // Attempt every release, even if an earlier one fails, and report a
+    // failure only after all of them have been tried
+    bool released = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) released = false;
+    if (!releaseNative(env, bufferSize_native, bufferSize, true)) released = false;
+    if (!released) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI binding for cusparseSpMV.
+//
+// Converts the Java arguments to their native counterparts, calls
+// cusparseSpMV and returns its cusparseStatus_t as a jint. alpha and beta are
+// accessed through PointerData objects that are released with JNI_ABORT.
+// externalBuffer is not null-checked here; it is passed straight to getPointer.
+//
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR when acquiring or releasing alpha or
+// beta fails. A PointerData that was already acquired is released on those
+// paths as well, so it is not leaked.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMVNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta, jobject vecY, jint computeType, jint alg, jobject externalBuffer) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, vecX, "vecX", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMV");
+    CUJAVA_REQUIRE_NONNULL(env, vecY, "vecY", "cusparseSpMV");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMV(handle=%p, opA=%d, alpha=%p, matA=%p, vecX=%p, beta=%p, vecY=%p, computeType=%d, alg=%d, externalBuffer=%p)\n",
+        handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, externalBuffer);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstDnVecDescr_t vecX_native;
+    void * beta_native = nullptr;
+    cusparseDnVecDescr_t vecY_native;
+    cudaDataType computeType_native;
+    cusparseSpMVAlg_t alg_native;
+    void * externalBuffer_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    vecX_native = (cusparseConstDnVecDescr_t)getNativePointerValue(env, vecX);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak the alpha data that was already acquired
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    vecY_native = (cusparseDnVecDescr_t)getNativePointerValue(env, vecY);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpMVAlg_t)alg;
+    externalBuffer_native = (void *)getPointer(env, externalBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMV(handle_native, opA_native, alpha_native, matA_native, vecX_native, beta_native, vecY_native, computeType_native, alg_native, externalBuffer_native);
+
+    // Attempt both releases, even if the first one fails, and report a
+    // failure only after both have been tried
+    bool released = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) released = false;
+    if (!released) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI binding for cusparseDestroy: destroys the library handle referenced by
+// the Java `handle` object and returns the cusparseStatus_t as a jint.
+// The Java object itself is not modified by this function.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyNative(JNIEnv *env, jclass cls, jobject handle) {
+
+    // The handle object is checked for null before anything else happens
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDestroy");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroy(handle=%p)\n", handle);
+
+    // Unwrap the native handle stored in the Java object and pass it to cuSPARSE
+    cusparseHandle_t nativeHandle = (cusparseHandle_t)getNativePointerValue(env, handle);
+    const cusparseStatus_t status = cusparseDestroy(nativeHandle);
+
+    return (jint)status;
+}
+
+
+
+// JNI binding for cusparseDestroyDnVec: destroys the dense-vector descriptor
+// referenced by the Java `dnVecDescr` object and returns the cusparseStatus_t
+// as a jint. The Java object itself is not modified by this function.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnVecNative(JNIEnv *env, jclass cls, jobject dnVecDescr) {
+
+    // The descriptor object is checked for null before anything else happens
+    CUJAVA_REQUIRE_NONNULL(env, dnVecDescr, "dnVecDescr", "cusparseDestroyDnVec");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroyDnVec(dnVecDescr=%p)\n", dnVecDescr);
+
+    // Unwrap the native descriptor stored in the Java object and pass it to cuSPARSE
+    cusparseConstDnVecDescr_t nativeDescr = (cusparseConstDnVecDescr_t)getNativePointerValue(env, dnVecDescr);
+    const cusparseStatus_t status = cusparseDestroyDnVec(nativeDescr);
+
+    return (jint)status;
+}
+
+
+
+// JNI binding for cusparseDestroyDnMat: destroys the dense-matrix descriptor
+// referenced by the Java `dnMatDescr` object and returns the cusparseStatus_t
+// as a jint. The Java object itself is not modified by this function.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnMatNative(JNIEnv *env, jclass cls, jobject dnMatDescr) {
+
+    // The descriptor object is checked for null before anything else happens
+    CUJAVA_REQUIRE_NONNULL(env, dnMatDescr, "dnMatDescr", "cusparseDestroyDnMat");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroyDnMat(dnMatDescr=%p)\n", dnMatDescr);
+
+    // Unwrap the native descriptor stored in the Java object and pass it to cuSPARSE
+    cusparseConstDnMatDescr_t nativeDescr = (cusparseConstDnMatDescr_t)getNativePointerValue(env, dnMatDescr);
+    const cusparseStatus_t status = cusparseDestroyDnMat(nativeDescr);
+
+    return (jint)status;
+}
+
+
+// JNI binding for cusparseDestroySpMat: destroys the sparse-matrix descriptor
+// referenced by the Java `spMatDescr` object and returns the cusparseStatus_t
+// as a jint. The Java object itself is not modified by this function.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroySpMatNative(JNIEnv *env, jclass cls, jobject spMatDescr) {
+
+    // The descriptor object is checked for null before anything else happens
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseDestroySpMat");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDestroySpMat(spMatDescr=%p)\n", spMatDescr);
+
+    // Unwrap the native descriptor stored in the Java object and pass it to cuSPARSE
+    cusparseConstSpMatDescr_t nativeDescr = (cusparseConstSpMatDescr_t)getNativePointerValue(env, spMatDescr);
+    const cusparseStatus_t status = cusparseDestroySpMat(nativeDescr);
+
+    return (jint)status;
+}
+
+
+// JNI binding for cusparseSpMM.
+//
+// Converts the Java arguments to their native counterparts, calls
+// cusparseSpMM and returns its cusparseStatus_t as a jint. alpha and beta are
+// accessed through PointerData objects that are released with JNI_ABORT.
+// externalBuffer is not null-checked here; it is passed straight to getPointer.
+//
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR when acquiring or releasing alpha or
+// beta fails. A PointerData that was already acquired is released on those
+// paths as well, so it is not leaked.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMMNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC, jint computeType, jint alg, jobject externalBuffer) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMM");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpMM");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMM(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, externalBuffer=%p)\n",
+        handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, externalBuffer);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    cusparseOperation_t opB_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstDnMatDescr_t matB_native;
+    void * beta_native = nullptr;
+    cusparseDnMatDescr_t matC_native;
+    cudaDataType computeType_native;
+    cusparseSpMMAlg_t alg_native;
+    void * externalBuffer_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    opB_native = (cusparseOperation_t)opB;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    matB_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matB);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak the alpha data that was already acquired
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    matC_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matC);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpMMAlg_t)alg;
+    externalBuffer_native = (void *)getPointer(env, externalBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMM(handle_native, opA_native, opB_native, alpha_native, matA_native,
+        matB_native, beta_native, matC_native, computeType_native, alg_native, externalBuffer_native);
+
+    // Attempt both releases, even if the first one fails, and report a
+    // failure only after both have been tried
+    bool released = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) released = false;
+    if (!released) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI binding for cusparseSpMM_bufferSize.
+//
+// Converts the Java arguments to their native counterparts, calls
+// cusparseSpMM_bufferSize and returns its cusparseStatus_t as a jint.
+// alpha and beta are accessed through PointerData objects that are released
+// with JNI_ABORT; bufferSize is a jlongArray bridged through
+// initNative/releaseNative.
+//
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR when an argument conversion or a
+// release step fails. Resources that were already acquired are released on
+// those paths as well, so a late failure does not leak earlier acquisitions.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMM_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jlongArray bufferSize) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpMM_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSpMM_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMM_bufferSize(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, bufferSize=%p)\n",
+        handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, bufferSize);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    cusparseOperation_t opB_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstDnMatDescr_t matB_native;
+    void * beta_native = nullptr;
+    cusparseDnMatDescr_t matC_native;
+    cudaDataType computeType_native;
+    cusparseSpMMAlg_t alg_native;
+    size_t * bufferSize_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    opB_native = (cusparseOperation_t)opB;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    matB_native = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matB);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak the alpha data that was already acquired
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    matC_native = (cusparseDnMatDescr_t)getNativePointerValue(env, matC);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpMMAlg_t)alg;
+    if (!initNative(env, bufferSize, bufferSize_native, true)) {
+        // Give back both scalar wrappers before bailing out
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMM_bufferSize(handle_native, opA_native, opB_native, alpha_native,
+        matA_native, matB_native, beta_native, matC_native, computeType_native, alg_native, bufferSize_native);
+
+    // Attempt every release, even if an earlier one fails, and report a
+    // failure only after all of them have been tried
+    bool released = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) released = false;
+    if (!releaseNative(env, bufferSize_native, bufferSize, true)) released = false;
+    if (!released) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI binding for cusparseCreateDnMat.
+//
+// Builds a dense-matrix descriptor from `rows`, `cols`, `ld`, the native
+// pointer wrapped by `values`, `valueType` and `order`, and stores the
+// descriptor address in the Java object `dnMatDescr`. The cusparseStatus_t of
+// the native call is returned as a jint.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnMatNative
+    (JNIEnv *env, jclass cls, jobject dnMatDescr, jlong rows, jlong cols, jlong ld, jobject values, jint valueType, jint order) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, dnMatDescr, "dnMatDescr", "cusparseCreateDnMat");
+    CUJAVA_REQUIRE_NONNULL(env, values, "values", "cusparseCreateDnMat");
+
+    // jlong is 64 bits wide but `long` is not on every platform, so the values
+    // are passed as long long to match the %lld conversions
+    Logger::log(LOG_TRACE, "Executing cusparseCreateDnMat(dnMatDescr=%p, rows=%lld, cols=%lld, ld=%lld, values=%p, valueType=%d, order=%d)\n",
+        dnMatDescr, (long long)rows, (long long)cols, (long long)ld, values, valueType, order);
+
+    // Declare native variables. The descriptor starts out as null so that a
+    // failed create call never publishes an indeterminate pointer to Java.
+    cusparseDnMatDescr_t dnMatDescr_native = nullptr;
+    int64_t rows_native = 0;
+    int64_t cols_native = 0;
+    int64_t ld_native = 0;
+    void * values_native = nullptr;
+    cudaDataType valueType_native;
+    cusparseOrder_t order_native;
+
+    // Copy Java inputs into native locals
+    rows_native = (int64_t)rows;
+    cols_native = (int64_t)cols;
+    ld_native = (int64_t)ld;
+    values_native = (void *)getPointer(env, values);
+    valueType_native = (cudaDataType)valueType;
+    order_native = (cusparseOrder_t)order;
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseCreateDnMat(&dnMatDescr_native, rows_native, cols_native, ld_native,
+        values_native, valueType_native, order_native);
+
+    // Hand the descriptor address (null if none was produced) back to Java
+    setNativePointerValue(env, dnMatDescr, (jlong)dnMatDescr_native);
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI binding for cusparseCsrSetPointers: passes the native pointers wrapped
+// by csrRowOffsets, csrColInd and csrValues, together with the sparse-matrix
+// descriptor referenced by spMatDescr, to cusparseCsrSetPointers and returns
+// the cusparseStatus_t as a jint.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsrSetPointersNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jobject csrRowOffsets, jobject csrColInd, jobject csrValues) {
+
+    // Every object argument is checked for null before any conversion
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseCsrSetPointers");
+    CUJAVA_REQUIRE_NONNULL(env, csrRowOffsets, "csrRowOffsets", "cusparseCsrSetPointers");
+    CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", "cusparseCsrSetPointers");
+    CUJAVA_REQUIRE_NONNULL(env, csrValues, "csrValues", "cusparseCsrSetPointers");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCsrSetPointers(spMatDescr=%p, csrRowOffsets=%p, csrColInd=%p, csrValues=%p)\n",
+        spMatDescr, csrRowOffsets, csrColInd, csrValues);
+
+    // Unwrap the descriptor and the three array pointers from their Java wrappers
+    cusparseSpMatDescr_t nativeDescr = (cusparseSpMatDescr_t)getNativePointerValue(env, spMatDescr);
+    void *nativeRowOffsets = (void *)getPointer(env, csrRowOffsets);
+    void *nativeColInd = (void *)getPointer(env, csrColInd);
+    void *nativeValues = (void *)getPointer(env, csrValues);
+
+    // Forward to cuSPARSE and report its status
+    const cusparseStatus_t status = cusparseCsrSetPointers(nativeDescr, nativeRowOffsets, nativeColInd, nativeValues);
+    return (jint)status;
+}
+
+
+
+// JNI binding for cusparseCsr2cscEx2: unwraps the handle and all pointer
+// arguments from their Java wrappers, casts the integer arguments to the
+// corresponding native/enum types, calls cusparseCsr2cscEx2 and returns its
+// cusparseStatus_t as a jint.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2Native
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr,
+     jobject csrColInd, jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jobject buffer) {
+
+    // Every object argument is checked for null before any conversion
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, csrVal, "csrVal", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, csrRowPtr, "csrRowPtr", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, cscVal, "cscVal", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, cscColPtr, "cscColPtr", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, cscRowInd, "cscRowInd", "cusparseCsr2cscEx2");
+    CUJAVA_REQUIRE_NONNULL(env, buffer, "buffer", "cusparseCsr2cscEx2");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCsr2cscEx2(handle=%p, m=%d, n=%d, nnz=%d, csrVal=%p, csrRowPtr=%p, csrColInd=%p, cscVal=%p, cscColPtr=%p, cscRowInd=%p, valType=%d, copyValues=%d, idxBase=%d, alg=%d, buffer=%p)\n",
+        handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, buffer);
+
+    // Library handle and matrix dimensions
+    cusparseHandle_t nativeHandle = (cusparseHandle_t)getNativePointerValue(env, handle);
+    int rowCount = (int)m;
+    int colCount = (int)n;
+    int nonZeroCount = (int)nnz;
+
+    // CSR arrays
+    void *srcValues = (void *)getPointer(env, csrVal);
+    int *srcRowPtr = (int *)getPointer(env, csrRowPtr);
+    int *srcColInd = (int *)getPointer(env, csrColInd);
+
+    // CSC arrays
+    void *dstValues = (void *)getPointer(env, cscVal);
+    int *dstColPtr = (int *)getPointer(env, cscColPtr);
+    int *dstRowInd = (int *)getPointer(env, cscRowInd);
+
+    // Enum-typed options and the work buffer
+    cudaDataType nativeValType = (cudaDataType)valType;
+    cusparseAction_t nativeCopyValues = (cusparseAction_t)copyValues;
+    cusparseIndexBase_t nativeIdxBase = (cusparseIndexBase_t)idxBase;
+    cusparseCsr2CscAlg_t nativeAlg = (cusparseCsr2CscAlg_t)alg;
+    void *nativeBuffer = (void *)getPointer(env, buffer);
+
+    // Forward to cuSPARSE and report its status
+    const cusparseStatus_t status = cusparseCsr2cscEx2(
+        nativeHandle, rowCount, colCount, nonZeroCount,
+        srcValues, srcRowPtr, srcColInd,
+        dstValues, dstColPtr, dstRowInd,
+        nativeValType, nativeCopyValues, nativeIdxBase, nativeAlg, nativeBuffer);
+    return (jint)status;
+}
+
+
+
+// JNI binding for cusparseCsr2cscEx2_bufferSize: unwraps the handle and all
+// pointer arguments from their Java wrappers, bridges the jlongArray
+// bufferSize through initNative/releaseNative, calls
+// cusparseCsr2cscEx2_bufferSize and returns its cusparseStatus_t as a jint.
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR if initNative or releaseNative fails.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, jobject csrColInd,
+     jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jlongArray bufferSize) {
+
+    // Every object argument is checked for null before any conversion
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, csrVal, "csrVal", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, csrRowPtr, "csrRowPtr", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, csrColInd, "csrColInd", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, cscVal, "cscVal", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, cscColPtr, "cscColPtr", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, cscRowInd, "cscRowInd", "cusparseCsr2cscEx2_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseCsr2cscEx2_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCsr2cscEx2_bufferSize(handle=%p, m=%d, n=%d, nnz=%d, csrVal=%p, csrRowPtr=%p, csrColInd=%p, cscVal=%p, cscColPtr=%p, cscRowInd=%p, valType=%d, copyValues=%d, idxBase=%d, alg=%d, bufferSize=%p)\n",
+        handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, bufferSize);
+
+    // Library handle and matrix dimensions
+    cusparseHandle_t nativeHandle = (cusparseHandle_t)getNativePointerValue(env, handle);
+    int rowCount = (int)m;
+    int colCount = (int)n;
+    int nonZeroCount = (int)nnz;
+
+    // CSR arrays
+    void *srcValues = (void *)getPointer(env, csrVal);
+    int *srcRowPtr = (int *)getPointer(env, csrRowPtr);
+    int *srcColInd = (int *)getPointer(env, csrColInd);
+
+    // CSC arrays
+    void *dstValues = (void *)getPointer(env, cscVal);
+    int *dstColPtr = (int *)getPointer(env, cscColPtr);
+    int *dstRowInd = (int *)getPointer(env, cscRowInd);
+
+    // Enum-typed options
+    cudaDataType nativeValType = (cudaDataType)valType;
+    cusparseAction_t nativeCopyValues = (cusparseAction_t)copyValues;
+    cusparseIndexBase_t nativeIdxBase = (cusparseIndexBase_t)idxBase;
+    cusparseCsr2CscAlg_t nativeAlg = (cusparseCsr2CscAlg_t)alg;
+
+    // Native view of the Java bufferSize array
+    size_t *nativeBufferSize = nullptr;
+    if (!initNative(env, bufferSize, nativeBufferSize, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    // Forward to cuSPARSE
+    const cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(
+        nativeHandle, rowCount, colCount, nonZeroCount,
+        srcValues, srcRowPtr, srcColInd,
+        dstValues, dstColPtr, dstRowInd,
+        nativeValType, nativeCopyValues, nativeIdxBase, nativeAlg, nativeBufferSize);
+
+    // Release the native view of bufferSize before reporting the status
+    if (!releaseNative(env, nativeBufferSize, bufferSize, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    return (jint)status;
+}
+
+
+
+// JNI binding for cusparseDcsrgeam2.
+//
+// Converts the Java arguments to their native counterparts, calls
+// cusparseDcsrgeam2 and returns its cusparseStatus_t as a jint. alpha and beta
+// are accessed through PointerData objects that are released with JNI_ABORT;
+// all other pointer arguments are unwrapped with getPointer.
+//
+// Returns CUJAVA_CUSPARSE_INTERNAL_ERROR when acquiring or releasing alpha or
+// beta fails. A PointerData that was already acquired is released on those
+// paths as well, so it is not leaked.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2Native
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA,
+     jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB,
+     jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jobject pBuffer) {
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValA, "csrSortedValA", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValB, "csrSortedValB", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValC, "csrSortedValC", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndC, "csrSortedColIndC", "cusparseDcsrgeam2");
+    CUJAVA_REQUIRE_NONNULL(env, pBuffer, "pBuffer", "cusparseDcsrgeam2");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDcsrgeam2(handle=%p, m=%d, n=%d, alpha=%p, descrA=%p, nnzA=%d, csrSortedValA=%p, csrSortedRowPtrA=%p, csrSortedColIndA=%p, beta=%p, descrB=%p, nnzB=%d, csrSortedValB=%p, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedValC=%p, csrSortedRowPtrC=%p, csrSortedColIndC=%p, pBuffer=%p)\n",
+        handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    int m_native = 0;
+    int n_native = 0;
+    double * alpha_native = nullptr;
+    cusparseMatDescr_t descrA_native;
+    int nnzA_native = 0;
+    double * csrSortedValA_native = nullptr;
+    int * csrSortedRowPtrA_native = nullptr;
+    int * csrSortedColIndA_native = nullptr;
+    double * beta_native = nullptr;
+    cusparseMatDescr_t descrB_native;
+    int nnzB_native = 0;
+    double * csrSortedValB_native = nullptr;
+    int * csrSortedRowPtrB_native = nullptr;
+    int * csrSortedColIndB_native = nullptr;
+    cusparseMatDescr_t descrC_native;
+    double * csrSortedValC_native = nullptr;
+    int * csrSortedRowPtrC_native = nullptr;
+    int * csrSortedColIndC_native = nullptr;
+    void * pBuffer_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    m_native = (int)m;
+    n_native = (int)n;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (double *)alpha_pointerData->getPointer(env);
+    descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    nnzA_native = (int)nnzA;
+    csrSortedValA_native = (double *)getPointer(env, csrSortedValA);
+    csrSortedRowPtrA_native = (int *)getPointer(env, csrSortedRowPtrA);
+    csrSortedColIndA_native = (int *)getPointer(env, csrSortedColIndA);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // Do not leak the alpha data that was already acquired
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (double *)beta_pointerData->getPointer(env);
+    descrB_native = (cusparseMatDescr_t)getNativePointerValue(env, descrB);
+    nnzB_native = (int)nnzB;
+    csrSortedValB_native = (double *)getPointer(env, csrSortedValB);
+    csrSortedRowPtrB_native = (int *)getPointer(env, csrSortedRowPtrB);
+    csrSortedColIndB_native = (int *)getPointer(env, csrSortedColIndB);
+    descrC_native = (cusparseMatDescr_t)getNativePointerValue(env, descrC);
+    csrSortedValC_native = (double *)getPointer(env, csrSortedValC);
+    csrSortedRowPtrC_native = (int *)getPointer(env, csrSortedRowPtrC);
+    csrSortedColIndC_native = (int *)getPointer(env, csrSortedColIndC);
+    pBuffer_native = (void *)getPointer(env, pBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseDcsrgeam2(handle_native, m_native, n_native, alpha_native, descrA_native,
+        nnzA_native, csrSortedValA_native, csrSortedRowPtrA_native, csrSortedColIndA_native, beta_native, descrB_native,
+        nnzB_native, csrSortedValB_native, csrSortedRowPtrB_native, csrSortedColIndB_native, descrC_native, csrSortedValC_native,
+        csrSortedRowPtrC_native, csrSortedColIndC_native, pBuffer_native);
+
+    // Attempt both releases, even if the first one fails, and report a
+    // failure only after both have been tried
+    bool released = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    if (!releasePointerData(env, beta_pointerData, JNI_ABORT)) released = false;
+    if (!released) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2_1bufferSizeExtNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA,
+     jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, jobject csrSortedRowPtrB,
+     jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jlongArray pBufferSizeInBytes) {
+
+    // JNI bridge for cusparseDcsrgeam2_bufferSizeExt.
+    //
+    // Converts every Java argument to its native counterpart, forwards the call to
+    // cuSPARSE and writes the size reported through pBufferSizeInBytes_native back
+    // into the Java long[] `pBufferSizeInBytes`.
+    //
+    // Returns the cusparseStatus_t of the call cast to jint, or
+    // CUJAVA_CUSPARSE_INTERNAL_ERROR when an argument cannot be converted or a
+    // temporary cannot be released.
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValA, "csrSortedValA", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValB, "csrSortedValB", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedValC, "csrSortedValC", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndC, "csrSortedColIndC", "cusparseDcsrgeam2_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, pBufferSizeInBytes, "pBufferSizeInBytes", "cusparseDcsrgeam2_bufferSizeExt");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDcsrgeam2_bufferSizeExt(handle=%p, m=%d, n=%d, alpha=%p, descrA=%p, nnzA=%d, csrSortedValA=%p, csrSortedRowPtrA=%p, csrSortedColIndA=%p, beta=%p, descrB=%p, nnzB=%d, csrSortedValB=%p, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedValC=%p, csrSortedRowPtrC=%p, csrSortedColIndC=%p, pBufferSizeInBytes=%p)\n",
+        handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    int m_native = 0;
+    int n_native = 0;
+    double * alpha_native = nullptr;
+    cusparseMatDescr_t descrA_native;
+    int nnzA_native = 0;
+    double * csrSortedValA_native = nullptr;
+    int * csrSortedRowPtrA_native = nullptr;
+    int * csrSortedColIndA_native = nullptr;
+    double * beta_native = nullptr;
+    cusparseMatDescr_t descrB_native;
+    int nnzB_native = 0;
+    double * csrSortedValB_native = nullptr;
+    int * csrSortedRowPtrB_native = nullptr;
+    int * csrSortedColIndB_native = nullptr;
+    cusparseMatDescr_t descrC_native;
+    double * csrSortedValC_native = nullptr;
+    int * csrSortedRowPtrC_native = nullptr;
+    int * csrSortedColIndC_native = nullptr;
+    size_t * pBufferSizeInBytes_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    m_native = (int)m;
+    n_native = (int)n;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (double *)alpha_pointerData->getPointer(env);
+    descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    nnzA_native = (int)nnzA;
+    csrSortedValA_native = (double *)getPointer(env, csrSortedValA);
+    csrSortedRowPtrA_native = (int *)getPointer(env, csrSortedRowPtrA);
+    csrSortedColIndA_native = (int *)getPointer(env, csrSortedColIndA);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: hand it back before bailing out so it is
+        // not leaked. The result is ignored because we are already failing.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (double *)beta_pointerData->getPointer(env);
+    descrB_native = (cusparseMatDescr_t)getNativePointerValue(env, descrB);
+    nnzB_native = (int)nnzB;
+    csrSortedValB_native = (double *)getPointer(env, csrSortedValB);
+    csrSortedRowPtrB_native = (int *)getPointer(env, csrSortedRowPtrB);
+    csrSortedColIndB_native = (int *)getPointer(env, csrSortedColIndB);
+    descrC_native = (cusparseMatDescr_t)getNativePointerValue(env, descrC);
+    csrSortedValC_native = (double *)getPointer(env, csrSortedValC);
+    csrSortedRowPtrC_native = (int *)getPointer(env, csrSortedRowPtrC);
+    csrSortedColIndC_native = (int *)getPointer(env, csrSortedColIndC);
+    if (!initNative(env, pBufferSizeInBytes, pBufferSizeInBytes_native, true)) {
+        // Both scalar holders are live at this point; release them (best effort)
+        // instead of returning with them still acquired.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseDcsrgeam2_bufferSizeExt(handle_native, m_native, n_native, alpha_native,
+        descrA_native, nnzA_native, csrSortedValA_native, csrSortedRowPtrA_native, csrSortedColIndA_native, beta_native,
+        descrB_native, nnzB_native, csrSortedValB_native, csrSortedRowPtrB_native, csrSortedColIndB_native, descrC_native,
+        csrSortedValC_native, csrSortedRowPtrC_native, csrSortedColIndC_native, pBufferSizeInBytes_native);
+
+    // Always attempt every release, even if an earlier one fails, so that one
+    // failure does not strand the remaining temporaries. alpha/beta are released
+    // with JNI_ABORT, exactly as before.
+    bool allReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    allReleased = releasePointerData(env, beta_pointerData, JNI_ABORT) && allReleased;
+    allReleased = releaseNative(env, pBufferSizeInBytes_native, pBufferSizeInBytes, true) && allReleased;
+    if (!allReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDenseNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) {
+
+    // JNI bridge for cusparseSparseToDense: unwraps the Java-side handle and
+    // matrix descriptors, forwards them to cuSPARSE and returns the resulting
+    // cusparseStatus_t as a jint.
+
+    // handle, matA and matB are mandatory; externalBuffer is not checked here
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSparseToDense");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSparseToDense");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSparseToDense");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSparseToDense(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n",
+        handle, matA, matB, alg, externalBuffer);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseConstSpMatDescr_t spMat = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseDnMatDescr_t dnMat = (cusparseDnMatDescr_t)getNativePointerValue(env, matB);
+    cusparseSparseToDenseAlg_t algorithm = static_cast<cusparseSparseToDenseAlg_t>(alg);
+    void *scratch = (void *)getPointer(env, externalBuffer);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseSparseToDense(ctx, spMat, dnMat, algorithm, scratch);
+    return static_cast<jint>(status);
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDense_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize) {
+
+    // JNI bridge for cusparseSparseToDense_bufferSize. The size produced by
+    // cuSPARSE travels through a native size_t buffer that is set up from, and
+    // released back into, the Java long[] `bufferSize`.
+
+    // Every object argument is mandatory for this call
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSparseToDense_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSparseToDense_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSparseToDense_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseSparseToDense_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSparseToDense_bufferSize(handle=%p, matA=%p, matB=%p, alg=%d, bufferSize=%p)\n",
+        handle, matA, matB, alg, bufferSize);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseConstSpMatDescr_t spMat = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    cusparseDnMatDescr_t dnMat = (cusparseDnMatDescr_t)getNativePointerValue(env, matB);
+    cusparseSparseToDenseAlg_t algorithm = static_cast<cusparseSparseToDenseAlg_t>(alg);
+
+    size_t *sizeOut = nullptr;
+    if (!initNative(env, bufferSize, sizeOut, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    const cusparseStatus_t status = cusparseSparseToDense_bufferSize(ctx, spMat, dnMat, algorithm, sizeOut);
+
+    // Hand the native size buffer back to the Java array
+    if (!releaseNative(env, sizeOut, bufferSize, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize) {
+
+    // JNI bridge for cusparseDenseToSparse_bufferSize. The size produced by
+    // cuSPARSE travels through a native size_t buffer that is set up from, and
+    // released back into, the Java long[] `bufferSize`.
+
+    // Every object argument is mandatory for this call
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_bufferSize");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize, "bufferSize", "cusparseDenseToSparse_bufferSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_bufferSize(handle=%p, matA=%p, matB=%p, alg=%d, bufferSize=%p)\n", handle, matA, matB, alg, bufferSize);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseConstDnMatDescr_t dnMat = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA);
+    cusparseSpMatDescr_t spMat = (cusparseSpMatDescr_t)getNativePointerValue(env, matB);
+    cusparseDenseToSparseAlg_t algorithm = static_cast<cusparseDenseToSparseAlg_t>(alg);
+
+    size_t *sizeOut = nullptr;
+    if (!initNative(env, bufferSize, sizeOut, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    const cusparseStatus_t status = cusparseDenseToSparse_bufferSize(ctx, dnMat, spMat, algorithm, sizeOut);
+
+    // Hand the native size buffer back to the Java array
+    if (!releaseNative(env, sizeOut, bufferSize, true)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1analysisNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) {
+
+    // JNI bridge for cusparseDenseToSparse_analysis: unwraps the Java-side
+    // handle and matrix descriptors, forwards them to cuSPARSE and returns the
+    // resulting cusparseStatus_t as a jint.
+
+    // handle, matA and matB are mandatory; externalBuffer is not checked here
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_analysis");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_analysis");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_analysis");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_analysis(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n",
+        handle, matA, matB, alg, externalBuffer);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseConstDnMatDescr_t dnMat = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA);
+    cusparseSpMatDescr_t spMat = (cusparseSpMatDescr_t)getNativePointerValue(env, matB);
+    cusparseDenseToSparseAlg_t algorithm = static_cast<cusparseDenseToSparseAlg_t>(alg);
+    void *scratch = (void *)getPointer(env, externalBuffer);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseDenseToSparse_analysis(ctx, dnMat, spMat, algorithm, scratch);
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1convertNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer) {
+
+    // JNI bridge for cusparseDenseToSparse_convert: unwraps the Java-side
+    // handle and matrix descriptors, forwards them to cuSPARSE and returns the
+    // resulting cusparseStatus_t as a jint.
+
+    // handle, matA and matB are mandatory; externalBuffer is not checked here
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDenseToSparse_convert");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseDenseToSparse_convert");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseDenseToSparse_convert");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDenseToSparse_convert(handle=%p, matA=%p, matB=%p, alg=%d, externalBuffer=%p)\n",
+        handle, matA, matB, alg, externalBuffer);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseConstDnMatDescr_t dnMat = (cusparseConstDnMatDescr_t)getNativePointerValue(env, matA);
+    cusparseSpMatDescr_t spMat = (cusparseSpMatDescr_t)getNativePointerValue(env, matB);
+    cusparseDenseToSparseAlg_t algorithm = static_cast<cusparseDenseToSparseAlg_t>(alg);
+    void *scratch = (void *)getPointer(env, externalBuffer);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseDenseToSparse_convert(ctx, dnMat, spMat, algorithm, scratch);
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDnnzNative
+    (JNIEnv *env, jclass cls, jobject handle, jint dirA, jint m, jint n, jobject descrA, jobject A, jint lda, jobject nnzPerRowCol, jobject nnzTotalDevHostPtr) {
+
+    // JNI bridge for cusparseDnnz. nnzTotalDevHostPtr is accessed through a
+    // PointerData holder that is released with mode 0 after the call; the other
+    // pointer arguments are resolved directly via getPointer.
+
+    // Every object argument is mandatory for this call
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseDnnz");
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseDnnz");
+    CUJAVA_REQUIRE_NONNULL(env, A, "A", "cusparseDnnz");
+    CUJAVA_REQUIRE_NONNULL(env, nnzPerRowCol, "nnzPerRowCol", "cusparseDnnz");
+    CUJAVA_REQUIRE_NONNULL(env, nnzTotalDevHostPtr, "nnzTotalDevHostPtr", "cusparseDnnz");
+
+    Logger::log(LOG_TRACE, "Executing cusparseDnnz(handle=%p, dirA=%d, m=%d, n=%d, descrA=%p, A=%p, lda=%d, nnzPerRowCol=%p, nnzTotalDevHostPtr=%p)\n",
+        handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    const cusparseDirection_t direction = static_cast<cusparseDirection_t>(dirA);
+    const int rowCount = static_cast<int>(m);
+    const int colCount = static_cast<int>(n);
+    cusparseMatDescr_t descr = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    double *denseValues = (double *)getPointer(env, A);
+    const int leadingDim = static_cast<int>(lda);
+    int *perRowCol = (int *)getPointer(env, nnzPerRowCol);
+
+    PointerData *nnzTotalHolder = initPointerData(env, nnzTotalDevHostPtr);
+    if (nnzTotalHolder == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    int *nnzTotal = (int *)nnzTotalHolder->getPointer(env);
+
+    const cusparseStatus_t status = cusparseDnnz(ctx, direction, rowCount, colCount, descr, denseValues,
+        leadingDim, perRowCol, nnzTotal);
+
+    // Synchronize the device when the result pointer is not backed by native
+    // memory (presumably so the value is complete before the holder is
+    // released -- TODO confirm).
+    const bool backedByNative = isPointerBackedByNativeMemory(env, nnzTotalDevHostPtr);
+    if (!backedByNative) {
+        cudaDeviceSynchronize();
+    }
+
+    if (!releasePointerData(env, nnzTotalHolder, 0)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatTypeNative
+    (JNIEnv *env, jclass cls, jobject descrA, jint type) {
+
+    // JNI bridge for cusparseSetMatType: unwraps the Java descriptor, converts
+    // the integer `type` to cusparseMatrixType_t and returns the cuSPARSE status
+    // as a jint.
+
+    // The descriptor object is mandatory
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseSetMatType");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSetMatType(descrA=%p, type=%d)\n", descrA, type);
+
+    // Resolve the Java arguments to their native counterparts
+    cusparseMatDescr_t descr = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    const cusparseMatrixType_t matrixType = static_cast<cusparseMatrixType_t>(type);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseSetMatType(descr, matrixType);
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatIndexBaseNative
+    (JNIEnv *env, jclass cls, jobject descrA, jint base) {
+
+    // JNI bridge for cusparseSetMatIndexBase: unwraps the Java descriptor,
+    // converts the integer `base` to cusparseIndexBase_t and returns the cuSPARSE
+    // status as a jint.
+
+    // The descriptor object is mandatory
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseSetMatIndexBase");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSetMatIndexBase(descrA=%p, base=%d)\n", descrA, base);
+
+    // Resolve the Java arguments to their native counterparts
+    cusparseMatDescr_t descr = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    const cusparseIndexBase_t indexBase = static_cast<cusparseIndexBase_t>(base);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseSetMatIndexBase(descr, indexBase);
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetPointerModeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint mode) {
+
+    // JNI bridge for cusparseSetPointerMode: unwraps the Java handle, converts
+    // the integer `mode` to cusparsePointerMode_t and returns the cuSPARSE status
+    // as a jint.
+
+    // The handle object is mandatory
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSetPointerMode");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSetPointerMode(handle=%p, mode=%d)\n", handle, mode);
+
+    // Resolve the Java arguments to their native counterparts
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    const cusparsePointerMode_t pointerMode = static_cast<cusparsePointerMode_t>(mode);
+
+    // Forward to cuSPARSE and hand the status straight back to Java
+    const cusparseStatus_t status = cusparseSetPointerMode(ctx, pointerMode);
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrgeam2NnzNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject descrA, jint nnzA, jobject csrSortedRowPtrA, jobject csrSortedColIndA,
+     jobject descrB, jint nnzB, jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedRowPtrC, jobject nnzTotalDevHostPtr, jobject workspace) {
+
+    // JNI bridge for cusparseXcsrgeam2Nnz. nnzTotalDevHostPtr is accessed
+    // through a PointerData holder that is released with mode 0 after the call;
+    // the other pointer arguments are resolved directly via getPointer.
+
+    // Every object argument is mandatory for this call
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrA, "csrSortedRowPtrA", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndA, "csrSortedColIndA", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, descrB, "descrB", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrB, "csrSortedRowPtrB", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedColIndB, "csrSortedColIndB", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, descrC, "descrC", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, csrSortedRowPtrC, "csrSortedRowPtrC", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, nnzTotalDevHostPtr, "nnzTotalDevHostPtr", "cusparseXcsrgeam2Nnz");
+    CUJAVA_REQUIRE_NONNULL(env, workspace, "workspace", "cusparseXcsrgeam2Nnz");
+
+    Logger::log(LOG_TRACE, "Executing cusparseXcsrgeam2Nnz(handle=%p, m=%d, n=%d, descrA=%p, nnzA=%d, csrSortedRowPtrA=%p, csrSortedColIndA=%p, descrB=%p, nnzB=%d, csrSortedRowPtrB=%p, csrSortedColIndB=%p, descrC=%p, csrSortedRowPtrC=%p, nnzTotalDevHostPtr=%p, workspace=%p)\n",
+        handle, m, n, descrA, nnzA, csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, nnzTotalDevHostPtr, workspace);
+
+    // Resolve each Java argument to its native counterpart
+    cusparseHandle_t ctx = (cusparseHandle_t)getNativePointerValue(env, handle);
+    const int rowCount = static_cast<int>(m);
+    const int colCount = static_cast<int>(n);
+
+    // Operand A
+    cusparseMatDescr_t aDescr = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+    const int aNnz = static_cast<int>(nnzA);
+    int *aRowPtr = (int *)getPointer(env, csrSortedRowPtrA);
+    int *aColInd = (int *)getPointer(env, csrSortedColIndA);
+
+    // Operand B
+    cusparseMatDescr_t bDescr = (cusparseMatDescr_t)getNativePointerValue(env, descrB);
+    const int bNnz = static_cast<int>(nnzB);
+    int *bRowPtr = (int *)getPointer(env, csrSortedRowPtrB);
+    int *bColInd = (int *)getPointer(env, csrSortedColIndB);
+
+    // Operand C
+    cusparseMatDescr_t cDescr = (cusparseMatDescr_t)getNativePointerValue(env, descrC);
+    int *cRowPtr = (int *)getPointer(env, csrSortedRowPtrC);
+
+    PointerData *nnzTotalHolder = initPointerData(env, nnzTotalDevHostPtr);
+    if (nnzTotalHolder == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    int *nnzTotal = (int *)nnzTotalHolder->getPointer(env);
+    void *scratch = (void *)getPointer(env, workspace);
+
+    const cusparseStatus_t status = cusparseXcsrgeam2Nnz(ctx, rowCount, colCount, aDescr, aNnz,
+        aRowPtr, aColInd, bDescr, bNnz, bRowPtr, bColInd,
+        cDescr, cRowPtr, nnzTotal, scratch);
+
+    // Synchronize the device when the result pointer is not backed by native
+    // memory (presumably so the value is complete before the holder is
+    // released -- TODO confirm).
+    const bool backedByNative = isPointerBackedByNativeMemory(env, nnzTotalDevHostPtr);
+    if (!backedByNative) {
+        cudaDeviceSynchronize();
+    }
+
+    if (!releasePointerData(env, nnzTotalHolder, 0)) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+
+    return static_cast<jint>(status);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1workEstimationNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize1, jobject externalBuffer1) {
+
+    // JNI bridge for cusparseSpGEMM_workEstimation.
+    //
+    // Converts every Java argument to its native counterpart, forwards the call to
+    // cuSPARSE and releases the native size buffer back into the Java long[]
+    // `bufferSize1`. externalBuffer1 is not null-checked here.
+    //
+    // Returns the cusparseStatus_t of the call cast to jint, or
+    // CUJAVA_CUSPARSE_INTERNAL_ERROR when an argument cannot be converted or a
+    // temporary cannot be released.
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_workEstimation");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize1, "bufferSize1", "cusparseSpGEMM_workEstimation");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_workEstimation(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, spgemmDescr=%p, bufferSize1=%p, externalBuffer1=%p)\n",
+        handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr, bufferSize1, externalBuffer1);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    cusparseOperation_t opB_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstSpMatDescr_t matB_native;
+    void * beta_native = nullptr;
+    cusparseSpMatDescr_t matC_native;
+    cudaDataType computeType_native;
+    cusparseSpGEMMAlg_t alg_native;
+    cusparseSpGEMMDescr_t spgemmDescr_native;
+    size_t * bufferSize1_native = nullptr;
+    void * externalBuffer1_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    opB_native = (cusparseOperation_t)opB;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    matB_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: hand it back before bailing out so it is
+        // not leaked. The result is ignored because we are already failing.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    matC_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matC);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpGEMMAlg_t)alg;
+    spgemmDescr_native = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr);
+    if (!initNative(env, bufferSize1, bufferSize1_native, true)) {
+        // Both scalar holders are live at this point; release them (best effort)
+        // instead of returning with them still acquired.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    externalBuffer1_native = (void *)getPointer(env, externalBuffer1);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpGEMM_workEstimation(handle_native, opA_native, opB_native, alpha_native,
+        matA_native, matB_native, beta_native, matC_native, computeType_native, alg_native, spgemmDescr_native, bufferSize1_native, externalBuffer1_native);
+
+    // Always attempt every release, even if an earlier one fails, so that one
+    // failure does not strand the remaining temporaries. alpha/beta are released
+    // with JNI_ABORT, exactly as before.
+    bool allReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    allReleased = releasePointerData(env, beta_pointerData, JNI_ABORT) && allReleased;
+    allReleased = releaseNative(env, bufferSize1_native, bufferSize1, true) && allReleased;
+    if (!allReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1computeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize2, jobject externalBuffer2) {
+
+    // JNI bridge for cusparseSpGEMM_compute.
+    //
+    // Converts every Java argument to its native counterpart, forwards the call to
+    // cuSPARSE and releases the native size buffer back into the Java long[]
+    // `bufferSize2`. externalBuffer2 is not null-checked here.
+    //
+    // Returns the cusparseStatus_t of the call cast to jint, or
+    // CUJAVA_CUSPARSE_INTERNAL_ERROR when an argument cannot be converted or a
+    // temporary cannot be released.
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, alpha, "alpha", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, matA, "matA", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, matB, "matB", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, beta, "beta", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, matC, "matC", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, spgemmDescr, "spgemmDescr", "cusparseSpGEMM_compute");
+    CUJAVA_REQUIRE_NONNULL(env, bufferSize2, "bufferSize2", "cusparseSpGEMM_compute");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpGEMM_compute(handle=%p, opA=%d, opB=%d, alpha=%p, matA=%p, matB=%p, beta=%p, matC=%p, computeType=%d, alg=%d, spgemmDescr=%p, bufferSize2=%p, externalBuffer2=%p)\n",
+        handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr, bufferSize2, externalBuffer2);
+
+    // Declare native variables
+    cusparseHandle_t handle_native;
+    cusparseOperation_t opA_native;
+    cusparseOperation_t opB_native;
+    void * alpha_native = nullptr;
+    cusparseConstSpMatDescr_t matA_native;
+    cusparseConstSpMatDescr_t matB_native;
+    void * beta_native = nullptr;
+    cusparseSpMatDescr_t matC_native;
+    cudaDataType computeType_native;
+    cusparseSpGEMMAlg_t alg_native;
+    cusparseSpGEMMDescr_t spgemmDescr_native;
+    size_t * bufferSize2_native = nullptr;
+    void * externalBuffer2_native = nullptr;
+
+    // Copy Java inputs into native locals
+    handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    opA_native = (cusparseOperation_t)opA;
+    opB_native = (cusparseOperation_t)opB;
+    PointerData *alpha_pointerData = initPointerData(env, alpha);
+    if (alpha_pointerData == nullptr) {
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    alpha_native = (void *)alpha_pointerData->getPointer(env);
+    matA_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matA);
+    matB_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, matB);
+    PointerData *beta_pointerData = initPointerData(env, beta);
+    if (beta_pointerData == nullptr) {
+        // alpha was already acquired: hand it back before bailing out so it is
+        // not leaked. The result is ignored because we are already failing.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    beta_native = (void *)beta_pointerData->getPointer(env);
+    matC_native = (cusparseSpMatDescr_t)getNativePointerValue(env, matC);
+    computeType_native = (cudaDataType)computeType;
+    alg_native = (cusparseSpGEMMAlg_t)alg;
+    spgemmDescr_native = (cusparseSpGEMMDescr_t)getNativePointerValue(env, spgemmDescr);
+    if (!initNative(env, bufferSize2, bufferSize2_native, true)) {
+        // Both scalar holders are live at this point; release them (best effort)
+        // instead of returning with them still acquired.
+        releasePointerData(env, alpha_pointerData, JNI_ABORT);
+        releasePointerData(env, beta_pointerData, JNI_ABORT);
+        return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    }
+    externalBuffer2_native = (void *)getPointer(env, externalBuffer2);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpGEMM_compute(handle_native, opA_native, opB_native, alpha_native, matA_native,
+        matB_native, beta_native, matC_native, computeType_native, alg_native, spgemmDescr_native, bufferSize2_native, externalBuffer2_native);
+
+    // Always attempt every release, even if an earlier one fails, so that one
+    // failure does not strand the remaining temporaries. alpha/beta are released
+    // with JNI_ABORT, exactly as before.
+    bool allReleased = releasePointerData(env, alpha_pointerData, JNI_ABORT);
+    allReleased = releasePointerData(env, beta_pointerData, JNI_ABORT) && allReleased;
+    allReleased = releaseNative(env, bufferSize2_native, bufferSize2, true) && allReleased;
+    if (!allReleased) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMatGetSizeNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jlongArray rows, jlongArray cols, jlongArray nnz) {
+
+    // JNI bridge for cusparseSpMatGetSize.
+    //
+    // Queries cuSPARSE for the three sizes of `spMatDescr` and stores each one in
+    // element 0 of the corresponding Java long[] (`rows`, `cols`, `nnz`).
+    //
+    // Returns the cusparseStatus_t of the call cast to jint, or
+    // CUJAVA_CUSPARSE_INTERNAL_ERROR when a value cannot be stored.
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, spMatDescr, "spMatDescr", "cusparseSpMatGetSize");
+    CUJAVA_REQUIRE_NONNULL(env, rows, "rows", "cusparseSpMatGetSize");
+    CUJAVA_REQUIRE_NONNULL(env, cols, "cols", "cusparseSpMatGetSize");
+    CUJAVA_REQUIRE_NONNULL(env, nnz, "nnz", "cusparseSpMatGetSize");
+
+    Logger::log(LOG_TRACE, "Executing cusparseSpMatGetSize(spMatDescr=%p, rows=%p, cols=%p, nnz=%p)\n", spMatDescr, rows, cols, nnz);
+
+    // Declare native variables. The outputs are zero-initialized so that, should
+    // the cuSPARSE call fail without writing them, the values copied into the
+    // Java arrays below are well-defined rather than indeterminate.
+    cusparseConstSpMatDescr_t spMatDescr_native;
+    int64_t rows_native = 0;
+    int64_t cols_native = 0;
+    int64_t nnz_native = 0;
+
+    // Copy Java inputs into native locals
+    spMatDescr_native = (cusparseConstSpMatDescr_t)getNativePointerValue(env, spMatDescr);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseSpMatGetSize(spMatDescr_native, &rows_native, &cols_native, &nnz_native);
+
+    // Publish the results into element 0 of each output array
+    if (!set(env, rows, 0, (jlong)rows_native)) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    if (!set(env, cols, 0, (jlong)cols_native)) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+    if (!set(env, nnz, 0, (jlong)nnz_native)) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    // Return the result
+    jint jniResult = (jint)jniResult_native;
+    return jniResult;
+}
+
+
+
+// JNI bridge for cusparseXcsrsort.
+//   handle, descrA             - Java wrappers whose native values are cast to the cuSPARSE handle / matrix descriptor
+//   m, n, nnz                  - forwarded as C ints
+//   csrRowPtrA, csrColIndA, P  - Java objects resolved via getPointer to int* arguments
+//   pBuffer                    - Java object resolved via getPointer to the void* buffer argument
+// Returns the cusparseStatus_t of the call as a jint.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsortNative
+(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject descrA, jobject csrRowPtrA, jobject csrColIndA, jobject P, jobject pBuffer) {
+
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrsort");
+    CUJAVA_REQUIRE_NONNULL(env, descrA, "descrA", "cusparseXcsrsort");
+    CUJAVA_REQUIRE_NONNULL(env, csrRowPtrA, "csrRowPtrA", "cusparseXcsrsort");
+    CUJAVA_REQUIRE_NONNULL(env, csrColIndA, "csrColIndA", "cusparseXcsrsort");
+    CUJAVA_REQUIRE_NONNULL(env, P, "P", "cusparseXcsrsort");
+    CUJAVA_REQUIRE_NONNULL(env, pBuffer, "pBuffer", "cusparseXcsrsort");
+
+    Logger::log(LOG_TRACE, "Executing cusparseXcsrsort(handle=%p, m=%d, n=%d, nnz=%d, descrA=%p, csrRowPtrA=%p, csrColIndA=%p, P=%p, pBuffer=%p)\n",
+        handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, pBuffer);
+
+    // Resolve the library handle and the matrix descriptor
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    cusparseMatDescr_t descrA_native = (cusparseMatDescr_t)getNativePointerValue(env, descrA);
+
+    // Scalar arguments
+    int m_native = (int)m;
+    int n_native = (int)n;
+    int nnz_native = (int)nnz;
+
+    // Resolve the addresses behind the pointer arguments
+    int *csrRowPtrA_native = (int *)getPointer(env, csrRowPtrA);
+    int *csrColIndA_native = (int *)getPointer(env, csrColIndA);
+    int *P_native = (int *)getPointer(env, P);
+    void *pBuffer_native = (void *)getPointer(env, pBuffer);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseXcsrsort(
+        handle_native, m_native, n_native, nnz_native, descrA_native,
+        csrRowPtrA_native, csrColIndA_native, P_native, pBuffer_native);
+
+    // Hand the status code back to Java
+    return (jint)jniResult_native;
+}
+
+
+
+// JNI bridge for cusparseXcsrsort_bufferSizeExt.
+//   handle                  - Java wrapper whose native value is cast to the cuSPARSE handle
+//   m, n, nnz               - forwarded as C ints
+//   csrRowPtrA, csrColIndA  - Java objects resolved via getPointer to int* arguments
+//   pBufferSizeInBytes      - Java long[] paired with a native size_t* through initNative/releaseNative
+// Returns the cusparseStatus_t as a jint, or CUJAVA_CUSPARSE_INTERNAL_ERROR if a helper reports failure.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsort_1bufferSizeExtNative
+(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrRowPtrA, jobject csrColIndA, jlongArray pBufferSizeInBytes) {
+
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseXcsrsort_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrRowPtrA, "csrRowPtrA", "cusparseXcsrsort_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, csrColIndA, "csrColIndA", "cusparseXcsrsort_bufferSizeExt");
+    CUJAVA_REQUIRE_NONNULL(env, pBufferSizeInBytes, "pBufferSizeInBytes", "cusparseXcsrsort_bufferSizeExt");
+
+    Logger::log(LOG_TRACE, "Executing cusparseXcsrsort_bufferSizeExt(handle=%p, m=%d, n=%d, nnz=%d, csrRowPtrA=%p, csrColIndA=%p, pBufferSizeInBytes=%p)\n",
+        handle, m, n, nnz, csrRowPtrA, csrColIndA, pBufferSizeInBytes);
+
+    // Resolve the library handle and convert the scalar arguments
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    int m_native = (int)m;
+    int n_native = (int)n;
+    int nnz_native = (int)nnz;
+
+    // Resolve the addresses behind the pointer arguments
+    int *csrRowPtrA_native = (int *)getPointer(env, csrRowPtrA);
+    int *csrColIndA_native = (int *)getPointer(env, csrColIndA);
+
+    // Set up the native counterpart of the Java size array
+    size_t *pBufferSizeInBytes_native = nullptr;
+    if (!initNative(env, pBufferSizeInBytes, pBufferSizeInBytes_native, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseXcsrsort_bufferSizeExt(handle_native, m_native, n_native, nnz_native,
+        csrRowPtrA_native, csrColIndA_native, pBufferSizeInBytes_native);
+
+    if (!releaseNative(env, pBufferSizeInBytes_native, pBufferSizeInBytes, true)) return CUJAVA_CUSPARSE_INTERNAL_ERROR;
+
+    return (jint)jniResult_native;
+}
+
+
+
+// JNI bridge for cusparseCreate: creates a cuSPARSE handle and stores its value in the Java 'handle' object.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateNative(JNIEnv *env, jclass cls, jobject handle) {
+
+    // Reject a null output object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCreate");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCreate(handle=%p)\n", handle);
+
+    // Null-initialised so a failed cusparseCreate stores a null handle, not an indeterminate value
+    cusparseHandle_t handle_native = nullptr;
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseCreate(&handle_native);
+    setNativePointerValue(env, handle, (jlong)handle_native);
+
+    return (jint)jniResult_native;
+}
+
+
+
+// JNI bridge for cusparseCreateIdentityPermutation.
+//   handle - Java wrapper whose native value is cast to the cuSPARSE handle
+//   n      - forwarded as a C int
+//   p      - Java object resolved via getPointer to the int* argument
+// Returns the cusparseStatus_t of the call as a jint.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateIdentityPermutationNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject p) {
+
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, handle, "handle", "cusparseCreateIdentityPermutation");
+    CUJAVA_REQUIRE_NONNULL(env, p, "p", "cusparseCreateIdentityPermutation");
+
+    Logger::log(LOG_TRACE, "Executing cusparseCreateIdentityPermutation(handle=%p, n=%d, p=%p)\n", handle, n, p);
+
+    // Convert the Java arguments to their native counterparts
+    cusparseHandle_t handle_native = (cusparseHandle_t)getNativePointerValue(env, handle);
+    int n_native = (int)n;
+    int *p_native = (int *)getPointer(env, p);
+
+    // Cusparse API call
+    cusparseStatus_t jniResult_native = cusparseCreateIdentityPermutation(handle_native, n_native, p_native);
+
+    // Hand the status code back to Java
+    return (jint)jniResult_native;
+}
diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse.hpp b/src/main/cpp/jni/cusparse/cujava_cusparse.hpp
new file mode 100644
index 00000000000..5c3d1ee2c20
--- /dev/null
+++ b/src/main/cpp/jni/cusparse/cujava_cusparse.hpp
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include <jni.h>
+
+#ifndef _Included_org_apache_sysds_cujava_cusparse_CuJavaCusparse
+#define _Included_org_apache_sysds_cujava_cusparse_CuJavaCusparse
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Class:  org.apache.sysds.cujava.cusparse.CuJavaCusparse
+ * Methods:
+ *  - cusparseSpGEMM_copy
+ *  - cusparseGetMatIndexBase
+ *  - cusparseCreateCsr
+ *  - cusparseCreateDnVec
+ *  - cusparseSpMV_bufferSize
+ *  - cusparseSpMV
+ *  - cusparseDestroy
+ *  - cusparseDestroyDnVec
+ *  - cusparseDestroyDnMat
+ *  - cusparseDestroySpMat
+ *  - cusparseSpMM
+ *  - cusparseSpMM_bufferSize
+ *  - cusparseCreateDnMat
+ *  - cusparseCsrSetPointers
+ *  - cusparseCsr2cscEx2
+ *  - cusparseCsr2cscEx2_bufferSize
+ *  - cusparseDcsrgeam2
+ *  - cusparseDcsrgeam2_bufferSizeExt
+ *  - cusparseSparseToDense
+ *  - cusparseSparseToDense_bufferSize
+ *  - cusparseDenseToSparse_bufferSize
+ *  - cusparseDenseToSparse_analysis
+ *  - cusparseDenseToSparse_convert
+ *  - cusparseDnnz
+ *  - cusparseSetMatType
+ *  - cusparseSetMatIndexBase
+ *  - cusparseSetPointerMode
+ *  - cusparseXcsrgeam2Nnz
+ *  - cusparseSpGEMM_workEstimation
+ *  - cusparseSpGEMM_compute
+ *  - cusparseSpMatGetSize
+ *  - cusparseXcsrsort
+ *  - cusparseXcsrsort_bufferSizeExt
+ *  - cusparseCreate
+ *  - cusparseCreateIdentityPermutation
+ */
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1copyNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB,
+     jobject alpha, jobject matA, jobject matB, jobject beta, jobject matC,
+     jint computeType, jint alg, jobject spgemmDescr);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseGetMatIndexBaseNative
+  (JNIEnv *env, jclass cls, jobject descrA);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateCsrNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jlong rows, jlong cols, jlong nnz, jobject csrRowOffsets,
+     jobject csrColInd, jobject csrValues, jint csrRowOffsetsType, jint csrColIndType, jint idxBase, jint valueType);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnVecNative
+    (JNIEnv *env, jclass cls, jobject dnVecDescr, jlong size, jobject values, jint valueType);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMV_1bufferSizeNative
+  (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta,
+   jobject vecY, jint computeType, jint alg, jlongArray bufferSize);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMVNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jobject alpha, jobject matA, jobject vecX, jobject beta,
+     jobject vecY, jint computeType, jint alg, jobject externalBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyNative
+    (JNIEnv *env, jclass cls, jobject handle);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnVecNative
+    (JNIEnv *env, jclass cls, jobject dnVecDescr);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroyDnMatNative
+    (JNIEnv *env, jclass cls, jobject dnMatDescr);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDestroySpMatNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMMNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jobject externalBuffer);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMM_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jlongArray bufferSize);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateDnMatNative
+    (JNIEnv *env, jclass cls, jobject dnMatDescr, jlong rows, jlong cols, jlong ld, jobject values, jint valueType, jint order);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsrSetPointersNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jobject csrRowOffsets, jobject csrColInd, jobject csrValues);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2Native
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr,
+     jobject csrColInd, jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jobject buffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCsr2cscEx2_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrVal, jobject csrRowPtr, jobject csrColInd,
+     jobject cscVal, jobject cscColPtr, jobject cscRowInd, jint valType, jint copyValues, jint idxBase, jint alg, jlongArray bufferSize);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2Native
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA,
+     jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB,
+     jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jobject pBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDcsrgeam2_1bufferSizeExtNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject alpha, jobject descrA, jint nnzA, jobject csrSortedValA,
+     jobject csrSortedRowPtrA, jobject csrSortedColIndA, jobject beta, jobject descrB, jint nnzB, jobject csrSortedValB, jobject csrSortedRowPtrB,
+     jobject csrSortedColIndB, jobject descrC, jobject csrSortedValC, jobject csrSortedRowPtrC, jobject csrSortedColIndC, jlongArray pBufferSizeInBytes);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDenseNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSparseToDense_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1bufferSizeNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jlongArray bufferSize);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1analysisNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDenseToSparse_1convertNative
+    (JNIEnv *env, jclass cls, jobject handle, jobject matA, jobject matB, jint alg, jobject externalBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseDnnzNative
+    (JNIEnv *env, jclass cls, jobject handle, jint dirA, jint m, jint n, jobject descrA, jobject A, jint lda, jobject nnzPerRowCol, jobject nnzTotalDevHostPtr);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatTypeNative
+    (JNIEnv *env, jclass cls, jobject descrA, jint type);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetMatIndexBaseNative
+    (JNIEnv *env, jclass cls, jobject descrA, jint base);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSetPointerModeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint mode);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrgeam2NnzNative
+    (JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jobject descrA, jint nnzA, jobject csrSortedRowPtrA, jobject csrSortedColIndA,
+     jobject descrB, jint nnzB, jobject csrSortedRowPtrB, jobject csrSortedColIndB, jobject descrC, jobject csrSortedRowPtrC, jobject nnzTotalDevHostPtr, jobject workspace);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1workEstimationNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize1, jobject externalBuffer1);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpGEMM_1computeNative
+    (JNIEnv *env, jclass cls, jobject handle, jint opA, jint opB, jobject alpha, jobject matA, jobject matB, jobject beta,
+     jobject matC, jint computeType, jint alg, jobject spgemmDescr, jlongArray bufferSize2, jobject externalBuffer2);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseSpMatGetSizeNative
+    (JNIEnv *env, jclass cls, jobject spMatDescr, jlongArray rows, jlongArray cols, jlongArray nnz);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsortNative
+(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject descrA, jobject csrRowPtrA, jobject csrColIndA, jobject P, jobject pBuffer);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseXcsrsort_1bufferSizeExtNative
+(JNIEnv *env, jclass cls, jobject handle, jint m, jint n, jint nnz, jobject csrRowPtrA, jobject csrColIndA, jlongArray pBufferSizeInBytes);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateNative(JNIEnv *env, jclass cls, jobject handle);
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_cusparse_CuJavaCusparse_cusparseCreateIdentityPermutationNative
+    (JNIEnv *env, jclass cls, jobject handle, jint n, jobject p);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp b/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp
new file mode 100644
index 00000000000..85ecfd9346c
--- /dev/null
+++ b/src/main/cpp/jni/cusparse/cujava_cusparse_common.hpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#ifndef CUJAVA_CUSPARSE_COMMON_HPP
+#define CUJAVA_CUSPARSE_COMMON_HPP
+
+#include <jni.h>
+#include <cstdint>
+#include <cusparse.h>
+#include <cuda_runtime.h>
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+
+// Status value the cuSPARSE JNI wrappers return when a JNI-side helper call reports failure
+#define CUJAVA_CUSPARSE_INTERNAL_ERROR (-1)
+
+
+#endif // CUJAVA_CUSPARSE_COMMON_HPP
diff --git a/src/main/cpp/jni/driver/CMakeLists.txt b/src/main/cpp/jni/driver/CMakeLists.txt
new file mode 100644
index 00000000000..9825ade2736
--- /dev/null
+++ b/src/main/cpp/jni/driver/CMakeLists.txt
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+
+project(CuJavaDriver LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED)  # for CUDA::cuda_driver
+
+add_library(CuJavaDriver SHARED
+    cujava_driver.cpp
+)
+
+set_target_properties(CuJavaDriver PROPERTIES
+    CXX_STANDARD 11
+    OUTPUT_NAME cujava_driver                         # -> libcujava_driver.so
+    LIBRARY_OUTPUT_DIRECTORY  ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    RUNTIME_OUTPUT_DIRECTORY  ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+    ARCHIVE_OUTPUT_DIRECTORY  ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+)
+
+target_include_directories(CuJavaDriver
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${CMAKE_CURRENT_SOURCE_DIR}            # headers in driver/
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common  # if including common headers
+)
+# Link dependencies into the driver library defined above (not into a sibling target)
+target_link_libraries(CuJavaDriver
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cuda_driver            # driver API (libcuda)
+        ${JNI_LIBRARIES}
+)
diff --git a/src/main/cpp/jni/driver/cujava_driver.cpp b/src/main/cpp/jni/driver/cujava_driver.cpp
new file mode 100644
index 00000000000..2d3d9e9afb0
--- /dev/null
+++ b/src/main/cpp/jni/driver/cujava_driver.cpp
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "cujava_driver.hpp"
+#include "cujava_driver_common.hpp"
+
+// Null guard for JNI entry points: when 'obj' is null, throws java.lang.NullPointerException via ThrowByName
+// and returns CUJAVA_INTERNAL_ERROR from the calling function. 'name' and 'method' must be string literals.
+#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method)                           \
+    do {                                                                          \
+        if ((obj) == nullptr) {                                                   \
+            ThrowByName((env), "java/lang/NullPointerException",                  \
+                        "Parameter '" name "' is null for " method);              \
+            return CUJAVA_INTERNAL_ERROR;                                         \
+        }                                                                         \
+    } while (0)
+
+// Library load hook: obtains a JNIEnv and initialises the JNI and pointer helper utilities.
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    JNIEnv *env = nullptr;
+    // GetEnv reports success as JNI_OK (zero); any other value aborts the load
+    if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4) != JNI_OK) return JNI_ERR;
+
+    // Only what we need so far; the second initialiser is skipped when the first one fails
+    if (initJNIUtils(env) == JNI_ERR || initPointerUtils(env) == JNI_ERR) {
+        return JNI_ERR;
+    }
+    return JNI_VERSION_1_4;
+}
+
+
+
+// Library unload hook; the body is intentionally empty, so nothing is cleaned up here.
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) {
+}
+
+
+// JNI bridge for cuCtxCreate: creates a context on the device held by 'dev' and stores it in 'pctx'.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxCreateNative
+  (JNIEnv *env, jclass cls, jobject pctx, jint flags, jobject dev) {
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, pctx, "pctx", "cuCtxCreate");
+    CUJAVA_REQUIRE_NONNULL(env, dev, "dev", "cuCtxCreate");
+
+    Logger::log(LOG_TRACE, "Executing cuCtxCreate\n");
+
+    CUdevice nativeDev = (CUdevice)(intptr_t)getNativePointerValue(env, dev);
+    // Null-initialised so a failed cuCtxCreate stores a null context, not an indeterminate value
+    CUcontext nativePctx = nullptr;
+    int result = cuCtxCreate(&nativePctx, (int)flags, nativeDev);
+    setNativePointerValue(env, pctx, (jlong)nativePctx);
+    return result;
+}
+
+
+
+// JNI bridge for cuDeviceGet: looks up the device with the given ordinal and stores it in 'device'.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetNative
+  (JNIEnv *env, jclass cls, jobject device, jint ordinal) {
+    // Reject a null output object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, device, "device", "cuDeviceGet");
+    // 'ordinal' is a 32-bit jint, so it is logged with %d; %ld would read a long from the varargs
+    Logger::log(LOG_TRACE, "Executing cuDeviceGet for device %d\n", (int)ordinal);
+
+    CUdevice nativeDevice = 0;  // defined value even when cuDeviceGet fails
+    int result = cuDeviceGet(&nativeDevice, ordinal);
+    setNativePointerValue(env, device, (jlong)nativeDevice);
+    return result;
+}
+
+
+// JNI bridge for cuDeviceGetCount: writes the device count to index 0 of the Java int[] 'count'.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetCountNative
+  (JNIEnv *env, jclass cls, jintArray count) {
+    // Reject a null output array before it is used
+    CUJAVA_REQUIRE_NONNULL(env, count, "count", "cuDeviceGetCount");
+
+    Logger::log(LOG_TRACE, "Executing cuDeviceGetCount\n");
+
+    int deviceCount = 0;
+    const int status = cuDeviceGetCount(&deviceCount);
+    if (!set(env, count, 0, deviceCount)) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
+
+
+// JNI bridge for cuInit: forwards 'flags' as an unsigned int and returns the driver status code.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuInitNative
+  (JNIEnv *env, jclass cls, jint flags) {
+    Logger::log(LOG_TRACE, "Executing cuInit\n");
+
+    return cuInit((unsigned int)flags);
+}
+
+
+// JNI bridge for cuLaunchKernel. Only 'f' is null-checked; 'kernelParams' and 'extra' are optional.
+// Returns the driver status, or CUJAVA_INTERNAL_ERROR if pointer data cannot be set up or released.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuLaunchKernelNative
+  (JNIEnv *env, jclass, jobject f, jint gridDimX, jint gridDimY, jint gridDimZ,
+   jint blockDimX, jint blockDimY, jint blockDimZ, jint sharedMemBytes,
+   jobject hStream, jobject kernelParams, jobject extra) {
+    // The kernel handle is the only mandatory object argument
+    CUJAVA_REQUIRE_NONNULL(env, f, "f", "cuLaunchKernel");
+    Logger::log(LOG_TRACE, "Executing cuLaunchKernel\n");
+
+    CUfunction nativeF    = (CUfunction)getNativePointerValue(env, f);
+    CUstream   nativeHStr = (CUstream)  getNativePointerValue(env, hStream);
+
+    PointerData *kernelParamsPD = nullptr;
+    void **nativeKernelParams   = nullptr;
+    if (kernelParams != nullptr) {
+        kernelParamsPD = initPointerData(env, kernelParams);
+        if (kernelParamsPD == nullptr) return CUJAVA_INTERNAL_ERROR;
+        nativeKernelParams = (void**)kernelParamsPD->getPointer(env);
+    }
+
+    PointerData *extraPD = nullptr;
+    void **nativeExtra   = nullptr;
+    if (extra != nullptr) {
+        extraPD = initPointerData(env, extra);
+        if (extraPD == nullptr) {
+            // Nothing was launched: drop the kernel-parameter data acquired above without committing it
+            if (kernelParamsPD != nullptr) releasePointerData(env, kernelParamsPD, JNI_ABORT);
+            return CUJAVA_INTERNAL_ERROR;
+        }
+        nativeExtra = (void**)extraPD->getPointer(env);
+    }
+
+    int result = cuLaunchKernel(nativeF,
+        (unsigned int)gridDimX, (unsigned int)gridDimY, (unsigned int)gridDimZ,
+        (unsigned int)blockDimX, (unsigned int)blockDimY, (unsigned int)blockDimZ,
+        (unsigned int)sharedMemBytes, nativeHStr, nativeKernelParams, nativeExtra);
+
+    // Release both objects before checking, so a failure on the first cannot leak the second
+    const bool paramsReleased = releasePointerData(env, kernelParamsPD, 0);
+    if (!releasePointerData(env, extraPD, 0) || !paramsReleased) return CUJAVA_INTERNAL_ERROR;
+    return result;
+}
+
+
+
+// JNI bridge for cuModuleGetFunction: looks up function 'name' in module 'hmod' and stores the
+// resulting CUfunction in 'hfunc'. Returns CUJAVA_INTERNAL_ERROR if toNativeCString yields null.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleGetFunctionNative
+  (JNIEnv *env, jclass, jobject hfunc, jobject hmod, jstring name) {
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, hfunc, "hfunc", "cuModuleGetFunction");
+    CUJAVA_REQUIRE_NONNULL(env, hmod, "hmod", "cuModuleGetFunction");
+    CUJAVA_REQUIRE_NONNULL(env, name, "name", "cuModuleGetFunction");
+
+    Logger::log(LOG_TRACE, "Executing cuModuleGetFunction\n");
+
+    CUmodule module = (CUmodule)getNativePointerValue(env, hmod);
+    char *functionName = toNativeCString(env, name);
+    if (functionName == nullptr) return CUJAVA_INTERNAL_ERROR;
+
+    CUfunction function = nullptr;
+    const int status = cuModuleGetFunction(&function, module, functionName);
+    delete[] functionName;  // the native copy of the name is no longer needed
+    setNativePointerValue(env, hfunc, (jlong)function);
+    return status;
+}
+
+
+// JNI bridge for cuModuleLoadDataEx: loads the module image behind 'p' with 'numOptions' JIT options
+// taken from 'options'/'optionValues' and stores the resulting CUmodule in 'phMod'.
+// Returns the driver status code, or CUJAVA_INTERNAL_ERROR when an argument or JNI helper fails.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleLoadDataExNative
+  (JNIEnv *env, jclass, jobject phMod, jobject p, jint numOptions, jintArray options, jobject optionValues) {
+
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, phMod, "phMod", "cuModuleLoadDataEx");
+    CUJAVA_REQUIRE_NONNULL(env, p, "p", "cuModuleLoadDataEx");
+    CUJAVA_REQUIRE_NONNULL(env, options, "options", "cuModuleLoadDataEx");
+    CUJAVA_REQUIRE_NONNULL(env, optionValues, "optionValues", "cuModuleLoadDataEx");
+
+    Logger::log(LOG_TRACE, "Executing cuModuleLoadDataEx\n");
+
+    // numOptions sizes the native option array and bounds the copy loop, so it must fit the Java array
+    if (numOptions < 0 || numOptions > env->GetArrayLength(options)) {
+        ThrowByName(env, "java/lang/IllegalArgumentException",
+                    "Parameter 'numOptions' is out of range for cuModuleLoadDataEx");
+        return CUJAVA_INTERNAL_ERROR;
+    }
+
+    jint *opts = env->GetIntArrayElements(options, nullptr);
+    if (opts == nullptr) return CUJAVA_INTERNAL_ERROR;
+    CUjit_option *nativeOptions = new CUjit_option[(size_t)numOptions];
+    for (int i = 0; i < numOptions; ++i) nativeOptions[i] = (CUjit_option)opts[i];
+    env->ReleaseIntArrayElements(options, opts, JNI_ABORT);  // only read, nothing to copy back
+
+    PointerData *pPD = initPointerData(env, p);
+    if (pPD == nullptr) { delete[] nativeOptions; return CUJAVA_INTERNAL_ERROR; }
+
+    PointerData *ovPD = initPointerData(env, optionValues);
+    if (ovPD == nullptr) {
+        releasePointerData(env, pPD, JNI_ABORT);
+        delete[] nativeOptions;
+        return CUJAVA_INTERNAL_ERROR;
+    }
+    void **nativeOptionValues = (void**)ovPD->getPointer(env);
+
+    // Null-initialised so a failed load stores a null module, not an indeterminate value
+    CUmodule nativeModule = nullptr;
+    int result = cuModuleLoadDataEx(&nativeModule, (void*)pPD->getPointer(env),
+        (unsigned int)numOptions, nativeOptions, nativeOptionValues);
+
+    delete[] nativeOptions;
+    setNativePointerValue(env, phMod, (jlong)nativeModule);
+
+    // p is input-only → no-commit; optionValues may receive outputs → commit.
+    // Both are released before checking, so a failure on the first cannot leak the second.
+    const bool pReleased = releasePointerData(env, pPD, JNI_ABORT);
+    if (!releasePointerData(env, ovPD, 0) || !pReleased) return CUJAVA_INTERNAL_ERROR;
+
+    return result;
+}
+
+
+// JNI bridge for cuMemAlloc: allocates 'bytesize' bytes of device memory and stores the address in 'dptr'.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemAllocNative
+  (JNIEnv *env, jclass cls, jobject dptr, jlong bytesize) {
+    // Reject a null output object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, dptr, "dptr", "cuMemAlloc");
+    Logger::log(LOG_TRACE, "Executing cuMemAlloc of %ld bytes\n", (long)bytesize);
+
+    // Zero-initialised so a failed allocation stores a null address, not an indeterminate value
+    CUdeviceptr nativeDptr = 0;
+    int result = cuMemAlloc(&nativeDptr, (size_t)bytesize);
+    setPointer(env, dptr, (jlong)nativeDptr);
+    return result;
+}
+
+
+// JNI bridge for cuModuleUnload: unloads the module stored in 'hmod' and returns the driver status code.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleUnloadNative
+  (JNIEnv *env, jclass cls, jobject hmod) {
+
+    // Reject a null module object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, hmod, "hmod", "cuModuleUnload");
+
+    Logger::log(LOG_TRACE, "Executing cuModuleUnload\n");
+
+    CUmodule module = (CUmodule)getNativePointerValue(env, hmod);
+    return cuModuleUnload(module);
+}
+
+
+// JNI bridge for cuCtxDestroy: destroys the context stored in 'ctx' and returns the driver status code.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxDestroyNative
+  (JNIEnv *env, jclass cls, jobject ctx) {
+
+    // Reject a null context object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, ctx, "ctx", "cuCtxDestroy");
+
+    Logger::log(LOG_TRACE, "Executing cuCtxDestroy\n");
+
+    CUcontext context = (CUcontext)getNativePointerValue(env, ctx);
+    return cuCtxDestroy(context);
+}
+
+
+// JNI bridge for cuMemFree: frees the device address resolved from 'dptr' and returns the driver status code.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemFreeNative
+  (JNIEnv *env, jclass cls, jobject dptr) {
+
+    // Reject a null pointer object before it is used
+    CUJAVA_REQUIRE_NONNULL(env, dptr, "dptr", "cuMemFree");
+
+    Logger::log(LOG_TRACE, "Executing cuMemFree\n");
+
+    CUdeviceptr deviceAddress = (CUdeviceptr)getPointer(env, dptr);
+    return cuMemFree(deviceAddress);
+}
+
+
+// JNI bridge for cuMemcpyDtoH: copies 'ByteCount' bytes from the device address in 'srcDevice'
+// into the host memory behind 'dstHost'. Returns the driver status code or CUJAVA_INTERNAL_ERROR.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemcpyDtoHNative
+  (JNIEnv *env, jclass, jobject dstHost, jobject srcDevice, jlong ByteCount) {
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, dstHost, "dstHost", "cuMemcpyDtoH");
+    CUJAVA_REQUIRE_NONNULL(env, srcDevice, "srcDevice", "cuMemcpyDtoH");
+
+    Logger::log(LOG_TRACE, "Executing cuMemcpyDtoH of %ld bytes\n", (long)ByteCount);
+
+    PointerData *hostData = initPointerData(env, dstHost);
+    if (hostData == nullptr) return CUJAVA_INTERNAL_ERROR;
+
+    // NOTE(review): cuMemFree resolves its device pointer with getPointer(); confirm this accessor is intended
+    CUdeviceptr deviceSrc = (CUdeviceptr)(uintptr_t)getNativePointerValue(env, srcDevice);
+    void *hostDst = hostData->getPointer(env);
+    const int status = cuMemcpyDtoH(hostDst, deviceSrc, (size_t)ByteCount);
+    // Mode 0 commits the bytes written to the host buffer
+    if (!releasePointerData(env, hostData, 0)) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
+
+
+
+// JNI bridge for cuCtxSynchronize: takes no arguments and returns the driver status code.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxSynchronizeNative
+  (JNIEnv *env, jclass cls) {
+    Logger::log(LOG_TRACE, "Executing cuCtxSynchronize\n");
+    const int status = cuCtxSynchronize();
+    return status;
+}
+
+
+// JNI bridge for cuDeviceGetAttribute: queries one attribute of the device in 'dev' into index 0 of 'pi'.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetAttributeNative
+  (JNIEnv *env, jclass cls, jintArray pi, jint CUdevice_attribute_attrib, jobject dev) {
+    // Reject null object arguments before any of them is used
+    CUJAVA_REQUIRE_NONNULL(env, pi, "pi", "cuDeviceGetAttribute");
+    CUJAVA_REQUIRE_NONNULL(env, dev, "dev", "cuDeviceGetAttribute");
+
+    Logger::log(LOG_TRACE, "Executing cuDeviceGetAttribute\n");
+
+    const CUdevice device = (CUdevice)(intptr_t)getNativePointerValue(env, dev);
+    int attributeValue = 0;
+    const int status = cuDeviceGetAttribute(&attributeValue, (CUdevice_attribute)CUdevice_attribute_attrib, device);
+    if (!set(env, pi, 0, attributeValue)) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
diff --git a/src/main/cpp/jni/driver/cujava_driver.hpp b/src/main/cpp/jni/driver/cujava_driver.hpp
new file mode 100644
index 00000000000..6a99bf669b6
--- /dev/null
+++ b/src/main/cpp/jni/driver/cujava_driver.hpp
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ #include <jni.h>
+
+ #ifndef _Included_org_apache_sysds_cujava_driver_CuJavaDriver
+ #define _Included_org_apache_sysds_cujava_driver_CuJavaDriver
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /*
+  * Class:  org.apache.sysds.cujava.driver.CuJavaDriver
+  * Native methods (each exported below as Java_..._CuJavaDriver_<name>Native, returning jint):
+  *  - cuCtxCreate
+  *  - cuDeviceGet
+  *  - cuDeviceGetCount
+  *  - cuInit
+  *  - cuLaunchKernel
+  *  - cuModuleGetFunction
+  *  - cuModuleLoadDataEx
+  *  - cuMemAlloc
+  *  - cuModuleUnload
+  *  - cuCtxDestroy
+  *  - cuMemFree
+  *  - cuMemcpyDtoH
+  *  - cuCtxSynchronize
+  *  - cuDeviceGetAttribute
+  */
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxCreateNative
+  (JNIEnv *env, jclass cls, jobject pctx, jint flags, jobject dev);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetNative
+  (JNIEnv *env, jclass cls, jobject device, jint ordinal);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetCountNative
+  (JNIEnv *env, jclass cls, jintArray count);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuInitNative
+  (JNIEnv *env, jclass cls, jint Flags);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuLaunchKernelNative
+  (JNIEnv *env, jclass, jobject f, jint gridDimX, jint gridDimY, jint gridDimZ,
+   jint blockDimX, jint blockDimY, jint blockDimZ, jint sharedMemBytes,
+   jobject hStream, jobject kernelParams, jobject extra);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleGetFunctionNative
+  (JNIEnv *env, jclass, jobject hfunc, jobject hmod, jstring name);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleLoadDataExNative
+  (JNIEnv *env, jclass, jobject phMod, jobject p, jint numOptions, jintArray options, jobject optionValues);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemAllocNative
+  (JNIEnv *env, jclass cls, jobject dptr, jlong bytesize);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuModuleUnloadNative
+  (JNIEnv *env, jclass cls, jobject hmod);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxDestroyNative
+  (JNIEnv *env, jclass cls, jobject ctx);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemFreeNative
+  (JNIEnv *env, jclass cls, jobject dptr);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuMemcpyDtoHNative
+  (JNIEnv *env, jclass cls, jobject dstHost, jobject srcDevice, jlong ByteCount);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuCtxSynchronizeNative
+  (JNIEnv *env, jclass cls);
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_driver_CuJavaDriver_cuDeviceGetAttributeNative
+  (JNIEnv *env, jclass cls, jintArray pi, jint CUdevice_attribute_attrib, jobject dev);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/main/cpp/jni/driver/cujava_driver_common.hpp b/src/main/cpp/jni/driver/cujava_driver_common.hpp
new file mode 100644
index 00000000000..c4c219981c6
--- /dev/null
+++ b/src/main/cpp/jni/driver/cujava_driver_common.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_DRIVER_COMMON_HPP
+#define CUJAVA_DRIVER_COMMON_HPP
+
+#include <jni.h>
+#include <cstdint>
+#include <cuda.h>
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+
+// Status returned by the JNI wrappers when a JNI-side step fails (e.g. pointer setup or storing a result).
+// NOTE(review): with a 32-bit int this literal is unsigned and is converted when returned as jint - confirm intended.
+#define CUJAVA_INTERNAL_ERROR 0x80000001
+
+#endif // CUJAVA_DRIVER_COMMON_HPP
diff --git a/src/main/cpp/jni/runtime/CMakeLists.txt b/src/main/cpp/jni/runtime/CMakeLists.txt
new file mode 100644
index 00000000000..2bb94a356ab
--- /dev/null
+++ b/src/main/cpp/jni/runtime/CMakeLists.txt
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.18)
+# JNI bridge for the CUDA runtime API; cujava_runtime.cpp relies on C++11 (nullptr, range-based for).
+project(CuJavaRuntime LANGUAGES CXX)
+
+find_package(JNI REQUIRED)
+find_package(CUDAToolkit REQUIRED)  # for CUDA::cudart
+
+add_library(CuJavaRuntime SHARED
+    cujava_runtime.cpp
+)
+
+set_target_properties(CuJavaRuntime PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON                           # error out instead of silently decaying below C++11
+    OUTPUT_NAME cujava_runtime                         # -> libcujava_runtime.so
+    LIBRARY_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    RUNTIME_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+    ARCHIVE_OUTPUT_DIRECTORY  "${CMAKE_CURRENT_SOURCE_DIR}/../../lib"
+)
+
+target_include_directories(CuJavaRuntime
+    PRIVATE
+        ${JNI_INCLUDE_DIRS}
+        ${CUDAToolkit_INCLUDE_DIRS}
+        "${CMAKE_CURRENT_SOURCE_DIR}"            # headers in runtime/
+        "${CMAKE_CURRENT_SOURCE_DIR}/../common"  # headers in common/
+)
+
+target_link_libraries(CuJavaRuntime
+    PRIVATE
+        CuJavaCommonJNI
+        CUDA::cudart
+        ${JNI_LIBRARIES}
+)
diff --git a/src/main/cpp/jni/runtime/cujava_runtime.cpp b/src/main/cpp/jni/runtime/cujava_runtime.cpp
new file mode 100644
index 00000000000..cbf14bf0bdd
--- /dev/null
+++ b/src/main/cpp/jni/runtime/cujava_runtime.cpp
@@ -0,0 +1,572 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include "cujava_runtime.hpp"
+#include "cujava_runtime_common.hpp"
+
+// Null guard: on a null `obj`, raises NullPointerException via ThrowByName and returns CUJAVA_INTERNAL_ERROR from the caller.
+#define CUJAVA_REQUIRE_NONNULL(env, obj, name, method) \
+    do { \
+        if (nullptr == (obj)) { \
+            ThrowByName((env), "java/lang/NullPointerException", "Parameter '" name "' is null for " method); \
+            return CUJAVA_INTERNAL_ERROR; \
+        } \
+    } while (0)
+
+// ---- Cached class reference and jfieldIDs of the Java CudaDeviceProp class (filled in JNI_OnLoad) ----
+static jclass  cudaDeviceProp_class = nullptr;
+// F(name) declares one file-local jfieldID that starts out null.
+#define F(name) static jfieldID name = nullptr;
+F(cudaDeviceProp_accessPolicyMaxWindowSize)
+F(cudaDeviceProp_asyncEngineCount)
+F(cudaDeviceProp_canMapHostMemory)
+F(cudaDeviceProp_canUseHostPointerForRegisteredMem)
+F(cudaDeviceProp_clockRate)
+F(cudaDeviceProp_clusterLaunch)
+F(cudaDeviceProp_computeMode)
+F(cudaDeviceProp_computePreemptionSupported)
+F(cudaDeviceProp_concurrentKernels)
+F(cudaDeviceProp_concurrentManagedAccess)
+F(cudaDeviceProp_cooperativeLaunch)
+F(cudaDeviceProp_cooperativeMultiDeviceLaunch)
+F(cudaDeviceProp_deferredMappingCudaArraySupported)
+F(cudaDeviceProp_deviceOverlap)
+F(cudaDeviceProp_directManagedMemAccessFromHost)
+F(cudaDeviceProp_ECCEnabled)
+F(cudaDeviceProp_globalL1CacheSupported)
+F(cudaDeviceProp_gpuDirectRDMAFlushWritesOptions)
+F(cudaDeviceProp_gpuDirectRDMASupported)
+F(cudaDeviceProp_gpuDirectRDMAWritesOrdering)
+F(cudaDeviceProp_hostNativeAtomicSupported)
+F(cudaDeviceProp_hostRegisterReadOnlySupported)
+F(cudaDeviceProp_hostRegisterSupported)
+F(cudaDeviceProp_integrated)
+F(cudaDeviceProp_ipcEventSupported)
+F(cudaDeviceProp_isMultiGpuBoard)
+F(cudaDeviceProp_kernelExecTimeoutEnabled)
+F(cudaDeviceProp_l2CacheSize)
+F(cudaDeviceProp_localL1CacheSupported)
+F(cudaDeviceProp_luid)
+F(cudaDeviceProp_luidDeviceNodeMask)
+F(cudaDeviceProp_major)
+F(cudaDeviceProp_managedMemory)
+F(cudaDeviceProp_maxBlocksPerMultiProcessor)
+F(cudaDeviceProp_maxGridSize)
+F(cudaDeviceProp_maxSurface1D)
+F(cudaDeviceProp_maxSurface1DLayered)
+F(cudaDeviceProp_maxSurface2D)
+F(cudaDeviceProp_maxSurface2DLayered)
+F(cudaDeviceProp_maxSurface3D)
+F(cudaDeviceProp_maxSurfaceCubemap)
+F(cudaDeviceProp_maxSurfaceCubemapLayered)
+F(cudaDeviceProp_maxTexture1D)
+F(cudaDeviceProp_maxTexture1DLayered)
+F(cudaDeviceProp_maxTexture1DLinear)
+F(cudaDeviceProp_maxTexture1DMipmap)
+F(cudaDeviceProp_maxTexture2D)
+F(cudaDeviceProp_maxTexture2DGather)
+F(cudaDeviceProp_maxTexture2DLayered)
+F(cudaDeviceProp_maxTexture2DLinear)
+F(cudaDeviceProp_maxTexture2DMipmap)
+F(cudaDeviceProp_maxTexture3D)
+F(cudaDeviceProp_maxTexture3DAlt)
+F(cudaDeviceProp_maxTextureCubemap)
+F(cudaDeviceProp_maxTextureCubemapLayered)
+F(cudaDeviceProp_maxThreadsDim)
+F(cudaDeviceProp_maxThreadsPerBlock)
+F(cudaDeviceProp_maxThreadsPerMultiProcessor)
+F(cudaDeviceProp_memoryBusWidth)
+F(cudaDeviceProp_memoryClockRate)
+F(cudaDeviceProp_memoryPoolsSupported)
+F(cudaDeviceProp_memoryPoolSupportedHandleTypes)
+F(cudaDeviceProp_memPitch)
+F(cudaDeviceProp_minor)
+F(cudaDeviceProp_multiGpuBoardGroupID)
+F(cudaDeviceProp_multiProcessorCount)
+F(cudaDeviceProp_name)
+F(cudaDeviceProp_pageableMemoryAccess)
+F(cudaDeviceProp_pageableMemoryAccessUsesHostPageTables)
+F(cudaDeviceProp_pciBusID)
+F(cudaDeviceProp_pciDeviceID)
+F(cudaDeviceProp_pciDomainID)
+F(cudaDeviceProp_persistingL2CacheMaxSize)
+F(cudaDeviceProp_regsPerBlock)
+F(cudaDeviceProp_regsPerMultiprocessor)
+F(cudaDeviceProp_reserved)
+F(cudaDeviceProp_reservedSharedMemPerBlock)
+F(cudaDeviceProp_sharedMemPerBlock)
+F(cudaDeviceProp_sharedMemPerBlockOptin)
+F(cudaDeviceProp_sharedMemPerMultiprocessor)
+F(cudaDeviceProp_singleToDoublePrecisionPerfRatio)
+F(cudaDeviceProp_sparseCudaArraySupported)
+F(cudaDeviceProp_streamPrioritiesSupported)
+F(cudaDeviceProp_surfaceAlignment)
+F(cudaDeviceProp_tccDriver)
+F(cudaDeviceProp_textureAlignment)
+F(cudaDeviceProp_texturePitchAlignment)
+F(cudaDeviceProp_timelineSemaphoreInteropSupported)
+F(cudaDeviceProp_totalConstMem)
+F(cudaDeviceProp_totalGlobalMem)
+F(cudaDeviceProp_unifiedAddressing)
+F(cudaDeviceProp_unifiedFunctionPointers)
+F(cudaDeviceProp_warpSize)
+#undef F
+
+
+
+// Library load hook: initializes the shared JNI/pointer helpers and caches the class and field IDs
+// of CudaDeviceProp. Returns JNI_VERSION_1_4 on success, or JNI_ERR as soon as any step fails.
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    JNIEnv *env = nullptr;
+    if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4)) {
+        return JNI_ERR;
+    }
+
+    // Initialize only the helper modules this library uses; abort the load if either fails
+    if (initJNIUtils(env) == JNI_ERR)      return JNI_ERR;
+    if (initPointerUtils(env) == JNI_ERR)  return JNI_ERR;
+
+    // ---- cache all fields of org.apache.sysds.cujava.runtime.CudaDeviceProp ----
+    {
+        jclass cls = nullptr;
+        if (!init(env, cls, "org/apache/sysds/cujava/runtime/CudaDeviceProp")) return JNI_ERR;
+        cudaDeviceProp_class = (jclass)env->NewGlobalRef(cls);
+        if (!cudaDeviceProp_class) return JNI_ERR;
+
+        struct Spec { const char* name; const char* sig; jfieldID* out; } specs[] = {
+            {"accessPolicyMaxWindowSize","I",&cudaDeviceProp_accessPolicyMaxWindowSize},
+            {"asyncEngineCount","I",&cudaDeviceProp_asyncEngineCount},
+            {"canMapHostMemory","I",&cudaDeviceProp_canMapHostMemory},
+            {"canUseHostPointerForRegisteredMem","I",&cudaDeviceProp_canUseHostPointerForRegisteredMem},
+            {"clockRate","I",&cudaDeviceProp_clockRate},
+            {"clusterLaunch","I",&cudaDeviceProp_clusterLaunch},
+            {"computeMode","I",&cudaDeviceProp_computeMode},
+            {"computePreemptionSupported","I",&cudaDeviceProp_computePreemptionSupported},
+            {"concurrentKernels","I",&cudaDeviceProp_concurrentKernels},
+            {"concurrentManagedAccess","I",&cudaDeviceProp_concurrentManagedAccess},
+            {"cooperativeLaunch","I",&cudaDeviceProp_cooperativeLaunch},
+            {"cooperativeMultiDeviceLaunch","I",&cudaDeviceProp_cooperativeMultiDeviceLaunch},
+            {"deferredMappingCudaArraySupported","I",&cudaDeviceProp_deferredMappingCudaArraySupported},
+            {"deviceOverlap","I",&cudaDeviceProp_deviceOverlap},
+            {"directManagedMemAccessFromHost","I",&cudaDeviceProp_directManagedMemAccessFromHost},
+            {"ECCEnabled","I",&cudaDeviceProp_ECCEnabled},
+            {"globalL1CacheSupported","I",&cudaDeviceProp_globalL1CacheSupported},
+            {"gpuDirectRDMAFlushWritesOptions","I",&cudaDeviceProp_gpuDirectRDMAFlushWritesOptions},
+            {"gpuDirectRDMASupported","I",&cudaDeviceProp_gpuDirectRDMASupported},
+            {"gpuDirectRDMAWritesOrdering","I",&cudaDeviceProp_gpuDirectRDMAWritesOrdering},
+            {"hostNativeAtomicSupported","I",&cudaDeviceProp_hostNativeAtomicSupported},
+            {"hostRegisterReadOnlySupported","I",&cudaDeviceProp_hostRegisterReadOnlySupported},
+            {"hostRegisterSupported","I",&cudaDeviceProp_hostRegisterSupported},
+            {"integrated","I",&cudaDeviceProp_integrated},
+            {"ipcEventSupported","I",&cudaDeviceProp_ipcEventSupported},
+            {"isMultiGpuBoard","I",&cudaDeviceProp_isMultiGpuBoard},
+            {"kernelExecTimeoutEnabled","I",&cudaDeviceProp_kernelExecTimeoutEnabled},
+            {"l2CacheSize","I",&cudaDeviceProp_l2CacheSize},
+            {"localL1CacheSupported","I",&cudaDeviceProp_localL1CacheSupported},
+            {"luid","[B",&cudaDeviceProp_luid},
+            {"luidDeviceNodeMask","I",&cudaDeviceProp_luidDeviceNodeMask},
+            {"major","I",&cudaDeviceProp_major},
+            {"managedMemory","I",&cudaDeviceProp_managedMemory},
+            {"maxBlocksPerMultiProcessor","I",&cudaDeviceProp_maxBlocksPerMultiProcessor},
+            {"maxGridSize","[I",&cudaDeviceProp_maxGridSize},
+            {"maxSurface1D","I",&cudaDeviceProp_maxSurface1D},
+            {"maxSurface1DLayered","[I",&cudaDeviceProp_maxSurface1DLayered},
+            {"maxSurface2D","[I",&cudaDeviceProp_maxSurface2D},
+            {"maxSurface2DLayered","[I",&cudaDeviceProp_maxSurface2DLayered},
+            {"maxSurface3D","[I",&cudaDeviceProp_maxSurface3D},
+            {"maxSurfaceCubemap","I",&cudaDeviceProp_maxSurfaceCubemap},
+            {"maxSurfaceCubemapLayered","[I",&cudaDeviceProp_maxSurfaceCubemapLayered},
+            {"maxTexture1D","I",&cudaDeviceProp_maxTexture1D},
+            {"maxTexture1DLayered","[I",&cudaDeviceProp_maxTexture1DLayered},
+            {"maxTexture1DLinear","I",&cudaDeviceProp_maxTexture1DLinear},
+            {"maxTexture1DMipmap","I",&cudaDeviceProp_maxTexture1DMipmap},
+            {"maxTexture2D","[I",&cudaDeviceProp_maxTexture2D},
+            {"maxTexture2DGather","[I",&cudaDeviceProp_maxTexture2DGather},
+            {"maxTexture2DLayered","[I",&cudaDeviceProp_maxTexture2DLayered},
+            {"maxTexture2DLinear","[I",&cudaDeviceProp_maxTexture2DLinear},
+            {"maxTexture2DMipmap","[I",&cudaDeviceProp_maxTexture2DMipmap},
+            {"maxTexture3D","[I",&cudaDeviceProp_maxTexture3D},
+            {"maxTexture3DAlt","[I",&cudaDeviceProp_maxTexture3DAlt},
+            {"maxTextureCubemap","I",&cudaDeviceProp_maxTextureCubemap},
+            {"maxTextureCubemapLayered","[I",&cudaDeviceProp_maxTextureCubemapLayered},
+            {"maxThreadsDim","[I",&cudaDeviceProp_maxThreadsDim},
+            {"maxThreadsPerBlock","I",&cudaDeviceProp_maxThreadsPerBlock},
+            {"maxThreadsPerMultiProcessor","I",&cudaDeviceProp_maxThreadsPerMultiProcessor},
+            {"memoryBusWidth","I",&cudaDeviceProp_memoryBusWidth},
+            {"memoryClockRate","I",&cudaDeviceProp_memoryClockRate},
+            {"memoryPoolsSupported","I",&cudaDeviceProp_memoryPoolsSupported},
+            {"memoryPoolSupportedHandleTypes","I",&cudaDeviceProp_memoryPoolSupportedHandleTypes},
+            {"memPitch","J",&cudaDeviceProp_memPitch},
+            {"minor","I",&cudaDeviceProp_minor},
+            {"multiGpuBoardGroupID","I",&cudaDeviceProp_multiGpuBoardGroupID},
+            {"multiProcessorCount","I",&cudaDeviceProp_multiProcessorCount},
+            {"name","[B",&cudaDeviceProp_name},
+            {"pageableMemoryAccess","I",&cudaDeviceProp_pageableMemoryAccess},
+            {"pageableMemoryAccessUsesHostPageTables","I",&cudaDeviceProp_pageableMemoryAccessUsesHostPageTables},
+            {"pciBusID","I",&cudaDeviceProp_pciBusID},
+            {"pciDeviceID","I",&cudaDeviceProp_pciDeviceID},
+            {"pciDomainID","I",&cudaDeviceProp_pciDomainID},
+            {"persistingL2CacheMaxSize","I",&cudaDeviceProp_persistingL2CacheMaxSize},
+            {"regsPerBlock","I",&cudaDeviceProp_regsPerBlock},
+            {"regsPerMultiprocessor","I",&cudaDeviceProp_regsPerMultiprocessor},
+            {"reserved","I",&cudaDeviceProp_reserved},
+            {"reservedSharedMemPerBlock","J",&cudaDeviceProp_reservedSharedMemPerBlock},
+            {"sharedMemPerBlock","J",&cudaDeviceProp_sharedMemPerBlock},
+            {"sharedMemPerBlockOptin","J",&cudaDeviceProp_sharedMemPerBlockOptin},
+            {"sharedMemPerMultiprocessor","J",&cudaDeviceProp_sharedMemPerMultiprocessor},
+            {"singleToDoublePrecisionPerfRatio","I",&cudaDeviceProp_singleToDoublePrecisionPerfRatio},
+            {"sparseCudaArraySupported","I",&cudaDeviceProp_sparseCudaArraySupported},
+            {"streamPrioritiesSupported","I",&cudaDeviceProp_streamPrioritiesSupported},
+            {"surfaceAlignment","J",&cudaDeviceProp_surfaceAlignment},
+            {"tccDriver","I",&cudaDeviceProp_tccDriver},
+            {"textureAlignment","J",&cudaDeviceProp_textureAlignment},
+            {"texturePitchAlignment","J",&cudaDeviceProp_texturePitchAlignment},
+            {"timelineSemaphoreInteropSupported","I",&cudaDeviceProp_timelineSemaphoreInteropSupported},
+            {"totalConstMem","J",&cudaDeviceProp_totalConstMem},
+            {"totalGlobalMem","J",&cudaDeviceProp_totalGlobalMem},
+            {"unifiedAddressing","I",&cudaDeviceProp_unifiedAddressing},
+            {"unifiedFunctionPointers","I",&cudaDeviceProp_unifiedFunctionPointers},
+            {"warpSize","I",&cudaDeviceProp_warpSize},
+        };
+        // Fill every jfieldID listed above (name + JNI signature); the first failure aborts the load.
+        for (const auto& s : specs) {
+            if (!init(env, cls, *s.out, s.name, s.sig)) return JNI_ERR;
+        }
+    }
+    return JNI_VERSION_1_4;
+}
+
+// Library unload hook; performs no cleanup. NOTE(review): the global ref in cudaDeviceProp_class is not released here - confirm intended.
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) {
+}
+
+// Copies values from the native cudaDeviceProp `p` into the Java object `prop` using the cached field IDs.
+static void setCudaDeviceProp(JNIEnv* env, jobject prop, const cudaDeviceProp& p) {
+    // name: 256 bytes taken from p.name; luid: always written as 8 zero bytes here (luid undefined on non-Windows)
+    setFieldBytes(env, prop, cudaDeviceProp_name,
+                  reinterpret_cast<const jbyte*>(p.name), 256);
+    { jbyte z8[8] = {0}; setFieldBytes(env, prop, cudaDeviceProp_luid, z8, 8); }
+
+    // int[] fields: each native array is copied into a local jint buffer of matching length first
+    { jint v[3] = { (jint)p.maxThreadsDim[0], (jint)p.maxThreadsDim[1], (jint)p.maxThreadsDim[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxThreadsDim, v, 3); }
+
+    { jint v[3] = { (jint)p.maxGridSize[0], (jint)p.maxGridSize[1], (jint)p.maxGridSize[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxGridSize, v, 3); }
+
+    { jint v[2] = { (jint)p.maxTexture2D[0], (jint)p.maxTexture2D[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture2D, v, 2); }
+
+    { jint v[2] = { (jint)p.maxTexture2DGather[0], (jint)p.maxTexture2DGather[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture2DGather, v, 2); }
+
+    { jint v[3] = { (jint)p.maxTexture2DLinear[0], (jint)p.maxTexture2DLinear[1], (jint)p.maxTexture2DLinear[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture2DLinear, v, 3); }
+
+    { jint v[2] = { (jint)p.maxTexture2DMipmap[0], (jint)p.maxTexture2DMipmap[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture2DMipmap, v, 2); }
+
+    { jint v[3] = { (jint)p.maxTexture3D[0], (jint)p.maxTexture3D[1], (jint)p.maxTexture3D[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture3D, v, 3); }
+
+    { jint v[3] = { (jint)p.maxTexture3DAlt[0], (jint)p.maxTexture3DAlt[1], (jint)p.maxTexture3DAlt[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture3DAlt, v, 3); }
+
+    { jint v[2] = { (jint)p.maxTexture1DLayered[0], (jint)p.maxTexture1DLayered[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture1DLayered, v, 2); }
+
+    { jint v[3] = { (jint)p.maxTexture2DLayered[0], (jint)p.maxTexture2DLayered[1], (jint)p.maxTexture2DLayered[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTexture2DLayered, v, 3); }
+
+    { jint v[2] = { (jint)p.maxTextureCubemapLayered[0], (jint)p.maxTextureCubemapLayered[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxTextureCubemapLayered, v, 2); }
+
+    { jint v[2] = { (jint)p.maxSurface1DLayered[0], (jint)p.maxSurface1DLayered[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxSurface1DLayered, v, 2); }
+
+    { jint v[2] = { (jint)p.maxSurface2D[0], (jint)p.maxSurface2D[1] };
+      setFieldInts(env, prop, cudaDeviceProp_maxSurface2D, v, 2); }
+
+    { jint v[3] = { (jint)p.maxSurface2DLayered[0], (jint)p.maxSurface2DLayered[1], (jint)p.maxSurface2DLayered[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxSurface2DLayered, v, 3); }
+
+    { jint v[3] = { (jint)p.maxSurface3D[0], (jint)p.maxSurface3D[1], (jint)p.maxSurface3D[2] };
+      setFieldInts(env, prop, cudaDeviceProp_maxSurface3D, v, 3); }
+
+    // long fields (native values are cast to jlong)
+    env->SetLongField(prop, cudaDeviceProp_totalGlobalMem,           (jlong)p.totalGlobalMem);
+    env->SetLongField(prop, cudaDeviceProp_totalConstMem,            (jlong)p.totalConstMem);
+    env->SetLongField(prop, cudaDeviceProp_sharedMemPerBlock,        (jlong)p.sharedMemPerBlock);
+    env->SetLongField(prop, cudaDeviceProp_sharedMemPerMultiprocessor,(jlong)p.sharedMemPerMultiprocessor);
+    env->SetLongField(prop, cudaDeviceProp_reservedSharedMemPerBlock,(jlong)p.reservedSharedMemPerBlock);
+    env->SetLongField(prop, cudaDeviceProp_sharedMemPerBlockOptin,   (jlong)p.sharedMemPerBlockOptin);
+    env->SetLongField(prop, cudaDeviceProp_memPitch,                 (jlong)p.memPitch);
+    env->SetLongField(prop, cudaDeviceProp_surfaceAlignment,         (jlong)p.surfaceAlignment);
+    env->SetLongField(prop, cudaDeviceProp_textureAlignment,         (jlong)p.textureAlignment);
+    env->SetLongField(prop, cudaDeviceProp_texturePitchAlignment,    (jlong)p.texturePitchAlignment);
+
+    // int fields (available in cudaDeviceProp); cached field IDs not listed here (e.g. managedMemory) are not written
+    env->SetIntField(prop,  cudaDeviceProp_regsPerBlock,             (jint)p.regsPerBlock);
+    env->SetIntField(prop,  cudaDeviceProp_regsPerMultiprocessor,    (jint)p.regsPerMultiprocessor);
+    env->SetIntField(prop,  cudaDeviceProp_warpSize,                 (jint)p.warpSize);
+    env->SetIntField(prop,  cudaDeviceProp_maxThreadsPerBlock,       (jint)p.maxThreadsPerBlock);
+    env->SetIntField(prop,  cudaDeviceProp_maxThreadsPerMultiProcessor,(jint)p.maxThreadsPerMultiProcessor);
+    env->SetIntField(prop,  cudaDeviceProp_clockRate,                (jint)p.clockRate);
+    env->SetIntField(prop,  cudaDeviceProp_memoryClockRate,          (jint)p.memoryClockRate);
+    env->SetIntField(prop,  cudaDeviceProp_memoryBusWidth,           (jint)p.memoryBusWidth);
+    env->SetIntField(prop,  cudaDeviceProp_l2CacheSize,              (jint)p.l2CacheSize);
+    env->SetIntField(prop,  cudaDeviceProp_major,                    (jint)p.major);
+    env->SetIntField(prop,  cudaDeviceProp_minor,                    (jint)p.minor);
+    env->SetIntField(prop,  cudaDeviceProp_multiProcessorCount,      (jint)p.multiProcessorCount);
+    env->SetIntField(prop,  cudaDeviceProp_deviceOverlap,            (jint)p.deviceOverlap);
+    env->SetIntField(prop,  cudaDeviceProp_kernelExecTimeoutEnabled, (jint)p.kernelExecTimeoutEnabled);
+    env->SetIntField(prop,  cudaDeviceProp_integrated,               (jint)p.integrated);
+    env->SetIntField(prop,  cudaDeviceProp_canMapHostMemory,         (jint)p.canMapHostMemory);
+    env->SetIntField(prop,  cudaDeviceProp_computeMode,              (jint)p.computeMode);
+    env->SetIntField(prop,  cudaDeviceProp_maxTexture1D,             (jint)p.maxTexture1D);
+    env->SetIntField(prop,  cudaDeviceProp_maxTexture1DMipmap,       (jint)p.maxTexture1DMipmap);
+    env->SetIntField(prop,  cudaDeviceProp_maxTexture1DLinear,       (jint)p.maxTexture1DLinear);
+    env->SetIntField(prop,  cudaDeviceProp_maxTextureCubemap,        (jint)p.maxTextureCubemap);
+    env->SetIntField(prop,  cudaDeviceProp_maxSurface1D,             (jint)p.maxSurface1D);
+    env->SetIntField(prop,  cudaDeviceProp_maxSurfaceCubemap,        (jint)p.maxSurfaceCubemap);
+    env->SetIntField(prop,  cudaDeviceProp_asyncEngineCount,         (jint)p.asyncEngineCount);
+    env->SetIntField(prop,  cudaDeviceProp_concurrentKernels,        (jint)p.concurrentKernels);
+    env->SetIntField(prop,  cudaDeviceProp_ECCEnabled,               (jint)p.ECCEnabled);
+    env->SetIntField(prop,  cudaDeviceProp_pciBusID,                 (jint)p.pciBusID);
+    env->SetIntField(prop,  cudaDeviceProp_pciDeviceID,              (jint)p.pciDeviceID);
+    env->SetIntField(prop,  cudaDeviceProp_pciDomainID,              (jint)p.pciDomainID);
+    env->SetIntField(prop,  cudaDeviceProp_unifiedAddressing,        (jint)p.unifiedAddressing);
+}
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemcpyNative
+  (JNIEnv *env, jclass cls, jobject dst, jobject src, jlong count, jint kind) {
+    // Copies `count` bytes from `src` to `dst` via cudaMemcpy and returns its status.
+    // Returns cudaErrorInvalidMemcpyDirection for an unsupported `kind`, and
+    // CUJAVA_INTERNAL_ERROR when a pointer cannot be resolved or released.
+
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, dst, "dst", "cudaMemcpy");
+    CUJAVA_REQUIRE_NONNULL(env, src, "src", "cudaMemcpy");
+
+    Logger::log(LOG_TRACE, "Executing cudaMemcpy of %ld bytes\n", (long)count);
+
+    // Map the requested direction to its log label. Unknown kinds are rejected before
+    // any pointer data is acquired, so nothing has to be released on that path.
+    const char *direction = nullptr;
+    switch (kind) {
+        case cudaMemcpyHostToHost:     direction = "host to host";     break;
+        case cudaMemcpyHostToDevice:   direction = "host to device";   break;
+        case cudaMemcpyDeviceToHost:   direction = "device to host";   break;
+        case cudaMemcpyDeviceToDevice: direction = "device to device"; break;
+        default:
+            Logger::log(LOG_ERROR, "Invalid cudaMemcpyKind given: %d\n", kind);
+            return cudaErrorInvalidMemcpyDirection;
+    }
+
+    // Obtain the destination and source pointers
+    PointerData *dstPointerData = initPointerData(env, dst);
+    if (dstPointerData == nullptr) {
+        return CUJAVA_INTERNAL_ERROR;
+    }
+    PointerData *srcPointerData = initPointerData(env, src);
+    if (srcPointerData == nullptr) {
+        // Nothing has been written through dst yet: discard it instead of leaking it.
+        releasePointerData(env, dstPointerData, JNI_ABORT);
+        return CUJAVA_INTERNAL_ERROR;
+    }
+
+    Logger::log(LOG_TRACE, "Copying %ld bytes from %s\n", (long)count, direction);
+    int result = cudaMemcpy((void*)dstPointerData->getPointer(env), (void*)srcPointerData->getPointer(env), (size_t)count, (cudaMemcpyKind)kind);
+
+    // Always release both handles, even if the first release fails: dst with the default
+    // mode (as before), src with JNI_ABORT because it was only read from.
+    const bool dstReleased = releasePointerData(env, dstPointerData);
+    const bool srcReleased = releasePointerData(env, srcPointerData, JNI_ABORT);
+    if (!dstReleased || !srcReleased) return CUJAVA_INTERNAL_ERROR;
+    return result;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocNative
+  (JNIEnv *env, jclass cls, jobject devPtr, jlong size) {
+    // Calls cudaMalloc for `size` bytes and records the resulting address in devPtr.
+    CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaMalloc");
+    Logger::log(LOG_TRACE, "Executing cudaMalloc of %ld bytes\n", (long)size);
+
+    void *allocation = nullptr;
+    const int status = cudaMalloc(&allocation, (size_t)size);
+
+    // The address (still null if nothing was allocated) is stored whatever the status was.
+    setPointer(env, devPtr, (jlong)allocation);
+
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaFreeNative
+  (JNIEnv *env, jclass cls, jobject devPtr) {
+    // Passes the address held by devPtr to cudaFree and returns the CUDA status.
+    CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaFree");
+    Logger::log(LOG_TRACE, "Executing cudaFree\n");
+
+    // Resolve the native address from the Java pointer object.
+    void *const deviceAddress = getPointer(env, devPtr);
+
+    const int status = cudaFree(deviceAddress);
+
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemsetNative
+  (JNIEnv *env, jclass cls, jobject mem, jint c, jlong count) {
+    // Forwards to cudaMemsetwith the address held by `mem`, the value `c` and `count`; returns its status.
+    CUJAVA_REQUIRE_NONNULL(env, mem, "mem", "cudaMemset");
+    Logger::log(LOG_TRACE, "Executing cudaMemset\n");
+
+    void *const target = getPointer(env, mem);
+    const int fillValue = (int)c;
+    const size_t byteCount = (size_t)count;
+
+    const int status = cudaMemset(target, fillValue, byteCount);
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaDeviceSynchronizeNative
+  (JNIEnv *env, jclass cls) {
+    // JNI entry point: trace the call and return the status of cudaDeviceSynchronize to Java.
+    Logger::log(LOG_TRACE, "Executing cudaDeviceSynchronize\n");
+
+    return static_cast<jint>(cudaDeviceSynchronize());
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocManagedNative
+  (JNIEnv *env, jclass cls, jobject devPtr, jlong size, jint flags) {
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, devPtr, "devPtr", "cudaMallocManaged");
+    Logger::log(LOG_TRACE, "Executing cudaMallocManaged of %ld bytes\n", (long)size);
+
+    void *nativeDevPtr = nullptr;
+    int result = cudaMallocManaged(&nativeDevPtr, (size_t)size, (unsigned int)flags);
+    if (result == cudaSuccess) {
+        if (flags == cudaMemAttachHost) {
+            // Host-attached memory is also exposed to Java as a direct buffer over the same bytes.
+            // If the buffer cannot be created (an exception may be pending) free the memory and stop.
+            jobject object = env->NewDirectByteBuffer(nativeDevPtr, size);
+            if (object == nullptr) { cudaFree(nativeDevPtr); return CUJAVA_INTERNAL_ERROR; }
+            env->SetObjectField(devPtr, Pointer_buffer, object);
+            env->SetObjectField(devPtr, Pointer_pointers, nullptr);
+            env->SetLongField(devPtr, Pointer_byteOffset, 0);
+        }
+        env->SetLongField(devPtr, NativePointerObject_nativePointer, (jlong)nativeDevPtr);
+    }
+    return result;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemGetInfoNative
+  (JNIEnv *env, jclass cls, jlongArray freeBytes, jlongArray totalBytes) {
+
+    // Both output arrays are mandatory.
+    CUJAVA_REQUIRE_NONNULL(env, freeBytes, "freeBytes", "cudaMemGetInfo");
+    CUJAVA_REQUIRE_NONNULL(env, totalBytes, "totalBytes", "cudaMemGetInfo");
+
+    Logger::log(LOG_TRACE, "Executing cudaMemGetInfo\n");
+
+    size_t freeCount = 0, totalCount = 0;
+    const int status = cudaMemGetInfo(&freeCount, &totalCount);
+
+    // Element 0 of each array carries the value back to Java (zero if the query
+    // left it untouched); a failed store is reported as an internal error.
+    const bool stored = set(env, freeBytes, 0, static_cast<jlong>(freeCount))
+                     && set(env, totalBytes, 0, static_cast<jlong>(totalCount));
+    if (!stored) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceCountNative
+  (JNIEnv *env, jclass cls, jintArray count) {
+    // The output array is mandatory.
+    CUJAVA_REQUIRE_NONNULL(env, count, "count", "cudaGetDeviceCount");
+
+    Logger::log(LOG_TRACE, "Executing cudaGetDeviceCount\n");
+
+    // count[0] receives the queried value (0 if CUDA did not write one).
+    int deviceTotal = 0;
+    const int status = cudaGetDeviceCount(&deviceTotal);
+    if (!set(env, count, 0, deviceTotal)) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceNative
+  (JNIEnv *env, jclass cls, jint device) {
+    Logger::log(LOG_TRACE, "Executing cudaSetDevice\n");
+    const int status = cudaSetDevice(device);  // device ordinal is forwarded as-is
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceFlagsNative
+  (JNIEnv *env, jclass cls, jint flags) {
+    Logger::log(LOG_TRACE, "Executing cudaSetDeviceFlags\n");
+    const int status = cudaSetDeviceFlags(static_cast<int>(flags));  // flags forwarded as-is
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceNative
+  (JNIEnv *env, jclass cls, jintArray device) {
+    // The output array is mandatory.
+    CUJAVA_REQUIRE_NONNULL(env, device, "device", "cudaGetDevice");
+
+    Logger::log(LOG_TRACE, "Executing cudaGetDevice\n");
+
+    // device[0] receives the queried ordinal (0 if CUDA did not write one).
+    int currentDevice = 0;
+    const int status = cudaGetDevice(&currentDevice);
+    if (!set(env, device, 0, currentDevice)) return CUJAVA_INTERNAL_ERROR;
+    return status;
+}
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDevicePropertiesNative
+  (JNIEnv *env, jclass cls, jobject prop, jint device) {
+    // Validate: all jobject parameters must be non-null
+    CUJAVA_REQUIRE_NONNULL(env, prop, "prop", "cudaGetDeviceProperties");
+
+    Logger::log(LOG_TRACE, "Executing cudaGetDeviceProperties\n");
+
+    // Value-initialize so that a failed query copies zeros, not indeterminate stack
+    // contents, into the Java object (setCudaDeviceProp runs regardless of the status).
+    cudaDeviceProp nativeProp = {};
+    int result = cudaGetDeviceProperties(&nativeProp, device);
+    setCudaDeviceProp(env, prop, nativeProp);
+    return result;
+}
+
+
+
+
diff --git a/src/main/cpp/jni/runtime/cujava_runtime.hpp b/src/main/cpp/jni/runtime/cujava_runtime.hpp
new file mode 100644
index 00000000000..4c455821a73
--- /dev/null
+++ b/src/main/cpp/jni/runtime/cujava_runtime.hpp
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <jni.h>
+
+#ifndef _Included_org_apache_sysds_cujava_runtime_CuJava
+#define _Included_org_apache_sysds_cujava_runtime_CuJava
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Class:  org.apache.sysds.cujava.runtime.CuJava
+ * Methods:
+ *  - cudaMemcpy
+ *  - cudaMalloc
+ *  - cudaFree
+ *  - cudaMemset
+ *  - cudaDeviceSynchronize
+ *  - cudaMallocManaged
+ *  - cudaMemGetInfo
+ *  - cudaGetDeviceCount
+ *  - cudaSetDevice
+ *  - cudaSetDeviceFlags
+ *  - cudaGetDevice
+ *  - cudaGetDeviceProperties
+ */
+
+
+
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemcpyNative
+  (JNIEnv *, jclass, jobject, jobject, jlong, jint);
+// cudaMalloc: allocates `size` bytes and stores the resulting address in devPtr.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocNative
+  (JNIEnv *env, jclass cls, jobject devPtr, jlong size);
+// cudaFree: frees the native address held by devPtr.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaFreeNative
+  (JNIEnv *env, jclass cls, jobject devPtr);
+// cudaMemset: forwards the address held by mem together with c and count.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemsetNative
+  (JNIEnv *env, jclass cls, jobject mem, jint c, jlong count);
+// cudaDeviceSynchronize: no arguments; returns the CUDA status.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaDeviceSynchronizeNative
+  (JNIEnv *env, jclass cls);
+// cudaMallocManaged: stores the address in devPtr; with cudaMemAttachHost also attaches a direct ByteBuffer.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMallocManagedNative
+  (JNIEnv *env, jclass cls, jobject devPtr, jlong size, jint flags);
+// cudaMemGetInfo: writes the two byte counts to element 0 of freeBytes and totalBytes.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaMemGetInfoNative
+  (JNIEnv *env, jclass cls, jlongArray freeBytes, jlongArray totalBytes);
+// cudaGetDeviceCount: writes the result to count[0].
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceCountNative
+  (JNIEnv *env, jclass cls, jintArray count);
+// cudaSetDevice: forwards the device ordinal unchanged.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceNative
+  (JNIEnv *env, jclass cls, jint device);
+// cudaSetDeviceFlags: forwards the flags unchanged.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaSetDeviceFlagsNative
+  (JNIEnv *env, jclass cls, jint flags);
+// cudaGetDevice: writes the result to device[0].
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDeviceNative
+  (JNIEnv *env, jclass cls, jintArray device);
+// cudaGetDeviceProperties: copies the queried cudaDeviceProp into the Java object prop.
+JNIEXPORT jint JNICALL Java_org_apache_sysds_cujava_runtime_CuJava_cudaGetDevicePropertiesNative
+  (JNIEnv *env, jclass cls, jobject prop, jint device);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/main/cpp/jni/runtime/cujava_runtime_common.hpp b/src/main/cpp/jni/runtime/cujava_runtime_common.hpp
new file mode 100644
index 00000000000..94e6265711d
--- /dev/null
+++ b/src/main/cpp/jni/runtime/cujava_runtime_common.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CUJAVA_RUNTIME_COMMON_HPP
+#define CUJAVA_RUNTIME_COMMON_HPP
+
+#include <jni.h>
+#include <cstdint>
+#include <cuda_runtime.h>
+
+#include "../common/cujava_logger.hpp"
+#include "../common/cujava_jni_utils.hpp"
+#include "../common/cujava_pointer_utils.hpp"
+
+// Returned by the JNI wrappers when an internal helper (e.g. set, releasePointerData) fails.
+// NOTE(review): the literal is unsigned; presumably it reaches Java as the jint -2147483647.
+#define CUJAVA_INTERNAL_ERROR 0x80000001
+
+#endif // CUJAVA_RUNTIME_COMMON_HPP
diff --git a/src/main/cpp/lib/libCuJavaCommonJNI.a b/src/main/cpp/lib/libCuJavaCommonJNI.a
new file mode 100644
index 00000000000..088843ce137
Binary files /dev/null and b/src/main/cpp/lib/libCuJavaCommonJNI.a differ
diff --git a/src/main/cpp/lib/libcujava_cublas.so b/src/main/cpp/lib/libcujava_cublas.so
new file mode 100755
index 00000000000..a64fe23e06f
Binary files /dev/null and b/src/main/cpp/lib/libcujava_cublas.so differ
diff --git a/src/main/cpp/lib/libcujava_cusparse.so b/src/main/cpp/lib/libcujava_cusparse.so
new file mode 100755
index 00000000000..d0439d0d327
Binary files /dev/null and b/src/main/cpp/lib/libcujava_cusparse.so differ
diff --git a/src/main/cpp/lib/libcujava_driver.so b/src/main/cpp/lib/libcujava_driver.so
new file mode 100755
index 00000000000..a3e488b7722
Binary files /dev/null and b/src/main/cpp/lib/libcujava_driver.so differ
diff --git a/src/main/cpp/lib/libcujava_runtime.so b/src/main/cpp/lib/libcujava_runtime.so
new file mode 100755
index 00000000000..24de9f300e4
Binary files /dev/null and b/src/main/cpp/lib/libcujava_runtime.so differ
diff --git a/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java b/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java
new file mode 100644
index 00000000000..4e8381b505d
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/CuJavaLibLoader.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.*;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class CuJavaLibLoader {
+	/** Names passed to {@link #load} whose library is loaded or currently being loaded. */
+	private static final Set<String> LOADED = Collections.newSetFromMap(new ConcurrentHashMap<>());
+
+	/**
+	 * Loads a native library at most once; call from static blocks in binding classes.
+	 * @throws UnsatisfiedLinkError if the standard lookup and the copy under /lib/ on the class path both fail
+	 */
+	public static synchronized void load(String lib) {
+		if (!LOADED.add(lib)) return; // already loaded
+
+		// 1) Standard lookup (java.library.path or OS default locations)
+		UnsatisfiedLinkError lookupFailure;
+		try {
+			System.loadLibrary(lib);
+			return;
+		}
+		catch (UnsatisfiedLinkError e) {
+			lookupFailure = e; // may name the real cause (e.g. a missing dependency), so keep it
+		}
+
+		// 2) Extract the library from the JAR (/lib/...) to a temp file
+		String fileName = System.mapLibraryName(lib);   // platform-specific
+		String resource = "/lib/" + fileName;                // matches <targetPath>lib in the POM
+		try (InputStream in = CuJavaLibLoader.class.getResourceAsStream(resource)) {
+			if (in == null)
+				throw new UnsatisfiedLinkError("Native library not found inside JAR at " + resource);
+			Path tmp = Files.createTempFile("cujava_", fileName);
+			tmp.toFile().deleteOnExit();
+			Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);
+			System.load(tmp.toAbsolutePath().toString());
+		}
+		catch (IOException | UnsatisfiedLinkError e) {
+			LOADED.remove(lib); // not loaded after all: let a later call try again
+			UnsatisfiedLinkError failure = new UnsatisfiedLinkError("Failed to load native CUDA bridge: " + e);
+			failure.initCause(e);
+			failure.addSuppressed(lookupFailure); // report why step 1 failed as well
+			throw failure;
+		}
+	}
+
+	private CuJavaLibLoader() { /* no instances */ }
+}
diff --git a/src/main/java/org/apache/sysds/cujava/CudaDataType.java b/src/main/java/org/apache/sysds/cujava/CudaDataType.java
new file mode 100644
index 00000000000..17dda43ff28
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/CudaDataType.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+/**
+ * CUDA data-type constants (mirror of cudaDataType_t). Grouped: all real (R) types first, then complex (C) companions.
+ */
+
+public class CudaDataType {
+
+	/* ─────── Real scalars (values follow cudaDataType_t, hence not sequential) ─────── */
+
+	/** real: 16-bit IEEE half-precision float (fp16) */
+	public static final int CUDA_R_16F = 2;
+	/** real: 16-bit bfloat16 */
+	public static final int CUDA_R_16BF = 14;
+	/** real: 32-bit IEEE single-precision float */
+	public static final int CUDA_R_32F = 0;
+	/** real: 64-bit IEEE double-precision float */
+	public static final int CUDA_R_64F = 1;
+
+	/** real: 4-bit signed integer */
+	public static final int CUDA_R_4I = 16;
+	/** real: 4-bit unsigned integer */
+	public static final int CUDA_R_4U = 18;
+	/** real: 8-bit signed integer */
+	public static final int CUDA_R_8I = 3;
+	/** real: 8-bit unsigned integer */
+	public static final int CUDA_R_8U = 8;
+	/** real: 16-bit signed integer */
+	public static final int CUDA_R_16I = 20;
+	/** real: 16-bit unsigned integer */
+	public static final int CUDA_R_16U = 22;
+	/** real: 32-bit signed integer */
+	public static final int CUDA_R_32I = 10;
+	/** real: 32-bit unsigned integer */
+	public static final int CUDA_R_32U = 12;
+	/** real: 64-bit signed integer */
+	public static final int CUDA_R_64I = 24;
+	/** real: 64-bit unsigned integer */
+	public static final int CUDA_R_64U = 26;
+
+	/** real: 8-bit float, FP-8 format E4M3 */
+	public static final int CUDA_R_8F_E4M3 = 28;
+	/** real: 8-bit float, FP-8 format E5M2 */
+	public static final int CUDA_R_8F_E5M2 = 29;
+
+
+	/* ─────── Complex pairs (real + imaginary), one constant per real type above except FP-8 ─────── */
+
+	/** complex: two fp16 numbers (real, imag) */
+	public static final int CUDA_C_16F = 6;
+	/** complex: two bfloat16 numbers (real, imag) */
+	public static final int CUDA_C_16BF = 15;
+	/** complex: two 32-bit floats (real, imag) */
+	public static final int CUDA_C_32F = 4;
+	/** complex: two 64-bit doubles (real, imag) */
+	public static final int CUDA_C_64F = 5;
+
+	/** complex: two 4-bit signed integers (real, imag) */
+	public static final int CUDA_C_4I = 17;
+	/** complex: two 4-bit unsigned integers (real, imag) */
+	public static final int CUDA_C_4U = 19;
+	/** complex: two 8-bit signed integers (real, imag) */
+	public static final int CUDA_C_8I = 7;
+	/** complex: two 8-bit unsigned integers (real, imag) */
+	public static final int CUDA_C_8U = 9;
+	/** complex: two 16-bit signed integers (real, imag) */
+	public static final int CUDA_C_16I = 21;
+	/** complex: two 16-bit unsigned integers (real, imag) */
+	public static final int CUDA_C_16U = 23;
+	/** complex: two 32-bit signed integers (real, imag) */
+	public static final int CUDA_C_32I = 11;
+	/** complex: two 32-bit unsigned integers (real, imag) */
+	public static final int CUDA_C_32U = 13;
+	/** complex: two 64-bit signed integers (real, imag) */
+	public static final int CUDA_C_64I = 25;
+	/** complex: two 64-bit unsigned integers (real, imag) */
+	public static final int CUDA_C_64U = 27;
+
+	private CudaDataType() { /* utility class – no instantiation */ }
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/CudaException.java b/src/main/java/org/apache/sysds/cujava/CudaException.java
new file mode 100644
index 00000000000..bcc901af3e9
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/CudaException.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+public class CudaException extends RuntimeException {
+	/** Explicit serialization id for this Serializable exception type. */
+	private static final long serialVersionUID = 1587809813906124159L;
+	/** Creates an exception that carries only a message, e.g. a status string from a binding. */
+	public CudaException(String message) {
+		super(message);
+	}
+	/** Creates an exception with a message and the underlying cause. */
+	public CudaException(String message, Throwable cause) {
+		super(message, cause);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/NativePointerObject.java b/src/main/java/org/apache/sysds/cujava/NativePointerObject.java
new file mode 100644
index 00000000000..8b3c33cb3e6
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/NativePointerObject.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+public abstract class NativePointerObject {
+	/** Raw native address; presumably also written by the JNI layer (NativePointerObject_nativePointer). */
+	private long nativePointer;
+
+	/** Creates an object whose native address is 0. */
+	protected NativePointerObject() {
+		this(0L);
+	}
+	/** Creates an object that holds the given native address. */
+	protected NativePointerObject(long nativePointer) {
+		this.nativePointer = nativePointer;
+	}
+	/** Creates an object holding the same native address as {@code other}. */
+	protected NativePointerObject(NativePointerObject other) {
+		this(other.nativePointer);
+	}
+	/** Returns the native address as a {@code long}. */
+	public long getNativePointer() {
+		return this.nativePointer;
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/Pointer.java b/src/main/java/org/apache/sysds/cujava/Pointer.java
new file mode 100644
index 00000000000..84d280db4b9
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/Pointer.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.ShortBuffer;
+import java.nio.IntBuffer;
+import java.nio.FloatBuffer;
+import java.nio.LongBuffer;
+import java.nio.DoubleBuffer;
+import java.nio.ByteOrder;
+
+public class Pointer extends NativePointerObject {
+	// buffer and pointers must not be final: the JNI layer (cudaMallocManaged) assigns them after construction.
+	private long byteOffset;
+	private Buffer buffer;
+	private NativePointerObject[] pointers;
+	/** Creates a pointer with native address 0, no buffer, no pointer array and offset 0. */
+	public Pointer() {
+		buffer = null;
+		pointers = null;
+		byteOffset = 0;
+	}
+	/** Creates a pointer to the given native address, without buffer or pointer array. */
+	protected Pointer(long nativePointerValue) {
+		super(nativePointerValue);
+		buffer = null;
+		pointers = null;
+		byteOffset = 0;
+	}
+	/** Creates a pointer backed by a Java NIO buffer. */
+	private Pointer(Buffer buffer) {
+		this.buffer = buffer;
+		pointers = null;
+		byteOffset = 0;
+	}
+	/** Creates a pointer backed by an array of native pointer objects. */
+	private Pointer(NativePointerObject[] pointers) {
+		buffer = null;
+		this.pointers = pointers;
+		byteOffset = 0;
+	}
+	/** Copy constructor: shares the native address, buffer, pointer array and offset of {@code other}. */
+	protected Pointer(Pointer other) {
+		super(other.getNativePointer());
+		this.buffer = other.buffer;
+		this.pointers = other.pointers;
+		this.byteOffset = other.byteOffset;
+	}
+	/** Copies {@code other} and adds {@code byteOffset} to the copied byte offset. */
+	protected Pointer(Pointer other, long byteOffset) {
+		this(other);
+		this.byteOffset += byteOffset;
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link ByteBuffer}. */
+	public static Pointer to(byte[] values) {
+		return new Pointer(ByteBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link CharBuffer}. */
+	public static Pointer to(char[] values) {
+		return new Pointer(CharBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link ShortBuffer}. */
+	public static Pointer to(short[] values) {
+		return new Pointer(ShortBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in an {@link IntBuffer}. */
+	public static Pointer to(int[] values) {
+		return new Pointer(IntBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link FloatBuffer}. */
+	public static Pointer to(float[] values) {
+		return new Pointer(FloatBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link LongBuffer}. */
+	public static Pointer to(long[] values) {
+		return new Pointer(LongBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given array, wrapped (not copied) in a {@link DoubleBuffer}. */
+	public static Pointer to(double[] values) {
+		return new Pointer(DoubleBuffer.wrap(values));
+	}
+	/** Returns a pointer to the given pointer objects; throws IllegalArgumentException if the array is null. */
+	public static Pointer to(NativePointerObject... pointers) {
+		if(pointers == null) {
+			throw new IllegalArgumentException(
+				"The pointers argument is null – expected one or more NativePointerObject references.");
+		}
+		return new Pointer(pointers);
+	}
+	/** Returns a copy of this pointer whose byte offset is increased by {@code byteOffset}. */
+	public Pointer withByteOffset(long byteOffset) {
+		return new Pointer(this, byteOffset);
+	}
+	/** Returns the byte offset of this pointer. */
+	public long getByteOffset() {
+		return byteOffset;
+	}
+	/** Returns the native address plus the byte offset. */
+	public long address() {                      // nativePointer + byteOffset
+		return getNativePointer() + getByteOffset();
+	}
+	/** Returns a native-order view of byteSize bytes at byteOffset, or null if not backed by a ByteBuffer. */
+	public ByteBuffer getByteBuffer(long byteOffset, long byteSize) {
+		if(buffer == null) {
+			return null;
+		}
+		if(!(buffer instanceof ByteBuffer internalByteBuffer)) {
+			return null;
+		}
+		ByteBuffer byteBuffer = internalByteBuffer.slice();
+		byteBuffer.limit(Math.toIntExact(byteOffset + byteSize));
+		byteBuffer.position(Math.toIntExact(byteOffset));
+		return byteBuffer.slice().order(ByteOrder.nativeOrder());
+	}
+	/** Returns a pointer to a direct or array-backed buffer; throws IllegalArgumentException otherwise. */
+	public static Pointer to(Buffer buffer) {
+		if(buffer == null || (!buffer.isDirect() && !buffer.hasArray())) {
+			throw new IllegalArgumentException(
+				"Invalid buffer: argument is null or neither direct nor backed by an array; " +
+					"expected a non-null direct buffer or one with an accessible backing array.");
+		}
+		return new Pointer(buffer);
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/Sizeof.java b/src/main/java/org/apache/sysds/cujava/Sizeof.java
new file mode 100644
index 00000000000..b53a1fa6e81
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/Sizeof.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava;
+
+public class Sizeof {
+
+	/*
+	 * CUDA expects sizes in bytes, so every constant below is the byte width of a Java
+	 * primitive, taken from the BYTES constant of its wrapper class (SIZE in bits / 8).
+	 */
+	/** Size of a {@code byte} in bytes. */
+	public static final int BYTE = Byte.BYTES;
+	/** Size of a {@code char} in bytes. */
+	public static final int CHAR = Character.BYTES;
+	/** Size of a {@code short} in bytes. */
+	public static final int SHORT = Short.BYTES;
+	/** Size of an {@code int} in bytes. */
+	public static final int INT = Integer.BYTES;
+	/** Size of a {@code float} in bytes. */
+	public static final int FLOAT = Float.BYTES;
+	/** Size of a {@code long} in bytes. */
+	public static final int LONG = Long.BYTES;
+	/** Size of a {@code double} in bytes. */
+	public static final int DOUBLE = Double.BYTES;
+
+	// Constants only: the private constructor rules out instances
+	private Sizeof() {
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java b/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java
new file mode 100644
index 00000000000..0689e64b91f
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/CuJavaCublas.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+/**
+ * The methods declared in this class refer to cublas v2. cublas v1 is deprecated in CUDA 12 and SystemDS does not
+ * utilize v1 methods anymore.
+ */
+
+public class CuJavaCublas {
+	/** When true, every wrapper throws a CudaException for a status other than CUBLAS_STATUS_SUCCESS. */
+	private static boolean exceptionsEnabled = false;
+	/** Base name of the native library handed to CuJavaLibLoader. */
+	private static final String LIB_BASE = "cujava_cublas";
+
+	private CuJavaCublas() {
+		// prevent instantiation
+	}
+	// Load the native bindings once, when this class is initialized.
+	static {
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+	/** Returns {@code result} unchanged, or throws a CudaException if exceptions are enabled and it is not success. */
+	private static int checkCublasStatus(int result) {
+		if(exceptionsEnabled && result != cublasStatus.CUBLAS_STATUS_SUCCESS) {
+			throw new CudaException(cublasStatus.statusString(result));
+		}
+		return result;
+	}
+	/** Switches between returning status codes (false, the default) and throwing on failure (true). */
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+	/** Calls the native cublasCreate binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasCreate(cublasHandle handle) {
+		return checkCublasStatus(cublasCreateNative(handle));
+	}
+
+	private static native int cublasCreateNative(cublasHandle handle);
+	/** Calls the native cublasDestroy binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDestroy(cublasHandle handle) {
+		return checkCublasStatus(cublasDestroyNative(handle));
+	}
+
+	private static native int cublasDestroyNative(cublasHandle handle);
+	/** Calls the native cublasDgeam binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDgeam(cublasHandle handle, int transa, int transb, int m, int n, Pointer alpha, Pointer A,
+		int lda, Pointer beta, Pointer B, int ldb, Pointer C, int ldc) {
+		return checkCublasStatus(cublasDgeamNative(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc));
+	}
+
+	private static native int cublasDgeamNative(cublasHandle handle, int transa, int transb, int m, int n,
+		Pointer alpha, Pointer A, int lda, Pointer beta, Pointer B, int ldb, Pointer C, int ldc);
+	/** Calls the native cublasDdot binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDdot(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy, Pointer result) {
+		return checkCublasStatus(cublasDdotNative(handle, n, x, incx, y, incy, result));
+	}
+
+	private static native int cublasDdotNative(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy,
+		Pointer result);
+	/** Calls the native cublasDgemv binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDgemv(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A, int lda,
+		Pointer x, int incx, Pointer beta, Pointer y, int incy) {
+		return checkCublasStatus(cublasDgemvNative(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy));
+	}
+
+	private static native int cublasDgemvNative(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A,
+		int lda, Pointer x, int incx, Pointer beta, Pointer y, int incy);
+	/** Calls the native cublasDgemm binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDgemm(cublasHandle handle, int transa, int transb, int m, int n, int k, Pointer alpha,
+		Pointer A, int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc) {
+		return checkCublasStatus(
+			cublasDgemmNative(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc));
+	}
+
+	private static native int cublasDgemmNative(cublasHandle handle, int transa, int transb, int m, int n, int k,
+		Pointer alpha, Pointer A, int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc);
+	/** Calls the native cublasDsyrk binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDsyrk(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha, Pointer A,
+		int lda, Pointer beta, Pointer C, int ldc) {
+		return checkCublasStatus(cublasDsyrkNative(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc));
+	}
+
+	private static native int cublasDsyrkNative(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha,
+		Pointer A, int lda, Pointer beta, Pointer C, int ldc);
+	/** Calls the native cublasDaxpy binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDaxpy(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx, Pointer y, int incy) {
+		return checkCublasStatus(cublasDaxpyNative(handle, n, alpha, x, incx, y, incy));
+	}
+
+	private static native int cublasDaxpyNative(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx,
+		Pointer y, int incy);
+	/** Calls the native cublasDtrsm binding; the status is checked by checkCublasStatus and returned. */
+	public static int cublasDtrsm(cublasHandle handle, int side, int uplo, int trans, int diag, int m, int n,
+		Pointer alpha, Pointer A, int lda, Pointer B, int ldb) {
+		return checkCublasStatus(cublasDtrsmNative(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb));
+	}
+
+	private static native int cublasDtrsmNative(cublasHandle handle, int side, int uplo, int trans, int diag, int m,
+		int n, Pointer alpha, Pointer A, int lda, Pointer B, int ldb);
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java
new file mode 100644
index 00000000000..8c0972e2724
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasDiagType.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer constants for the diagonal-type argument of cuBLAS calls.
+ * The values are expected to match the native {@code cublasDiagType_t} enum.
+ */
+public class cublasDiagType {
+
+	private cublasDiagType() {
+		// constants holder only; never instantiated
+	}
+
+	/** Diagonal type "non unit". */
+	public static final int CUBLAS_DIAG_NON_UNIT = 0;
+
+	/** Diagonal type "unit". */
+	public static final int CUBLAS_DIAG_UNIT = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java
new file mode 100644
index 00000000000..d9e0720fcee
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasFillMode.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer constants for the fill-mode argument of cuBLAS calls.
+ * The values are expected to match the native {@code cublasFillMode_t} enum.
+ */
+public class cublasFillMode {
+
+	private cublasFillMode() {
+		// constants holder only; never instantiated
+	}
+
+	/** Fill mode "lower". */
+	public static final int CUBLAS_FILL_MODE_LOWER = 0;
+
+	/** Fill mode "upper". */
+	public static final int CUBLAS_FILL_MODE_UPPER = 1;
+
+	/** Fill mode "full". */
+	public static final int CUBLAS_FILL_MODE_FULL = 2;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java
new file mode 100644
index 00000000000..a02dfed3295
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasHandle.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+/**
+ * Java-side handle type passed as the first argument of the cuBLAS bindings.
+ * It adds no state or behavior of its own beyond what {@link NativePointerObject} provides.
+ */
+public class cublasHandle extends NativePointerObject {
+
+	/** Creates a new handle object using the superclass no-argument constructor. */
+	public cublasHandle() {
+		super();
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java
new file mode 100644
index 00000000000..eadf75d756b
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasOperation.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer constants for the operation (transpose-mode) argument of cuBLAS calls.
+ * The values are expected to match the native {@code cublasOperation_t} enum.
+ */
+public class cublasOperation {
+
+	private cublasOperation() {
+		// constants holder only; never instantiated
+	}
+
+	/** Operation "N". */
+	public static final int CUBLAS_OP_N = 0;
+
+	/** Operation "T". */
+	public static final int CUBLAS_OP_T = 1;
+
+	/** Operation "C". */
+	public static final int CUBLAS_OP_C = 2;
+
+	/** Alias with the same numeric value as {@link #CUBLAS_OP_C}. */
+	public static final int CUBLAS_OP_HERMITAN = CUBLAS_OP_C;
+
+	/** Operation "CONJG". */
+	public static final int CUBLAS_OP_CONJG = 3;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java
new file mode 100644
index 00000000000..9b274e28cba
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasPointerMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer constants for the cuBLAS pointer mode.
+ * The values are expected to match the native {@code cublasPointerMode_t} enum.
+ */
+public class cublasPointerMode {
+
+	private cublasPointerMode() {
+		// constants holder only; never instantiated
+	}
+
+	/** Pointer mode "host". */
+	public static final int CUBLAS_POINTER_MODE_HOST = 0;
+
+	/** Pointer mode "device". */
+	public static final int CUBLAS_POINTER_MODE_DEVICE = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java
new file mode 100644
index 00000000000..fe474d55065
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasSideMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer constants for the side-mode argument of cuBLAS calls.
+ * The values are expected to match the native {@code cublasSideMode_t} enum.
+ */
+public class cublasSideMode {
+
+	private cublasSideMode() {
+		// constants holder only; never instantiated
+	}
+
+	/** Side mode "left". */
+	public static final int CUBLAS_SIDE_LEFT = 0;
+
+	/** Side mode "right". */
+	public static final int CUBLAS_SIDE_RIGHT = 1;
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java b/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java
new file mode 100644
index 00000000000..b0ba370a730
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cublas/cublasStatus.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cublas;
+
+/**
+ * Integer status codes returned by cuBLAS calls, plus a helper that maps a code to its symbolic name.
+ * The numeric values follow the native {@code cublasStatus_t} enum (the gaps in the numbering are intentional).
+ */
+public class cublasStatus {
+
+	public static final int CUBLAS_STATUS_SUCCESS = 0;
+
+	public static final int CUBLAS_STATUS_NOT_INITIALIZED = 1;
+
+	public static final int CUBLAS_STATUS_ALLOC_FAILED = 3;
+
+	public static final int CUBLAS_STATUS_INVALID_VALUE = 7;
+
+	public static final int CUBLAS_STATUS_ARCH_MISMATCH = 8;
+
+	public static final int CUBLAS_STATUS_MAPPING_ERROR = 11;
+
+	public static final int CUBLAS_STATUS_EXECUTION_FAILED = 13;
+
+	public static final int CUBLAS_STATUS_INTERNAL_ERROR = 14;
+
+	public static final int CUBLAS_STATUS_NOT_SUPPORTED = 15;
+
+	// Part of cublasStatus_t; without it a legitimate status of 16 was reported as "Invalid error".
+	public static final int CUBLAS_STATUS_LICENSE_ERROR = 16;
+
+	private cublasStatus() {
+		// prevent instantiation
+	}
+
+	/**
+	 * Returns the symbolic name of a cuBLAS status code.
+	 *
+	 * @param err status code, normally one of the {@code CUBLAS_STATUS_*} constants of this class
+	 * @return the constant's name for a known code, or {@code "Invalid error"} for any other value
+	 */
+	public static String statusString(int err) {
+		return switch(err) {
+			case CUBLAS_STATUS_SUCCESS -> "CUBLAS_STATUS_SUCCESS";
+			case CUBLAS_STATUS_NOT_INITIALIZED -> "CUBLAS_STATUS_NOT_INITIALIZED";
+			case CUBLAS_STATUS_ALLOC_FAILED -> "CUBLAS_STATUS_ALLOC_FAILED";
+			case CUBLAS_STATUS_INVALID_VALUE -> "CUBLAS_STATUS_INVALID_VALUE";
+			case CUBLAS_STATUS_ARCH_MISMATCH -> "CUBLAS_STATUS_ARCH_MISMATCH";
+			case CUBLAS_STATUS_MAPPING_ERROR -> "CUBLAS_STATUS_MAPPING_ERROR";
+			case CUBLAS_STATUS_EXECUTION_FAILED -> "CUBLAS_STATUS_EXECUTION_FAILED";
+			case CUBLAS_STATUS_INTERNAL_ERROR -> "CUBLAS_STATUS_INTERNAL_ERROR";
+			case CUBLAS_STATUS_NOT_SUPPORTED -> "CUBLAS_STATUS_NOT_SUPPORTED";
+			case CUBLAS_STATUS_LICENSE_ERROR -> "CUBLAS_STATUS_LICENSE_ERROR";
+			default -> "Invalid error";
+		};
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java b/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java
new file mode 100644
index 00000000000..a38cd518c13
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cudnn/CuJavaCudnn.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cudnn;
+
+/**
+ * Placeholder for the cuDNN bindings; no functions are exposed yet.
+ */
+public class CuJavaCudnn {
+
+	// TODO: Implement java wrapper for cuDNN
+
+	private CuJavaCudnn() {
+		// static-only holder: instantiation is not allowed
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java b/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java
new file mode 100644
index 00000000000..20878688607
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusolver/CuJavaCusolver.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusolver;
+
+/**
+ * Placeholder for the cuSOLVER bindings; no functions are exposed yet.
+ */
+public class CuJavaCusolver {
+
+	// TODO: Implement java wrapper for cuSOLVER
+
+	private CuJavaCusolver() {
+		// static-only holder: instantiation is not allowed
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java b/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java
new file mode 100644
index 00000000000..27965a7938e
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/CuJavaCusparse.java
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+/**
+ * Static Java bindings for a subset of the cuSPARSE API.
+ * <p>
+ * Every public method {@code cusparseXxx} has a private native counterpart {@code cusparseXxxNative} with the
+ * same parameter list. Unless stated otherwise, the public method forwards its arguments unchanged to the
+ * native method and passes the returned int through {@link #checkCusparseStatus(int)}, so that a non-success
+ * status is turned into a {@link CudaException} when exceptions have been enabled via
+ * {@link #setExceptionsEnabled(boolean)} (they are disabled by default). The one exception to this pattern is
+ * {@link #cusparseGetMatIndexBase(cusparseMatDescr)}, whose return value is not a status code.
+ * <p>
+ * Argument semantics of the individual functions follow the cuSPARSE documentation.
+ */
+public class CuJavaCusparse {
+
+	// When true, checkCusparseStatus throws on any non-success status; when false the status is just returned.
+	private static boolean exceptionsEnabled = false;
+
+	// Base name handed to CuJavaLibLoader in the static initializer below.
+	private static final String LIB_BASE = "cujava_cusparse";
+
+	private CuJavaCusparse() {
+		// prevent instantiation
+	}
+
+	static {
+		// Load the native library once, when this class is initialized.
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	/**
+	 * Returns {@code result} unchanged, unless exceptions are enabled and {@code result} differs from
+	 * {@code cusparseStatus.CUSPARSE_STATUS_SUCCESS}, in which case a {@link CudaException} carrying the
+	 * status string is thrown.
+	 */
+	private static int checkCusparseStatus(int result) {
+		if(exceptionsEnabled && result != cusparseStatus.CUSPARSE_STATUS_SUCCESS) {
+			throw new CudaException(cusparseStatus.statusString(result));
+		}
+		return result;
+	}
+
+	/**
+	 * Enables or disables throwing a {@link CudaException} for non-success status codes.
+	 *
+	 * @param enabled {@code true} to throw on failure, {@code false} to only return the status code
+	 */
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	public static int cusparseSpGEMM_copy(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC,
+		int computeType, int alg, cusparseSpGEMMDescr spgemmDescr) {
+		return checkCusparseStatus(
+			cusparseSpGEMM_copyNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr));
+	}
+
+	private static native int cusparseSpGEMM_copyNative(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC,
+		int computeType, int alg, cusparseSpGEMMDescr spgemmDescr);
+
+	/**
+	 * Returns the index base stored in the given matrix descriptor.
+	 * <p>
+	 * Unlike the other bindings in this class, the returned int is the index base itself and not a status
+	 * code (the native signature has no output parameter through which the base could be delivered
+	 * otherwise). It must therefore not be run through {@link #checkCusparseStatus(int)}: a one-based
+	 * descriptor yields 1, which would be mistaken for a failure status and raise a {@link CudaException}
+	 * whenever exceptions are enabled.
+	 *
+	 * @param descrA the matrix descriptor to query
+	 * @return the index base reported by the native call
+	 */
+	public static int cusparseGetMatIndexBase(cusparseMatDescr descrA) {
+		return cusparseGetMatIndexBaseNative(descrA);
+	}
+
+	private static native int cusparseGetMatIndexBaseNative(cusparseMatDescr descrA);
+
+	public static int cusparseCreateCsr(cusparseSpMatDescr spMatDescr, long rows, long cols, long nnz,
+		Pointer csrRowOffsets, Pointer csrColInd, Pointer csrValues, int csrRowOffsetsType, int csrColIndType,
+		int idxBase, int valueType) {
+		return checkCusparseStatus(
+			cusparseCreateCsrNative(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, csrValues, csrRowOffsetsType,
+				csrColIndType, idxBase, valueType));
+	}
+
+	private static native int cusparseCreateCsrNative(cusparseSpMatDescr spMatDescr, long rows, long cols, long nnz,
+		Pointer csrRowOffsets, Pointer csrColInd, Pointer csrValues, int csrRowOffsetsType, int csrColIndType,
+		int idxBase, int valueType);
+
+	public static int cusparseCreateDnVec(cusparseDnVecDescr dnVecDescr, long size, Pointer values, int valueType) {
+		return checkCusparseStatus(cusparseCreateDnVecNative(dnVecDescr, size, values, valueType));
+	}
+
+	private static native int cusparseCreateDnVecNative(cusparseDnVecDescr dnVecDescr, long size, Pointer values,
+		int valueType);
+
+	public static int cusparseSpMV_bufferSize(cusparseHandle handle, int opA, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY,
+		int computeType, int alg, long[] bufferSize) {
+		return checkCusparseStatus(
+			cusparseSpMV_bufferSizeNative(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, bufferSize));
+	}
+
+	private static native int cusparseSpMV_bufferSizeNative(cusparseHandle handle, int opA, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY,
+		int computeType, int alg, long[] bufferSize);
+
+	public static int cusparseSpMV(cusparseHandle handle, int opA, Pointer alpha, cusparseConstSpMatDescr matA,
+		cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY, int computeType, int alg,
+		Pointer externalBuffer) {
+		return checkCusparseStatus(
+			cusparseSpMVNative(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, externalBuffer));
+	}
+
+	private static native int cusparseSpMVNative(cusparseHandle handle, int opA, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnVecDescr vecX, Pointer beta, cusparseDnVecDescr vecY,
+		int computeType, int alg, Pointer externalBuffer);
+
+	public static int cusparseDestroy(cusparseHandle handle) {
+		return checkCusparseStatus(cusparseDestroyNative(handle));
+	}
+
+	private static native int cusparseDestroyNative(cusparseHandle handle);
+
+	public static int cusparseDestroyDnVec(cusparseConstDnVecDescr dnVecDescr) {
+		return checkCusparseStatus(cusparseDestroyDnVecNative(dnVecDescr));
+	}
+
+	private static native int cusparseDestroyDnVecNative(cusparseConstDnVecDescr dnVecDescr);
+
+	public static int cusparseDestroyDnMat(cusparseConstDnMatDescr dnMatDescr) {
+		return checkCusparseStatus(cusparseDestroyDnMatNative(dnMatDescr));
+	}
+
+	private static native int cusparseDestroyDnMatNative(cusparseConstDnMatDescr dnMatDescr);
+
+	public static int cusparseDestroySpMat(cusparseConstSpMatDescr spMatDescr) {
+		return checkCusparseStatus(cusparseDestroySpMatNative(spMatDescr));
+	}
+
+	private static native int cusparseDestroySpMatNative(cusparseConstSpMatDescr spMatDescr);
+
+	public static int cusparseSpMM(cusparseHandle handle, int opA, int opB, Pointer alpha, cusparseConstSpMatDescr matA,
+		cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC, int computeType, int alg,
+		Pointer externalBuffer) {
+		return checkCusparseStatus(
+			cusparseSpMMNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, externalBuffer));
+	}
+
+	private static native int cusparseSpMMNative(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC,
+		int computeType, int alg, Pointer externalBuffer);
+
+	public static int cusparseSpMM_bufferSize(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC,
+		int computeType, int alg, long[] bufferSize) {
+		return checkCusparseStatus(
+			cusparseSpMM_bufferSizeNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg,
+				bufferSize));
+	}
+
+	private static native int cusparseSpMM_bufferSizeNative(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstDnMatDescr matB, Pointer beta, cusparseDnMatDescr matC,
+		int computeType, int alg, long[] bufferSize);
+
+	public static int cusparseCreateDnMat(cusparseDnMatDescr dnMatDescr, long rows, long cols, long ld, Pointer values,
+		int valueType, int order) {
+		return checkCusparseStatus(cusparseCreateDnMatNative(dnMatDescr, rows, cols, ld, values, valueType, order));
+	}
+
+	private static native int cusparseCreateDnMatNative(cusparseDnMatDescr dnMatDescr, long rows, long cols, long ld,
+		Pointer values, int valueType, int order);
+
+	public static int cusparseCsrSetPointers(cusparseSpMatDescr spMatDescr, Pointer csrRowOffsets, Pointer csrColInd,
+		Pointer csrValues) {
+		return checkCusparseStatus(cusparseCsrSetPointersNative(spMatDescr, csrRowOffsets, csrColInd, csrValues));
+	}
+
+	private static native int cusparseCsrSetPointersNative(cusparseSpMatDescr spMatDescr, Pointer csrRowOffsets,
+		Pointer csrColInd, Pointer csrValues);
+
+	public static int cusparseCsr2cscEx2(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal,
+		Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, int valType,
+		int copyValues, int idxBase, int alg, Pointer buffer) {
+		return checkCusparseStatus(
+			cusparseCsr2cscEx2Native(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr, cscRowInd,
+				valType, copyValues, idxBase, alg, buffer));
+	}
+
+	private static native int cusparseCsr2cscEx2Native(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal,
+		Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, int valType,
+		int copyValues, int idxBase, int alg, Pointer buffer);
+
+	public static int cusparseCsr2cscEx2_bufferSize(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal,
+		Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd, int valType,
+		int copyValues, int idxBase, int alg, long[] bufferSize) {
+		return checkCusparseStatus(
+			cusparseCsr2cscEx2_bufferSizeNative(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscColPtr,
+				cscRowInd, valType, copyValues, idxBase, alg, bufferSize));
+	}
+
+	private static native int cusparseCsr2cscEx2_bufferSizeNative(cusparseHandle handle, int m, int n, int nnz,
+		Pointer csrVal, Pointer csrRowPtr, Pointer csrColInd, Pointer cscVal, Pointer cscColPtr, Pointer cscRowInd,
+		int valType, int copyValues, int idxBase, int alg, long[] bufferSize);
+
+	public static int cusparseDcsrgeam2(cusparseHandle handle, int m, int n, Pointer alpha, cusparseMatDescr descrA,
+		int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, Pointer beta,
+		cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB, Pointer csrSortedColIndB,
+		cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC, Pointer csrSortedColIndC,
+		Pointer pBuffer) {
+		return checkCusparseStatus(
+			cusparseDcsrgeam2Native(handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA,
+				csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC,
+				csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer));
+	}
+
+	private static native int cusparseDcsrgeam2Native(cusparseHandle handle, int m, int n, Pointer alpha,
+		cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA,
+		Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB,
+		Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC,
+		Pointer csrSortedColIndC, Pointer pBuffer);
+
+	public static int cusparseDcsrgeam2_bufferSizeExt(cusparseHandle handle, int m, int n, Pointer alpha,
+		cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA,
+		Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB,
+		Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC,
+		Pointer csrSortedColIndC, long[] pBufferSizeInBytes) {
+		return checkCusparseStatus(
+			cusparseDcsrgeam2_bufferSizeExtNative(handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA,
+				csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC,
+				csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes));
+	}
+
+	private static native int cusparseDcsrgeam2_bufferSizeExtNative(cusparseHandle handle, int m, int n, Pointer alpha,
+		cusparseMatDescr descrA, int nnzA, Pointer csrSortedValA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA,
+		Pointer beta, cusparseMatDescr descrB, int nnzB, Pointer csrSortedValB, Pointer csrSortedRowPtrB,
+		Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedValC, Pointer csrSortedRowPtrC,
+		Pointer csrSortedColIndC, long[] pBufferSizeInBytes);
+
+	public static int cusparseSparseToDense(cusparseHandle handle, cusparseConstSpMatDescr matA,
+		cusparseDnMatDescr matB, int alg, Pointer externalBuffer) {
+		return checkCusparseStatus(cusparseSparseToDenseNative(handle, matA, matB, alg, externalBuffer));
+	}
+
+	private static native int cusparseSparseToDenseNative(cusparseHandle handle, cusparseConstSpMatDescr matA,
+		cusparseDnMatDescr matB, int alg, Pointer externalBuffer);
+
+	public static int cusparseSparseToDense_bufferSize(cusparseHandle handle, cusparseConstSpMatDescr matA,
+		cusparseDnMatDescr matB, int alg, long[] bufferSize) {
+		return checkCusparseStatus(cusparseSparseToDense_bufferSizeNative(handle, matA, matB, alg, bufferSize));
+	}
+
+	private static native int cusparseSparseToDense_bufferSizeNative(cusparseHandle handle,
+		cusparseConstSpMatDescr matA, cusparseDnMatDescr matB, int alg, long[] bufferSize);
+
+	public static int cusparseDenseToSparse_bufferSize(cusparseHandle handle, cusparseConstDnMatDescr matA,
+		cusparseSpMatDescr matB, int alg, long[] bufferSize) {
+		return checkCusparseStatus(cusparseDenseToSparse_bufferSizeNative(handle, matA, matB, alg, bufferSize));
+	}
+
+	private static native int cusparseDenseToSparse_bufferSizeNative(cusparseHandle handle,
+		cusparseConstDnMatDescr matA, cusparseSpMatDescr matB, int alg, long[] bufferSize);
+
+	public static int cusparseDenseToSparse_analysis(cusparseHandle handle, cusparseConstDnMatDescr matA,
+		cusparseSpMatDescr matB, int alg, Pointer externalBuffer) {
+		return checkCusparseStatus(cusparseDenseToSparse_analysisNative(handle, matA, matB, alg, externalBuffer));
+	}
+
+	private static native int cusparseDenseToSparse_analysisNative(cusparseHandle handle, cusparseConstDnMatDescr matA,
+		cusparseSpMatDescr matB, int alg, Pointer externalBuffer);
+
+	public static int cusparseDenseToSparse_convert(cusparseHandle handle, cusparseConstDnMatDescr matA,
+		cusparseSpMatDescr matB, int alg, Pointer externalBuffer) {
+		return checkCusparseStatus(cusparseDenseToSparse_convertNative(handle, matA, matB, alg, externalBuffer));
+	}
+
+	private static native int cusparseDenseToSparse_convertNative(cusparseHandle handle, cusparseConstDnMatDescr matA,
+		cusparseSpMatDescr matB, int alg, Pointer externalBuffer);
+
+	public static int cusparseDnnz(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA, Pointer A,
+		int lda, Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr) {
+		return checkCusparseStatus(
+			cusparseDnnzNative(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr));
+	}
+
+	private static native int cusparseDnnzNative(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA,
+		Pointer A, int lda, Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr);
+
+	public static int cusparseSetMatType(cusparseMatDescr descrA, int type) {
+		return checkCusparseStatus(cusparseSetMatTypeNative(descrA, type));
+	}
+
+	private static native int cusparseSetMatTypeNative(cusparseMatDescr descrA, int type);
+
+	public static int cusparseSetMatIndexBase(cusparseMatDescr descrA, int base) {
+		return checkCusparseStatus(cusparseSetMatIndexBaseNative(descrA, base));
+	}
+
+	private static native int cusparseSetMatIndexBaseNative(cusparseMatDescr descrA, int base);
+
+	public static int cusparseSetPointerMode(cusparseHandle handle, int mode) {
+		return checkCusparseStatus(cusparseSetPointerModeNative(handle, mode));
+	}
+
+	private static native int cusparseSetPointerModeNative(cusparseHandle handle, int mode);
+
+	public static int cusparseXcsrgeam2Nnz(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, int nnzA,
+		Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, cusparseMatDescr descrB, int nnzB, Pointer csrSortedRowPtrB,
+		Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedRowPtrC, Pointer nnzTotalDevHostPtr,
+		Pointer workspace) {
+		return checkCusparseStatus(
+			cusparseXcsrgeam2NnzNative(handle, m, n, descrA, nnzA, csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB,
+				csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, nnzTotalDevHostPtr, workspace));
+	}
+
+	private static native int cusparseXcsrgeam2NnzNative(cusparseHandle handle, int m, int n, cusparseMatDescr descrA,
+		int nnzA, Pointer csrSortedRowPtrA, Pointer csrSortedColIndA, cusparseMatDescr descrB, int nnzB,
+		Pointer csrSortedRowPtrB, Pointer csrSortedColIndB, cusparseMatDescr descrC, Pointer csrSortedRowPtrC,
+		Pointer nnzTotalDevHostPtr, Pointer workspace);
+
+	public static int cusparseSpGEMM_workEstimation(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC,
+		int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize1, Pointer externalBuffer1) {
+		return checkCusparseStatus(
+			cusparseSpGEMM_workEstimationNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg,
+				spgemmDescr, bufferSize1, externalBuffer1));
+	}
+
+	private static native int cusparseSpGEMM_workEstimationNative(cusparseHandle handle, int opA, int opB,
+		Pointer alpha, cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta,
+		cusparseSpMatDescr matC, int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize1,
+		Pointer externalBuffer1);
+
+	public static int cusparseSpGEMM_compute(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC,
+		int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize2, Pointer externalBuffer2) {
+		return checkCusparseStatus(
+			cusparseSpGEMM_computeNative(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, alg, spgemmDescr,
+				bufferSize2, externalBuffer2));
+	}
+
+	private static native int cusparseSpGEMM_computeNative(cusparseHandle handle, int opA, int opB, Pointer alpha,
+		cusparseConstSpMatDescr matA, cusparseConstSpMatDescr matB, Pointer beta, cusparseSpMatDescr matC,
+		int computeType, int alg, cusparseSpGEMMDescr spgemmDescr, long[] bufferSize2, Pointer externalBuffer2);
+
+	public static int cusparseSpMatGetSize(cusparseConstSpMatDescr spMatDescr, long[] rows, long[] cols, long[] nnz) {
+		return checkCusparseStatus(cusparseSpMatGetSizeNative(spMatDescr, rows, cols, nnz));
+	}
+
+	private static native int cusparseSpMatGetSizeNative(cusparseConstSpMatDescr spMatDescr, long[] rows, long[] cols,
+		long[] nnz);
+
+	public static int cusparseXcsrsort(cusparseHandle handle, int m, int n, int nnz, cusparseMatDescr descrA,
+		Pointer csrRowPtrA, Pointer csrColIndA, Pointer P, Pointer pBuffer) {
+		return checkCusparseStatus(
+			cusparseXcsrsortNative(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, pBuffer));
+	}
+
+	private static native int cusparseXcsrsortNative(cusparseHandle handle, int m, int n, int nnz,
+		cusparseMatDescr descrA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer P, Pointer pBuffer);
+
+	public static int cusparseXcsrsort_bufferSizeExt(cusparseHandle handle, int m, int n, int nnz, Pointer csrRowPtrA,
+		Pointer csrColIndA, long[] pBufferSizeInBytes) {
+		return checkCusparseStatus(
+			cusparseXcsrsort_bufferSizeExtNative(handle, m, n, nnz, csrRowPtrA, csrColIndA, pBufferSizeInBytes));
+	}
+
+	private static native int cusparseXcsrsort_bufferSizeExtNative(cusparseHandle handle, int m, int n, int nnz,
+		Pointer csrRowPtrA, Pointer csrColIndA, long[] pBufferSizeInBytes);
+
+	public static int cusparseCreate(cusparseHandle handle) {
+		return checkCusparseStatus(cusparseCreateNative(handle));
+	}
+
+	private static native int cusparseCreateNative(cusparseHandle handle);
+
+	public static int cusparseCreateIdentityPermutation(cusparseHandle handle, int n, Pointer p) {
+		return checkCusparseStatus(cusparseCreateIdentityPermutationNative(handle, n, p));
+	}
+
+	private static native int cusparseCreateIdentityPermutationNative(cusparseHandle handle, int n, Pointer p);
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java
new file mode 100644
index 00000000000..4ac0d81b9b9
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseAction.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseAction {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseAction_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_ACTION_SYMBOLIC = 0;
+
+	public static final int CUSPARSE_ACTION_NUMERIC = 1;
+
+	// Never instantiated: the class only exposes the static constants above.
+	private cusparseAction() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnMatDescr.java
new file mode 100644
index 00000000000..8759e915a01
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnMatDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseConstDnMatDescr extends NativePointerObject {
+	// Adds no fields or methods; all state is inherited from NativePointerObject.
+	public cusparseConstDnMatDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+	// Package-private: invoked by cusparseDnMatDescr.asConst(), which passes itself as 'other'.
+	cusparseConstDnMatDescr(cusparseDnMatDescr other) {
+		super(other);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java
new file mode 100644
index 00000000000..a11667a8474
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstDnVecDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseConstDnVecDescr extends NativePointerObject {
+	// Adds no fields or methods; all state is inherited from NativePointerObject.
+	public cusparseConstDnVecDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+	// Package-private: invoked by cusparseDnVecDescr.asConst(), which passes itself as 'other'.
+	cusparseConstDnVecDescr(cusparseDnVecDescr other) {
+		super(other);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java
new file mode 100644
index 00000000000..ce1309d1b8d
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseConstSpMatDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseConstSpMatDescr extends NativePointerObject {
+	// Adds no fields or methods; all state is inherited from NativePointerObject.
+	public cusparseConstSpMatDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+	// Package-private; presumably called from cusparseSpMatDescr, as the Dn* classes do via asConst() - TODO confirm.
+	cusparseConstSpMatDescr(cusparseSpMatDescr other) {
+		super(other);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java
new file mode 100644
index 00000000000..3eba805e54f
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseCsr2CscAlg.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseCsr2CscAlg {
+	// Both constants are 1, i.e. ALG_DEFAULT aliases ALG1 - presumably intentional (matches cusparse.h); confirm.
+	public static final int CUSPARSE_CSR2CSC_ALG_DEFAULT = 1;
+	public static final int CUSPARSE_CSR2CSC_ALG1 = 1;
+
+	private cusparseCsr2CscAlg() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDenseToSparseAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDenseToSparseAlg.java
new file mode 100644
index 00000000000..6324cee2601
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDenseToSparseAlg.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseDenseToSparseAlg {
+	// Constant holder; the value presumably mirrors cusparseDenseToSparseAlg_t in cusparse.h - confirm.
+	public static final int CUSPARSE_DENSETOSPARSE_ALG_DEFAULT = 0;
+
+	private cusparseDenseToSparseAlg() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDiagType.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDiagType.java
new file mode 100644
index 00000000000..3d451bcb295
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDiagType.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseDiagType {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseDiagType_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_DIAG_TYPE_NON_UNIT = 0;
+
+	public static final int CUSPARSE_DIAG_TYPE_UNIT = 1;
+
+	private cusparseDiagType() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDirection.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDirection.java
new file mode 100644
index 00000000000..443422c78cd
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDirection.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseDirection {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseDirection_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_DIRECTION_ROW = 0;
+
+	public static final int CUSPARSE_DIRECTION_COLUMN = 1;
+
+	private cusparseDirection() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnMatDescr.java
new file mode 100644
index 00000000000..dbcf1326c0c
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnMatDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseDnMatDescr extends NativePointerObject {
+	// Adds no fields; all state is inherited from NativePointerObject.
+	public cusparseDnMatDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+	/** Builds a new cusparseConstDnMatDescr from this descriptor; every call allocates a fresh wrapper object. */
+	public cusparseConstDnMatDescr asConst() {
+		return new cusparseConstDnMatDescr(this);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java
new file mode 100644
index 00000000000..4973ab04c49
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseDnVecDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseDnVecDescr extends NativePointerObject {
+	// Adds no fields; all state is inherited from NativePointerObject.
+	public cusparseDnVecDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+	/** Builds a new cusparseConstDnVecDescr from this descriptor; every call allocates a fresh wrapper object. */
+	public cusparseConstDnVecDescr asConst() {
+		return new cusparseConstDnVecDescr(this);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java
new file mode 100644
index 00000000000..45be77d6551
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseFillMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseFillMode {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseFillMode_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_FILL_MODE_LOWER = 0;
+
+	public static final int CUSPARSE_FILL_MODE_UPPER = 1;
+
+	private cusparseFillMode() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseHandle.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseHandle.java
new file mode 100644
index 00000000000..7289333e194
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseHandle.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseHandle extends NativePointerObject {
+	// Adds nothing to NativePointerObject; used as the 'handle' parameter type of the cuSPARSE JNI wrappers.
+	public cusparseHandle() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java
new file mode 100644
index 00000000000..bb7622bf2eb
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexBase.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseIndexBase {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseIndexBase_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_INDEX_BASE_ZERO = 0;
+
+	public static final int CUSPARSE_INDEX_BASE_ONE = 1;
+
+	private cusparseIndexBase() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java
new file mode 100644
index 00000000000..59e5dbd6060
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseIndexType.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseIndexType {
+	// Constant holder; note the codes start at 1, not 0. Presumably mirrors cusparseIndexType_t - confirm.
+	public static final int CUSPARSE_INDEX_16U = 1;
+
+	public static final int CUSPARSE_INDEX_32I = 2;
+
+	public static final int CUSPARSE_INDEX_64I = 3;
+
+	private cusparseIndexType() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java
new file mode 100644
index 00000000000..ef8dd2e90e7
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatDescr.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseMatDescr extends NativePointerObject {
+	// Adds nothing to NativePointerObject; used e.g. as the 'descrA' parameter type of cusparseXcsrsort.
+	public cusparseMatDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java
new file mode 100644
index 00000000000..4f71f78be2a
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseMatrixType.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseMatrixType {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseMatrixType_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_MATRIX_TYPE_GENERAL = 0;
+
+	public static final int CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1;
+
+	public static final int CUSPARSE_MATRIX_TYPE_HERMITIAN = 2;
+
+	public static final int CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3;
+
+	private cusparseMatrixType() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java
new file mode 100644
index 00000000000..c56167060b8
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOperation.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseOperation {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseOperation_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_OPERATION_NON_TRANSPOSE = 0;
+
+	public static final int CUSPARSE_OPERATION_TRANSPOSE = 1;
+
+	public static final int CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2;
+
+	private cusparseOperation() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java
new file mode 100644
index 00000000000..6efedd2cbad
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseOrder.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseOrder {
+	// Constant holder; note the codes start at 1, not 0. Presumably mirrors cusparseOrder_t - confirm.
+	public static final int CUSPARSE_ORDER_COL = 1;
+
+	public static final int CUSPARSE_ORDER_ROW = 2;
+
+	private cusparseOrder() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java
new file mode 100644
index 00000000000..80f65a3937c
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparsePointerMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparsePointerMode {
+	// Constant holder; values presumably mirror cuSPARSE's cusparsePointerMode_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_POINTER_MODE_HOST = 0;
+
+	public static final int CUSPARSE_POINTER_MODE_DEVICE = 1;
+
+	private cusparsePointerMode() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java
new file mode 100644
index 00000000000..3b874858892
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMAlg.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseSpGEMMAlg {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseSpGEMMAlg_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_SPGEMM_DEFAULT = 0;
+	// NOTE(review): the "DETERMINITIC" spelling looks like it follows cusparse.h; verify before "fixing" the names.
+	public static final int CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC = 1;
+
+	public static final int CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC = 2;
+
+	public static final int CUSPARSE_SPGEMM_ALG1 = 3;
+
+	public static final int CUSPARSE_SPGEMM_ALG2 = 4;
+
+	public static final int CUSPARSE_SPGEMM_ALG3 = 5;
+
+	private cusparseSpGEMMAlg() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java
new file mode 100644
index 00000000000..a3edd1e2ef3
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpGEMMDescr.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseSpGEMMDescr extends NativePointerObject {
+	// Adds nothing to NativePointerObject beyond a distinct Java type with a public no-arg constructor.
+	public cusparseSpGEMMDescr() {
+		// Intentionally empty: only the implicit super() call runs
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java
new file mode 100644
index 00000000000..be51e280504
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMMAlg.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseSpMMAlg {
+	// Constant holder; values presumably mirror cuSPARSE's cusparseSpMMAlg_t enum - confirm against cusparse.h.
+	public static final int CUSPARSE_SPMM_ALG_DEFAULT = 0;
+
+	public static final int CUSPARSE_SPMM_COO_ALG1 = 1;
+
+	public static final int CUSPARSE_SPMM_COO_ALG2 = 2;
+
+	public static final int CUSPARSE_SPMM_COO_ALG3 = 3;
+	// Values from here on are not in declaration order (COO_ALG4 = 5 but CSR_ALG1 = 4); do not renumber by position.
+	public static final int CUSPARSE_SPMM_COO_ALG4 = 5;
+
+	public static final int CUSPARSE_SPMM_CSR_ALG1 = 4;
+
+	public static final int CUSPARSE_SPMM_CSR_ALG2 = 6;
+
+	public static final int CUSPARSE_SPMM_CSR_ALG3 = 12;
+
+	public static final int CUSPARSE_SPMM_BLOCKED_ELL_ALG1 = 13;
+
+	private cusparseSpMMAlg() {
+		// Private constructor to prevent instantiation
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java
new file mode 100644
index 00000000000..23b6896c7e3
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMVAlg.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseSpMVAlg {
+	// SpMV algorithm selectors as plain ints. NOTE(review): names follow cuSPARSE's cusparseSpMVAlg_t; confirm values against cusparse.h.
+	public static final int CUSPARSE_SPMV_ALG_DEFAULT = 0;
+	// Declaration order is not numeric order: the CSR entries (2, 3) are listed before the COO entries (1, 4).
+	public static final int CUSPARSE_SPMV_CSR_ALG1 = 2;
+
+	public static final int CUSPARSE_SPMV_CSR_ALG2 = 3;
+
+	public static final int CUSPARSE_SPMV_COO_ALG1 = 1;
+
+	public static final int CUSPARSE_SPMV_COO_ALG2 = 4;
+
+
+	private cusparseSpMVAlg() {
+		// Constants holder: the private constructor prevents instantiation from outside this class.
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java
new file mode 100644
index 00000000000..2fbfb47042e
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSpMatDescr.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class cusparseSpMatDescr extends NativePointerObject {
+	// Declares no fields of its own; it only extends NativePointerObject.
+	public cusparseSpMatDescr() {
+		// Nothing to initialise here; only the implicit superclass constructor runs.
+	}
+	/** Returns a new {@link cusparseConstSpMatDescr} constructed from this descriptor. */
+	public cusparseConstSpMatDescr asConst() {
+		return new cusparseConstSpMatDescr(this);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java
new file mode 100644
index 00000000000..39ef312a11a
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseSparseToDenseAlg.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseSparseToDenseAlg {
+	// The only selector defined in this class; NOTE(review): confirm against cusparseSparseToDenseAlg_t in cusparse.h.
+	public static final int CUSPARSE_SPARSETODENSE_ALG_DEFAULT = 0;
+
+	private cusparseSparseToDenseAlg() {
+		// Constants holder: the private constructor prevents instantiation from outside this class.
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java
new file mode 100644
index 00000000000..8387557c909
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/cusparse/cusparseStatus.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Source for the numerical values:
+ * https://gitlab.com/nvidia/headers/cuda-individual/cusparse/-/blob/d4fd9303b8a5a770d11c2c60211e3f9e76410e51/cusparse.h
+ */
+
+package org.apache.sysds.cujava.cusparse;
+
+public class cusparseStatus {
+	// Status codes as plain ints, numbered 0 through 11 without gaps.
+	public static final int CUSPARSE_STATUS_SUCCESS = 0;
+
+	public static final int CUSPARSE_STATUS_NOT_INITIALIZED = 1;
+
+	public static final int CUSPARSE_STATUS_ALLOC_FAILED = 2;
+
+	public static final int CUSPARSE_STATUS_INVALID_VALUE = 3;
+
+	public static final int CUSPARSE_STATUS_ARCH_MISMATCH = 4;
+
+	public static final int CUSPARSE_STATUS_MAPPING_ERROR = 5;
+
+	public static final int CUSPARSE_STATUS_EXECUTION_FAILED = 6;
+
+	public static final int CUSPARSE_STATUS_INTERNAL_ERROR = 7;
+
+	public static final int CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8;
+
+	public static final int CUSPARSE_STATUS_ZERO_PIVOT = 9;
+
+	public static final int CUSPARSE_STATUS_NOT_SUPPORTED = 10;
+
+	public static final int CUSPARSE_STATUS_INSUFFICIENT_RESOURCES = 11;
+	/** Maps a status code to the name of its constant; any value without a case yields "Invalid error". */
+	public static String statusString(int err) {
+		return switch(err) {
+			case CUSPARSE_STATUS_SUCCESS -> "CUSPARSE_STATUS_SUCCESS";
+			case CUSPARSE_STATUS_NOT_INITIALIZED -> "CUSPARSE_STATUS_NOT_INITIALIZED";
+			case CUSPARSE_STATUS_ALLOC_FAILED -> "CUSPARSE_STATUS_ALLOC_FAILED";
+			case CUSPARSE_STATUS_INVALID_VALUE -> "CUSPARSE_STATUS_INVALID_VALUE";
+			case CUSPARSE_STATUS_ARCH_MISMATCH -> "CUSPARSE_STATUS_ARCH_MISMATCH";
+			case CUSPARSE_STATUS_MAPPING_ERROR -> "CUSPARSE_STATUS_MAPPING_ERROR";
+			case CUSPARSE_STATUS_EXECUTION_FAILED -> "CUSPARSE_STATUS_EXECUTION_FAILED";
+			case CUSPARSE_STATUS_INTERNAL_ERROR -> "CUSPARSE_STATUS_INTERNAL_ERROR";
+			case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED -> "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+			case CUSPARSE_STATUS_ZERO_PIVOT -> "CUSPARSE_STATUS_ZERO_PIVOT";
+			case CUSPARSE_STATUS_NOT_SUPPORTED -> "CUSPARSE_STATUS_NOT_SUPPORTED";
+			case CUSPARSE_STATUS_INSUFFICIENT_RESOURCES -> "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES";
+			default -> "Invalid error";
+		};
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java b/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java
new file mode 100644
index 00000000000..188d1908056
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUcontext.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class CUcontext extends NativePointerObject {
+	// Declares no members besides this empty public constructor.
+	public CUcontext() {
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java b/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java
new file mode 100644
index 00000000000..86b027717ef
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUdevice.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+public class CUdevice extends NativePointerObject {
+	// Declares no members besides this empty public constructor.
+	public CUdevice() {}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java b/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java
new file mode 100644
index 00000000000..d50e014ec21
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUdevice_attribute.java
@@ -0,0 +1,756 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+/**
+ * This class is a java-side replication of CUdevice_attribute.
+ * The descriptions were directly taken from:
+ * https://docs.nvidia.com/cuda/archive/12.6.1/pdf/CUDA_Driver_API.pdf
+ */
+
+public class CUdevice_attribute {
+
+	/**
+	 * Maximum number of threads per block
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1;
+
+	/**
+	 * Maximum block dimension X
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2;
+
+	/**
+	 * Maximum block dimension Y
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3;
+
+	/**
+	 * Maximum block dimension Z
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4;
+
+	/**
+	 * Maximum grid dimension X
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5;
+
+	/**
+	 * Maximum grid dimension Y
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6;
+
+	/**
+	 * Maximum grid dimension Z
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7;
+
+	/**
+	 * Maximum shared memory available per block in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8;
+
+	/**
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8;
+
+	/**
+	 * Memory available on device for __constant__ variables in a CUDA C kernel in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9;
+
+	/**
+	 * Warp size in threads
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10;
+
+	/**
+	 * Maximum pitch in bytes allowed by memory copies
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11;
+
+	/**
+	 * Maximum number of 32-bit registers available per block
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12;
+
+	/**
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12;
+
+	/**
+	 * Typical clock frequency in kilohertz
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13;
+
+	/**
+	 * Alignment requirement for textures
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14;
+
+	/**
+	 * Device can possibly copy memory and execute a kernel concurrently.
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15;
+
+	/**
+	 * Number of multiprocessors on device
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16;
+
+	/**
+	 * Specifies whether there is a run time limit on kernels
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17;
+
+	/**
+	 * Device is integrated with host memory
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_INTEGRATED = 18;
+
+	/**
+	 * Device can map host memory into CUDA address space
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19;
+
+	/**
+	 * Compute mode (See CUcomputemode for details)
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20;
+
+	/**
+	 * Maximum 1D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21;
+
+	/**
+	 * Maximum 2D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22;
+
+	/**
+	 * Maximum 2D texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23;
+
+	/**
+	 * Maximum 3D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24;
+
+	/**
+	 * Maximum 3D texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25;
+
+	/**
+	 * Maximum 3D texture depth
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26;
+
+	/**
+	 * Maximum 2D layered texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27;
+
+	/**
+	 * Maximum 2D layered texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28;
+
+	/**
+	 * Maximum layers in a 2D layered texture
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29;
+
+	/**
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27;
+
+	/**
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28;
+
+	/**
+	 * @deprecated Use {@link #CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29;
+
+	/**
+	 * Alignment requirement for surfaces
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30;
+
+	/**
+	 * Device can possibly execute multiple kernels concurrently
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31;
+
+	/**
+	 * Device has ECC support enabled
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32;
+
+	/**
+	 * PCI bus ID of the device
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33;
+
+	/**
+	 * PCI device ID of the device
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34;
+
+	/**
+	 * Device is using TCC driver model
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35;
+
+	/**
+	 * Peak memory clock frequency in kilohertz
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36;
+
+	/**
+	 * Global memory bus width in bits
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37;
+
+	/**
+	 * Size of L2 cache in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38;
+
+	/**
+	 * Maximum resident threads per multiprocessor
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39;
+
+	/**
+	 * Number of asynchronous engines
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40;
+
+	/**
+	 * Device shares a unified address space with the host
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41;
+
+	/**
+	 * Maximum 1D layered texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42;
+
+	/**
+	 * Maximum layers in a 1D layered texture
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43;
+
+	/**
+	 * @deprecated Do not use.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44;
+
+	/**
+	 * Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45;
+
+	/**
+	 * Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46;
+
+	/**
+	 * Alternate maximum 3D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47;
+
+	/**
+	 * Alternate maximum 3D texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48;
+
+	/**
+	 * Alternate maximum 3D texture depth
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49;
+
+	/**
+	 * PCI domain ID of the device
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50;
+
+	/**
+	 * Pitch alignment requirement for textures
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51;
+
+	/**
+	 * Maximum cubemap texture width/height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52;
+
+	/**
+	 * Maximum cubemap layered texture width/height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53;
+
+	/**
+	 * Maximum layers in a cubemap layered texture
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54;
+
+	/**
+	 * Maximum 1D surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55;
+
+	/**
+	 * Maximum 2D surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56;
+
+	/**
+	 * Maximum 2D surface height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57;
+
+	/**
+	 * Maximum 3D surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58;
+
+	/**
+	 * Maximum 3D surface height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59;
+
+	/**
+	 * Maximum 3D surface depth
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60;
+
+	/**
+	 * Maximum 1D layered surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61;
+
+	/**
+	 * Maximum layers in a 1D layered surface
+	 */
+	public static final int	CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62;
+
+	/**
+	 * Maximum 2D layered surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63;
+
+	/**
+	 * Maximum 2D layered surface height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64;
+
+	/**
+	 * Maximum layers in a 2D layered surface
+	 */
+	public static final int	CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65;
+
+	/**
+	 * Maximum cubemap surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66;
+
+	/**
+	 * Maximum cubemap layered surface width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67;
+
+	/**
+	 * Maximum layers in a cubemap layered surface
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68;
+
+	/**
+	 * @deprecated Do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or
+	 * cuDeviceGetTexture1DLinearMaxWidth() instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69;
+
+	/**
+	 * Maximum 2D linear texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70;
+
+	/**
+	 * Maximum 2D linear texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71;
+
+	/**
+	 * Maximum 2D linear texture pitch in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72;
+
+	/**
+	 * Maximum mipmapped 2D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73;
+
+	/**
+	 * Maximum mipmapped 2D texture height
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74;
+
+	/**
+	 * Major compute capability version number
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75;
+
+	/**
+	 * Minor compute capability version number
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76;
+
+	/**
+	 * Maximum mipmapped 1D texture width
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77;
+
+	/**
+	 * Device supports stream priorities
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78;
+
+	/**
+	 * Device supports caching globals in L1
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79;
+
+	/**
+	 * Device supports caching locals in L1
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80;
+
+	/**
+	 * Maximum shared memory available per multiprocessor in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81;
+
+	/**
+	 * Maximum number of 32-bit registers available per multiprocessor
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82;
+
+	/**
+	 * Device can allocate managed memory on this system
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83;
+
+	/**
+	 * Device is on a multi-GPU board
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84;
+
+	/**
+	 * Unique id for a group of devices on the same multi-GPU board
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85;
+
+	/**
+	 * 	Link between the device and the host supports native atomic operations (this is a placeholder
+	 * 	attribute, and is not supported on any current hardware)
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86;
+
+	/**
+	 * Ratio of single precision performance (in floating-point operations per second) to double precision
+	 * 	performance
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87;
+
+	/**
+	 * Device supports coherently accessing pageable memory without calling cudaHostRegister on it
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88;
+
+	/**
+	 * Device can coherently access managed memory concurrently with the CPU
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89;
+
+	/**
+	 * Device supports compute preemption.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90;
+
+	/**
+	 * Device can access host registered memory at the same virtual address as the CPU
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91;
+
+	/**
+	 * @deprecated Deprecated along with the v1 MemOps API. cuStreamBatchMemOp and related APIs are supported.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = 92;
+
+	/**
+	 * @deprecated Deprecated along with the v1 MemOps API. 64-bit operations are supported in
+	 * cuStreamBatchMemOp and related APIs.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = 93;
+
+	/**
+	 * @deprecated Deprecated along with the v1 MemOps API. CU_STREAM_WAIT_VALUE_NOR is supported.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = 94;
+
+	/**
+	 * Device supports launching cooperative kernels via cuLaunchCooperativeKernel
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95;
+
+	/**
+	 * @deprecated cuLaunchCooperativeKernelMultiDevice is deprecated.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96;
+
+	/**
+	 * Maximum optin shared memory per block
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97;
+
+	/**
+	 * 	The CU_STREAM_WAIT_VALUE_FLUSH flag and the
+	 * 	CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See
+	 * 	Stream Memory Operations for additional details.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98;
+
+	/**
+	 * Device supports host memory registration via cudaHostRegister.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99;
+
+	/**
+	 * Device accesses pageable memory via the host's page tables.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100;
+
+	/**
+	 * The host can directly access managed memory on the device without migration.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101;
+
+	/**
+	 * @deprecated Use
+	 * {@link #CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED} instead.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102;
+
+	/**
+	 * 	Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate,
+	 * 	cuMemMap and related APIs
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102;
+
+	/**
+	 * 	Device supports exporting memory to a posix file descriptor with
+	 * 	cuMemExportToShareableHandle, if requested via cuMemCreate
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103;
+
+	/**
+	 * 	Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle,
+	 * 	if requested via cuMemCreate
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104;
+
+	/**
+	 * 	Device supports exporting memory to a Win32 KMT handle with
+	 * 	cuMemExportToShareableHandle, if requested via cuMemCreate
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105;
+
+	/**
+	 * Maximum number of blocks per multiprocessor
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106;
+
+	/**
+	 * Device supports compression of memory
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107;
+
+	/**
+	 * Maximum L2 persisting lines capacity setting in bytes.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108;
+
+	/**
+	 * Maximum value of CUaccessPolicyWindow::num_bytes.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109;
+
+	/**
+	 * Device supports specifying the GPUDirect RDMA flag with cuMemCreate
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110;
+
+	/**
+	 * Shared memory reserved by CUDA driver per block in bytes
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111;
+
+	/**
+	 * Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112;
+
+	/**
+	 * Device supports using the cuMemHostRegister flag CU_MEMHOSTREGISTER_READ_ONLY to
+	 * register memory that must be mapped as read-only to the GPU
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113;
+
+	/**
+	 * External timeline semaphore interop is supported on the device
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114;
+
+	/**
+	 * Device supports using the cuMemAllocAsync and cuMemPool family of APIs
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115;
+
+	/**
+	 * Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages
+	 * (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116;
+
+	/**
+	 * The returned attribute shall be interpreted as a bitmask, where the individual bits are described by
+	 * the CUflushGPUDirectRDMAWritesOptions enum
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117;
+
+	/**
+	 * GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope
+	 * indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical
+	 * values returned here.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118;
+
+	/**
+	 * Handle types supported with mempool based IPC
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119;
+
+	/**
+	 * Indicates device supports cluster launch
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = 120;
+
+	/**
+	 * Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121;
+
+	/**
+	 * 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 122;
+
+	/**
+	 * CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 123;
+
+	/**
+	 * Device supports buffer sharing with dma_buf mechanism.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124;
+
+	/**
+	 * Device supports IPC Events.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = 125;
+
+	/**
+	 * Number of memory domains the device supports.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = 126;
+
+	/**
+	 * Device supports accessing memory using Tensor Map.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = 127;
+
+	/**
+	 * Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or
+	 * requested with cuMemCreate()
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = 128;
+
+	/**
+	 * Device supports unified function pointers.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = 129;
+
+	/**
+	 * NUMA configuration of a device: value is of type CUdeviceNumaConfig enum
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130;
+
+	/**
+	 * NUMA node ID of the GPU memory
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_NUMA_ID = 131;
+
+	/**
+	 * Device supports switch multicast and reduction operations.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132;
+
+	/**
+	 * Indicates if contexts created on this device will be shared via MPS
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133;
+
+	/**
+	 * NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134;
+
+	/**
+	 * Device supports CIG with D3D12.
+	 */
+	public static final int CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135;
+
+	// CU_DEVICE_ATTRIBUTE_MAX from the native enum is not mirrored in this class.
+
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java b/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java
new file mode 100644
index 00000000000..e6299b19e68
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUdeviceptr.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.Pointer;
+
+/** A {@link Pointer} subtype in the cujava driver package; presumably mirrors CUDA's CUdeviceptr. */
+public class CUdeviceptr extends Pointer {
+	/** Creates an instance through the no-argument {@link Pointer} constructor; adds no state. */
+	public CUdeviceptr() { super(); }
+	/** Forwards {@code other} unchanged to the single-argument superclass constructor. */
+	protected CUdeviceptr(CUdeviceptr other) {
+		super(other);
+	}
+	/** Forwards {@code other} and {@code byteOffset} unchanged to the superclass constructor. */
+	protected CUdeviceptr(CUdeviceptr other, long byteOffset) {
+		super(other, byteOffset);
+	}
+	/** Returns a new CUdeviceptr built from this instance and {@code byteOffset}. */
+	@Override
+	public CUdeviceptr withByteOffset(long byteOffset) {
+		return new CUdeviceptr(this, byteOffset);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java b/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java
new file mode 100644
index 00000000000..288b63ee097
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUfunction.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+/** A {@link NativePointerObject} subtype in the cujava driver package; presumably a CUfunction handle. */
+public class CUfunction extends NativePointerObject {
+	/** Creates an instance through the superclass's no-argument constructor; adds no state. */
+	public CUfunction() { super(); }
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java b/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java
new file mode 100644
index 00000000000..a7481e2960d
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUmodule.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+
+/** A {@link NativePointerObject} subtype in the cujava driver package; presumably a CUmodule handle. */
+public class CUmodule extends NativePointerObject {
+	/** Creates an instance through the superclass's no-argument constructor; adds no state. */
+	public CUmodule() { super(); }
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUresult.java b/src/main/java/org/apache/sysds/cujava/driver/CUresult.java
new file mode 100644
index 00000000000..2814b771e68
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUresult.java
@@ -0,0 +1,721 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+/**
+ * The descriptions are directly taken from:
+ * https://docs.nvidia.com/cuda/archive/12.6.1/pdf/CUDA_Driver_API.pdf
+ */
+
+public class CUresult {
+
+
+	/**
+	 * The API call returned with no errors. In the case of query calls, this also means that the operation
+	 * being queried is complete (see cuEventQuery() and cuStreamQuery()).
+	 */
+	public static final int CUDA_SUCCESS = 0;
+
+	/**
+	 * This indicates that one or more of the parameters passed to the API call is not within an acceptable
+	 * range of values.
+	 */
+	public static final int CUDA_ERROR_INVALID_VALUE = 1;
+
+	/**
+	 * The API call failed because it was unable to allocate enough memory or other resources to perform
+	 * the requested operation.
+	 */
+	public static final int CUDA_ERROR_OUT_OF_MEMORY = 2;
+
+	/**
+	 * This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has
+	 * failed.
+	 */
+	public static final int CUDA_ERROR_NOT_INITIALIZED = 3;
+
+	/**
+	 * This indicates that the CUDA driver is in the process of shutting down.
+	 */
+	public static final int CUDA_ERROR_DEINITIALIZED = 4;
+
+	/**
+	 * This indicates profiler is not initialized for this run. This can happen when the application is running
+	 * with external profiling tools like visual profiler.
+	 */
+	public static final int CUDA_ERROR_PROFILER_DISABLED = 5;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to
+	 * enable/disable the profiling via cuProfilerStart or cuProfilerStop without initialization.
+	 */
+	public static final int CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to call
+	 * cuProfilerStart() when profiling is already enabled.
+	 */
+	public static final int CUDA_ERROR_PROFILER_ALREADY_STARTED = 7;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to call
+	 * cuProfilerStop() when profiling is already disabled.
+	 */
+	public static final int CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8;
+
+	/**
+	 * This indicates that the CUDA driver that the application has loaded is a stub library. Applications
+	 * that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
+	 */
+	public static final int CUDA_ERROR_STUB_LIBRARY = 34;
+
+	/**
+	 * This indicates that requested CUDA device is unavailable at the current time. Devices
+	 * are often unavailable due to use of CU_COMPUTEMODE_EXCLUSIVE_PROCESS or
+	 * CU_COMPUTEMODE_PROHIBITED.
+	 */
+	public static final int CUDA_ERROR_DEVICE_UNAVAILABLE = 46;
+
+	/**
+	 * This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
+	 */
+	public static final int CUDA_ERROR_NO_DEVICE = 100;
+
+	/**
+	 * This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA
+	 * device or that the action requested is invalid for the specified device.
+	 */
+	public static final int CUDA_ERROR_INVALID_DEVICE = 101;
+
+	/**
+	 * This error indicates that the Grid license is not applied.
+	 */
+	public static final int CUDA_ERROR_DEVICE_NOT_LICENSED = 102;
+
+	/**
+	 * This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA
+	 * module.
+	 */
+	public static final int CUDA_ERROR_INVALID_IMAGE = 200;
+
+	/**
+	 * This most frequently indicates that there is no context bound to the current thread. This can also
+	 * be returned if the context passed to an API call is not a valid handle (such as a context that has had
+	 * cuCtxDestroy() invoked on it). This can also be returned if a user mixes different API versions
+	 * (i.e. 3010 context with 3020 API calls). See cuCtxGetApiVersion() for more details. This can also
+	 * be returned if the green context passed to an API call was not converted to a CUcontext using
+	 * cuCtxFromGreenCtx API.
+	 */
+	public static final int CUDA_ERROR_INVALID_CONTEXT = 201;
+
+	/**
+	 * This indicates that the context being supplied as a parameter to the API call was already the active
+	 * context. Deprecated: this error return is deprecated as of CUDA 3.2. It is no longer an error to
+	 * attempt to push the active context via cuCtxPushCurrent().
+	 */
+	public static final int CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202;
+
+	/**
+	 * This indicates that a map or register operation has failed.
+	 */
+	public static final int CUDA_ERROR_MAP_FAILED = 205;
+
+	/**
+	 * This indicates that an unmap or unregister operation has failed.
+	 */
+	public static final int CUDA_ERROR_UNMAP_FAILED = 206;
+
+	/**
+	 * This indicates that the specified array is currently mapped and thus cannot be destroyed.
+	 */
+	public static final int CUDA_ERROR_ARRAY_IS_MAPPED = 207;
+
+	/**
+	 * This indicates that the resource is already mapped.
+	 */
+	public static final int CUDA_ERROR_ALREADY_MAPPED = 208;
+
+	/**
+	 * This indicates that there is no kernel image available that is suitable for the device. This can occur
+	 * when a user specifies code generation options for a particular CUDA source file that do not include
+	 * the corresponding device configuration.
+	 */
+	public static final int CUDA_ERROR_NO_BINARY_FOR_GPU = 209;
+
+	/**
+	 * This indicates that a resource has already been acquired.
+	 */
+	public static final int CUDA_ERROR_ALREADY_ACQUIRED = 210;
+
+	/**
+	 * This indicates that a resource is not mapped.
+	 */
+	public static final int CUDA_ERROR_NOT_MAPPED = 211;
+
+	/**
+	 * This indicates that a mapped resource is not available for access as an array.
+	 */
+	public static final int CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212;
+
+	/**
+	 * This indicates that a mapped resource is not available for access as a pointer.
+	 */
+	public static final int CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213;
+
+	/**
+	 * This indicates that an uncorrectable ECC error was detected during execution.
+	 */
+	public static final int CUDA_ERROR_ECC_UNCORRECTABLE = 214;
+
+	/**
+	 * This indicates that the CUlimit passed to the API call is not supported by the active device.
+	 */
+	public static final int CUDA_ERROR_UNSUPPORTED_LIMIT = 215;
+
+	/**
+	 * This indicates that the CUcontext passed to the API call can only be bound to a single CPU thread at
+	 * a time but is already bound to a CPU thread.
+	 */
+	public static final int CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216;
+
+	/**
+	 * This indicates that peer access is not supported across the given devices.
+	 */
+	public static final int CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217;
+
+	/**
+	 * This indicates that a PTX JIT compilation failed.
+	 */
+	public static final int CUDA_ERROR_INVALID_PTX = 218;
+
+	/**
+	 * This indicates an error with OpenGL or DirectX context.
+	 */
+	public static final int CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219;
+
+	/**
+	 * This indicates that an uncorrectable NVLink error was detected during the execution.
+	 */
+	public static final int CUDA_ERROR_NVLINK_UNCORRECTABLE = 220;
+
+	/**
+	 * This indicates that the PTX JIT compiler library was not found.
+	 */
+	public static final int CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221;
+
+	/**
+	 * This indicates that the provided PTX was compiled with an unsupported toolchain.
+	 */
+	public static final int CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222;
+
+	/**
+	 * This indicates that the PTX JIT compilation was disabled.
+	 */
+	public static final int CUDA_ERROR_JIT_COMPILATION_DISABLED = 223;
+
+	/**
+	 * This indicates that the CUexecAffinityType passed to the API call is not supported by the active
+	 * device.
+	 */
+	public static final int CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224;
+
+	/**
+	 * This indicates that the code to be compiled by the PTX JIT contains unsupported call to
+	 * cudaDeviceSynchronize.
+	 */
+	public static final int CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225;
+
+	/**
+	 * This indicates that the device kernel source is invalid. This includes compilation/linker errors
+	 * encountered in device code or user error.
+	 */
+	public static final int CUDA_ERROR_INVALID_SOURCE = 300;
+
+	/**
+	 * This indicates that the file specified was not found.
+	 */
+	public static final int CUDA_ERROR_FILE_NOT_FOUND = 301;
+
+	/**
+	 * This indicates that a link to a shared object failed to resolve.
+	 */
+	public static final int CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302;
+
+	/**
+	 * This indicates that initialization of a shared object failed.
+	 */
+	public static final int CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303;
+
+	/**
+	 * This indicates that an OS call failed.
+	 */
+	public static final int CUDA_ERROR_OPERATING_SYSTEM = 304;
+
+	/**
+	 * This indicates that a resource handle passed to the API call was not valid. Resource handles are
+	 * opaque types like CUstream and CUevent.
+	 */
+	public static final int CUDA_ERROR_INVALID_HANDLE = 400;
+
+	/**
+	 * This indicates that a resource required by the API call is not in a valid state to perform the requested
+	 * operation.
+	 */
+	public static final int CUDA_ERROR_ILLEGAL_STATE = 401;
+
+	/**
+	 * This indicates an attempt was made to introspect an object in a way that would discard semantically
+	 * important information. This is either due to the object using functionality newer than the API version
+	 * used to introspect it or omission of optional return arguments.
+	 */
+	public static final int CUDA_ERROR_LOSSY_QUERY = 402;
+
+	/**
+	 * This indicates that a named symbol was not found. Examples of symbols are global/constant
+	 * variable names, driver function names, texture names, and surface names.
+	 */
+	public static final int CUDA_ERROR_NOT_FOUND = 500;
+
+	/**
+	 * This indicates that asynchronous operations issued previously have not completed yet. This result
+	 * is not actually an error, but must be indicated differently than CUDA_SUCCESS (which indicates
+	 * completion). Calls that may return this value include cuEventQuery() and cuStreamQuery().
+	 */
+	public static final int CUDA_ERROR_NOT_READY = 600;
+
+	/**
+	 * While executing a kernel, the device encountered a load or store instruction on an invalid memory
+	 * address. This leaves the process in an inconsistent state and any further CUDA work will return the
+	 * same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_ILLEGAL_ADDRESS = 700;
+
+	/**
+	 * This indicates that a launch did not occur because it did not have appropriate resources. This error
+	 * usually indicates that the user has attempted to pass too many arguments to the device kernel, or
+	 * the kernel launch specifies too many threads for the kernel's register count. Passing arguments of
+	 * the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many
+	 * arguments and can also result in this error.
+	 */
+	public static final int CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701;
+
+	/**
+	 * This indicates that the device kernel took too long to execute. This can only occur if timeouts are
+	 * enabled - see the device attribute CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for
+	 * more information. This leaves the process in an inconsistent state and any further CUDA work will
+	 * return the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_LAUNCH_TIMEOUT = 702;
+
+	/**
+	 * This error indicates a kernel launch that uses an incompatible texturing mode.
+	 */
+	public static final int CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703;
+
+	/**
+	 * This error indicates that a call to cuCtxEnablePeerAccess() is trying to re-enable peer access to a
+	 * context which has already had peer access to it enabled.
+	 */
+	public static final int CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704;
+
+	/**
+	 * This error indicates that cuCtxDisablePeerAccess() is trying to disable peer access which has not
+	 * been enabled yet via cuCtxEnablePeerAccess().
+	 */
+	public static final int CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705;
+
+	/**
+	 * This error indicates that the primary context for the specified device has already been initialized.
+	 */
+	public static final int CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708;
+
+	/**
+	 * This error indicates that the context current to the calling thread has been destroyed using
+	 * cuCtxDestroy, or is a primary context which has not yet been initialized.
+	 */
+	public static final int CUDA_ERROR_CONTEXT_IS_DESTROYED = 709;
+
+	/**
+	 * A device-side assert triggered during kernel execution. The context cannot be used anymore, and
+	 * must be destroyed. All existing device memory allocations from this context are invalid and must be
+	 * reconstructed if the program is to continue using CUDA.
+	 */
+	public static final int CUDA_ERROR_ASSERT = 710;
+
+	/**
+	 * This error indicates that the hardware resources required to enable peer access have been exhausted
+	 * for one or more of the devices passed to cuCtxEnablePeerAccess().
+	 */
+	public static final int CUDA_ERROR_TOO_MANY_PEERS = 711;
+
+	/**
+	 * This error indicates that the memory range passed to cuMemHostRegister() has already been
+	 * registered.
+	 */
+	public static final int CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712;
+
+	/**
+	 * This error indicates that the pointer passed to cuMemHostUnregister() does not correspond to any
+	 * currently registered memory region.
+	 */
+	public static final int CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713;
+
+	/**
+	 * While executing a kernel, the device encountered a stack error. This can be due to stack corruption
+	 * or exceeding the stack size limit. This leaves the process in an inconsistent state and any further
+	 * CUDA work will return the same error. To continue using CUDA, the process must be terminated
+	 * and relaunched.
+	 */
+	public static final int CUDA_ERROR_HARDWARE_STACK_ERROR = 714;
+
+	/**
+	 * While executing a kernel, the device encountered an illegal instruction. This leaves the process in an
+	 * inconsistent state and any further CUDA work will return the same error. To continue using CUDA,
+	 * the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_ILLEGAL_INSTRUCTION = 715;
+
+	/**
+	 * While executing a kernel, the device encountered a load or store instruction on a memory address
+	 * which is not aligned. This leaves the process in an inconsistent state and any further CUDA
+	 * work will return the same error. To continue using CUDA, the process must be terminated and
+	 * relaunched.
+	 */
+	public static final int CUDA_ERROR_MISALIGNED_ADDRESS = 716;
+
+	/**
+	 * While executing a kernel, the device encountered an instruction which can only operate on memory
+	 * locations in certain address spaces (global, shared, or local), but was supplied a memory address
+	 * not belonging to an allowed address space. This leaves the process in an inconsistent state and any
+	 * further CUDA work will return the same error. To continue using CUDA, the process must be
+	 * terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_INVALID_ADDRESS_SPACE = 717;
+
+	/**
+	 * While executing a kernel, the device program counter wrapped its address space. This leaves the
+	 * process in an inconsistent state and any further CUDA work will return the same error. To continue
+	 * using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_INVALID_PC = 718;
+
+	/**
+	 * An exception occurred on the device while executing a kernel. Common causes include
+	 * dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common
+	 * cases can be system specific - more information about these cases can be found in the system
+	 * specific user guide. This leaves the process in an inconsistent state and any further CUDA work will
+	 * return the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_LAUNCH_FAILED = 719;
+
+	/**
+	 * This error indicates that the number of blocks launched per grid for a kernel that was launched
+	 * via either cuLaunchCooperativeKernel or cuLaunchCooperativeKernelMultiDevice exceeds the
+	 * maximum number of blocks as allowed by cuOccupancyMaxActiveBlocksPerMultiprocessor or
+	 * cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as
+	 * specified by the device attribute CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
+	 */
+	public static final int CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720;
+
+	/**
+	 * This error indicates that the attempted operation is not permitted.
+	 */
+	public static final int CUDA_ERROR_NOT_PERMITTED = 800;
+
+	/**
+	 * This error indicates that the attempted operation is not supported on the current system or device.
+	 */
+	public static final int CUDA_ERROR_NOT_SUPPORTED = 801;
+
+	/**
+	 * This error indicates that the system is not yet ready to start any CUDA work. To continue using
+	 * CUDA, verify the system configuration is in a valid state and all required driver daemons are
+	 * actively running. More information about this error can be found in the system specific user guide.
+	 */
+	public static final int CUDA_ERROR_SYSTEM_NOT_READY = 802;
+
+	/**
+	 * This error indicates that there is a mismatch between the versions of the display driver and the
+	 * CUDA driver. Refer to the compatibility documentation for supported versions.
+	 */
+	public static final int CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803;
+
+	/**
+	 * This error indicates that the system was upgraded to run with forward compatibility but the visible
+	 * hardware detected by CUDA does not support this configuration. Refer to the compatibility
+	 * documentation for the supported hardware matrix or ensure that only supported hardware is visible
+	 * during initialization via the CUDA_VISIBLE_DEVICES environment variable.
+	 */
+	public static final int CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804;
+
+	/**
+	 * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS
+	 * server.
+	 */
+	public static final int CUDA_ERROR_MPS_CONNECTION_FAILED = 805;
+
+	/**
+	 * This error indicates that the remote procedural call between the MPS server and the MPS client
+	 * failed.
+	 */
+	public static final int CUDA_ERROR_MPS_RPC_FAILURE = 806;
+
+	/**
+	 * This error indicates that the MPS server is not ready to accept new MPS client requests. This error
+	 * can be returned when the MPS server is in the process of recovering from a fatal failure.
+	 */
+	public static final int CUDA_ERROR_MPS_SERVER_NOT_READY = 807;
+
+	/**
+	 * This error indicates that the hardware resources required to create MPS client have been exhausted.
+	 */
+	public static final int CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808;
+
+	/**
+	 * This error indicates that the hardware resources required to support device connections have been
+	 * exhausted.
+	 */
+	public static final int CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809;
+
+	/**
+	 * This error indicates that the MPS client has been terminated by the server. To continue using
+	 * CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_MPS_CLIENT_TERMINATED = 810;
+
+	/**
+	 * This error indicates that the module is using CUDA Dynamic Parallelism, but the current
+	 * configuration, like MPS, does not support it.
+	 */
+	public static final int CUDA_ERROR_CDP_NOT_SUPPORTED = 811;
+
+	/**
+	 * This error indicates that a module contains an unsupported interaction between different versions of
+	 * CUDA Dynamic Parallelism.
+	 */
+	public static final int CUDA_ERROR_CDP_VERSION_MISMATCH = 812;
+
+	/**
+	 * This error indicates that the operation is not permitted when the stream is capturing.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900;
+
+	/**
+	 * This error indicates that the current capture sequence on the stream has been invalidated due to a
+	 * previous error.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901;
+
+	/**
+	 * This error indicates that the operation would have resulted in a merge of two independent capture
+	 * sequences.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_MERGE = 902;
+
+	/**
+	 * This error indicates that the capture was not initiated in this stream.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903;
+
+	/**
+	 * This error indicates that the capture sequence contains a fork that was not joined to the primary
+	 * stream.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904;
+
+	/**
+	 * This error indicates that a dependency would have been created which crosses the capture sequence
+	 * boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905;
+
+	/**
+	 * This error indicates a disallowed implicit dependency on a current capture sequence from
+	 * cudaStreamLegacy.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906;
+
+	/**
+	 * This error indicates that the operation is not permitted on an event which was last recorded in a
+	 * capturing stream.
+	 */
+	public static final int CUDA_ERROR_CAPTURED_EVENT = 907;
+
+	/**
+	 * A stream capture sequence not initiated with the CU_STREAM_CAPTURE_MODE_RELAXED
+	 * argument to cuStreamBeginCapture was passed to cuStreamEndCapture in a different thread.
+	 */
+	public static final int CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908;
+
+	/**
+	 * This error indicates that the timeout specified for the wait operation has lapsed.
+	 */
+	public static final int CUDA_ERROR_TIMEOUT = 909;
+
+	/**
+	 * This error indicates that the graph update was not performed because it included changes which
+	 * violated constraints specific to instantiated graph update.
+	 */
+	public static final int CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910;
+
+	/**
+	 * This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting
+	 * for an external device's signal before consuming shared data, the external device signaled an error
+	 * indicating that the data is not valid for consumption. This leaves the process in an inconsistent state
+	 * and any further CUDA work will return the same error. To continue using CUDA, the process must
+	 * be terminated and relaunched.
+	 */
+	public static final int CUDA_ERROR_EXTERNAL_DEVICE = 911;
+
+	/**
+	 * Indicates a kernel launch error due to cluster misconfiguration.
+	 */
+	public static final int CUDA_ERROR_INVALID_CLUSTER_SIZE = 912;
+
+	/**
+	 * Indicates that a function handle is not loaded when calling an API that requires a loaded function.
+	 */
+	public static final int CUDA_ERROR_FUNCTION_NOT_LOADED = 913;
+
+	/**
+	 * This error indicates one or more resources passed in are not valid resource types for the operation.
+	 */
+	public static final int CUDA_ERROR_INVALID_RESOURCE_TYPE = 914;
+
+	/**
+	 * This error indicates one or more resources are insufficient or non-applicable for the operation.
+	 */
+	public static final int CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915;
+
+	/**
+	 * This indicates that an unknown internal error has occurred.
+	 */
+	public static final int CUDA_ERROR_UNKNOWN = 999;
+
+	public static String resultString(int err){
+		return switch (err) {
+			case CUDA_SUCCESS -> "CUDA_SUCCESS";
+			case CUDA_ERROR_INVALID_VALUE -> "CUDA_ERROR_INVALID_VALUE";
+			case CUDA_ERROR_OUT_OF_MEMORY -> "CUDA_ERROR_OUT_OF_MEMORY";
+			case CUDA_ERROR_NOT_INITIALIZED -> "CUDA_ERROR_NOT_INITIALIZED";
+			case CUDA_ERROR_DEINITIALIZED -> "CUDA_ERROR_DEINITIALIZED";
+			case CUDA_ERROR_PROFILER_DISABLED -> "CUDA_ERROR_PROFILER_DISABLED";
+			case CUDA_ERROR_PROFILER_NOT_INITIALIZED -> "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
+			case CUDA_ERROR_PROFILER_ALREADY_STARTED -> "CUDA_ERROR_PROFILER_ALREADY_STARTED";
+			case CUDA_ERROR_PROFILER_ALREADY_STOPPED -> "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
+			case CUDA_ERROR_STUB_LIBRARY -> "CUDA_ERROR_STUB_LIBRARY";
+			case CUDA_ERROR_DEVICE_UNAVAILABLE -> "CUDA_ERROR_DEVICE_UNAVAILABLE";
+			case CUDA_ERROR_NO_DEVICE -> "CUDA_ERROR_NO_DEVICE";
+			case CUDA_ERROR_INVALID_DEVICE -> "CUDA_ERROR_INVALID_DEVICE";
+			case CUDA_ERROR_DEVICE_NOT_LICENSED -> "CUDA_ERROR_DEVICE_NOT_LICENSED";
+			case CUDA_ERROR_INVALID_IMAGE -> "CUDA_ERROR_INVALID_IMAGE";
+			case CUDA_ERROR_INVALID_CONTEXT -> "CUDA_ERROR_INVALID_CONTEXT";
+			case CUDA_ERROR_CONTEXT_ALREADY_CURRENT -> "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
+			case CUDA_ERROR_MAP_FAILED -> "CUDA_ERROR_MAP_FAILED";
+			case CUDA_ERROR_UNMAP_FAILED -> "CUDA_ERROR_UNMAP_FAILED";
+			case CUDA_ERROR_ARRAY_IS_MAPPED -> "CUDA_ERROR_ARRAY_IS_MAPPED";
+			case CUDA_ERROR_ALREADY_MAPPED -> "CUDA_ERROR_ALREADY_MAPPED";
+			case CUDA_ERROR_NO_BINARY_FOR_GPU -> "CUDA_ERROR_NO_BINARY_FOR_GPU";
+			case CUDA_ERROR_ALREADY_ACQUIRED -> "CUDA_ERROR_ALREADY_ACQUIRED";
+			case CUDA_ERROR_NOT_MAPPED -> "CUDA_ERROR_NOT_MAPPED";
+			case CUDA_ERROR_NOT_MAPPED_AS_ARRAY -> "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
+			case CUDA_ERROR_NOT_MAPPED_AS_POINTER -> "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
+			case CUDA_ERROR_ECC_UNCORRECTABLE -> "CUDA_ERROR_ECC_UNCORRECTABLE";
+			case CUDA_ERROR_UNSUPPORTED_LIMIT -> "CUDA_ERROR_UNSUPPORTED_LIMIT";
+			case CUDA_ERROR_CONTEXT_ALREADY_IN_USE -> "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
+			case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED -> "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
+			case CUDA_ERROR_INVALID_PTX -> "CUDA_ERROR_INVALID_PTX";
+			case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT -> "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
+			case CUDA_ERROR_NVLINK_UNCORRECTABLE -> "CUDA_ERROR_NVLINK_UNCORRECTABLE";
+			case CUDA_ERROR_JIT_COMPILER_NOT_FOUND -> "CUDA_ERROR_JIT_COMPILER_NOT_FOUND";
+			case CUDA_ERROR_UNSUPPORTED_PTX_VERSION -> "CUDA_ERROR_UNSUPPORTED_PTX_VERSION";
+			case CUDA_ERROR_JIT_COMPILATION_DISABLED -> "CUDA_ERROR_JIT_COMPILATION_DISABLED";
+			case CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY -> "CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY";
+			case CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC -> "CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC";
+			case CUDA_ERROR_INVALID_SOURCE -> "CUDA_ERROR_INVALID_SOURCE";
+			case CUDA_ERROR_FILE_NOT_FOUND -> "CUDA_ERROR_FILE_NOT_FOUND";
+			case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND -> "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
+			case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED -> "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
+			case CUDA_ERROR_OPERATING_SYSTEM -> "CUDA_ERROR_OPERATING_SYSTEM";
+			case CUDA_ERROR_INVALID_HANDLE -> "CUDA_ERROR_INVALID_HANDLE";
+			case CUDA_ERROR_ILLEGAL_STATE -> "CUDA_ERROR_ILLEGAL_STATE";
+			case CUDA_ERROR_LOSSY_QUERY -> "CUDA_ERROR_LOSSY_QUERY";
+			case CUDA_ERROR_NOT_FOUND -> "CUDA_ERROR_NOT_FOUND";
+			case CUDA_ERROR_NOT_READY -> "CUDA_ERROR_NOT_READY";
+			case CUDA_ERROR_ILLEGAL_ADDRESS -> "CUDA_ERROR_ILLEGAL_ADDRESS";
+			case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES -> "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
+			case CUDA_ERROR_LAUNCH_TIMEOUT -> "CUDA_ERROR_LAUNCH_TIMEOUT";
+			case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING -> "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
+			case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED -> "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
+			case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED -> "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
+			case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE -> "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
+			case CUDA_ERROR_CONTEXT_IS_DESTROYED -> "CUDA_ERROR_CONTEXT_IS_DESTROYED";
+			case CUDA_ERROR_ASSERT -> "CUDA_ERROR_ASSERT";
+			case CUDA_ERROR_TOO_MANY_PEERS -> "CUDA_ERROR_TOO_MANY_PEERS";
+			case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED -> "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
+			case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED -> "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
+			case CUDA_ERROR_HARDWARE_STACK_ERROR -> "CUDA_ERROR_HARDWARE_STACK_ERROR";
+			case CUDA_ERROR_ILLEGAL_INSTRUCTION -> "CUDA_ERROR_ILLEGAL_INSTRUCTION";
+			case CUDA_ERROR_MISALIGNED_ADDRESS -> "CUDA_ERROR_MISALIGNED_ADDRESS";
+			case CUDA_ERROR_INVALID_ADDRESS_SPACE -> "CUDA_ERROR_INVALID_ADDRESS_SPACE";
+			case CUDA_ERROR_INVALID_PC -> "CUDA_ERROR_INVALID_PC";
+			case CUDA_ERROR_LAUNCH_FAILED -> "CUDA_ERROR_LAUNCH_FAILED";
+			case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE -> "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE";
+			case CUDA_ERROR_NOT_PERMITTED -> "CUDA_ERROR_NOT_PERMITTED";
+			case CUDA_ERROR_NOT_SUPPORTED -> "CUDA_ERROR_NOT_SUPPORTED";
+			case CUDA_ERROR_SYSTEM_NOT_READY -> "CUDA_ERROR_SYSTEM_NOT_READY";
+			case CUDA_ERROR_SYSTEM_DRIVER_MISMATCH -> "CUDA_ERROR_SYSTEM_DRIVER_MISMATCH";
+			case CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE -> "CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE";
+			case CUDA_ERROR_MPS_CONNECTION_FAILED -> "CUDA_ERROR_MPS_CONNECTION_FAILED";
+			case CUDA_ERROR_MPS_RPC_FAILURE -> "CUDA_ERROR_MPS_RPC_FAILURE";
+			case CUDA_ERROR_MPS_SERVER_NOT_READY -> "CUDA_ERROR_MPS_SERVER_NOT_READY";
+			case CUDA_ERROR_MPS_MAX_CLIENTS_REACHED -> "CUDA_ERROR_MPS_MAX_CLIENTS_REACHED";
+			case CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED -> "CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED";
+			case CUDA_ERROR_MPS_CLIENT_TERMINATED -> "CUDA_ERROR_MPS_CLIENT_TERMINATED";
+			case CUDA_ERROR_CDP_NOT_SUPPORTED -> "CUDA_ERROR_CDP_NOT_SUPPORTED";
+			case CUDA_ERROR_CDP_VERSION_MISMATCH -> "CUDA_ERROR_CDP_VERSION_MISMATCH";
+			case CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED -> "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED";
+			case CUDA_ERROR_STREAM_CAPTURE_INVALIDATED -> "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED";
+			case CUDA_ERROR_STREAM_CAPTURE_MERGE -> "CUDA_ERROR_STREAM_CAPTURE_MERGE";
+			case CUDA_ERROR_STREAM_CAPTURE_UNMATCHED -> "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED";
+			case CUDA_ERROR_STREAM_CAPTURE_UNJOINED -> "CUDA_ERROR_STREAM_CAPTURE_UNJOINED";
+			case CUDA_ERROR_STREAM_CAPTURE_ISOLATION -> "CUDA_ERROR_STREAM_CAPTURE_ISOLATION";
+			case CUDA_ERROR_STREAM_CAPTURE_IMPLICIT -> "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT";
+			case CUDA_ERROR_CAPTURED_EVENT -> "CUDA_ERROR_CAPTURED_EVENT";
+			case CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD -> "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD";
+			case CUDA_ERROR_TIMEOUT -> "CUDA_ERROR_TIMEOUT";
+			case CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE -> "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE";
+			case CUDA_ERROR_EXTERNAL_DEVICE -> "CUDA_ERROR_EXTERNAL_DEVICE";
+			case CUDA_ERROR_INVALID_CLUSTER_SIZE -> "CUDA_ERROR_INVALID_CLUSTER_SIZE";
+			case CUDA_ERROR_FUNCTION_NOT_LOADED -> "CUDA_ERROR_FUNCTION_NOT_LOADED";
+			case CUDA_ERROR_INVALID_RESOURCE_TYPE -> "CUDA_ERROR_INVALID_RESOURCE_TYPE";
+			case CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION -> "CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION";
+			case CUDA_ERROR_UNKNOWN -> "CUDA_ERROR_UNKNOWN";
+			default -> "Invalid error";
+		};
+	}
+
+	private CUresult() {
+		// Never instantiated: the class is accessed through static members such as resultString(int).
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CUstream.java b/src/main/java/org/apache/sysds/cujava/driver/CUstream.java
new file mode 100644
index 00000000000..1b1b07a6ec3
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CUstream.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.NativePointerObject;
+import org.apache.sysds.cujava.runtime.cudaStream_t;
+
+/** Stream handle of the driver package; it declares no fields, all state lives in {@link NativePointerObject}. */
+public class CUstream extends NativePointerObject {
+	/** Creates a handle through the superclass no-argument constructor; no value is supplied here. */
+	public CUstream() {
+	}
+	/** Creates a handle from a runtime-package {@link cudaStream_t} by handing it to the superclass constructor. */
+	public CUstream(cudaStream_t stream) {
+		super(stream);
+	}
+	CUstream(long value) { // package-private; presumably a raw native handle value -- TODO confirm in superclass
+		super(value);
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java b/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java
new file mode 100644
index 00000000000..fcc495df005
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/driver/CuJavaDriver.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.driver;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+public class CuJavaDriver {
+
+	private static boolean exceptionsEnabled = false;
+
+	private static final String LIB_BASE = "cujava_driver";
+
+	private CuJavaDriver() {
+
+	}
+
+	static {
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	private static int checkCudaResult(int result) {
+		if(exceptionsEnabled && result != CUresult.CUDA_SUCCESS) {
+			throw new CudaException(CUresult.resultString(result));
+		}
+		return result;
+	}
+
+	public static int cuCtxCreate(CUcontext pctx, int flags, CUdevice dev) {
+		return checkCudaResult(cuCtxCreateNative(pctx, flags, dev));
+	}
+
+	private static native int cuCtxCreateNative(CUcontext pctx, int flags, CUdevice dev);
+
+	public static int cuDeviceGet(CUdevice device, int ordinal) {
+		return checkCudaResult(cuDeviceGetNative(device, ordinal));
+	}
+
+	private static native int cuDeviceGetNative(CUdevice device, int ordinal);
+
+	public static int cuDeviceGetCount(int count[]) {
+		return checkCudaResult(cuDeviceGetCountNative(count));
+	}
+
+	private static native int cuDeviceGetCountNative(int count[]);
+
+	public static int cuInit(int flags) {
+		return checkCudaResult(cuInitNative(flags));
+	}
+
+	private static native int cuInitNative(int flags);
+
+	public static int cuLaunchKernel(CUfunction f, int gridDimX, int gridDimY, int gridDimZ, int blockDimX,
+		int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams, Pointer extra) {
+		return checkCudaResult(
+			cuLaunchKernelNative(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
+				hStream, kernelParams, extra));
+	}
+
+	private static native int cuLaunchKernelNative(CUfunction f, int gridDimX, int gridDimY, int gridDimZ,
+		int blockDimX, int blockDimY, int blockDimZ, int sharedMemBytes, CUstream hStream, Pointer kernelParams,
+		Pointer extra);
+
+	public static int cuModuleGetFunction(CUfunction hfunc, CUmodule hmod, String name) {
+		return checkCudaResult(cuModuleGetFunctionNative(hfunc, hmod, name));
+	}
+
+	private static native int cuModuleGetFunctionNative(CUfunction hfunc, CUmodule hmod, String name);
+
+	public static int cuModuleLoadDataEx(CUmodule phMod, Pointer p, int numOptions, int options[],
+		Pointer optionValues) {
+		if(numOptions == 0) {
+			options = (options != null) ? options : new int[0];
+			optionValues = (optionValues != null) ? optionValues : Pointer.to(new int[0]);
+		}
+		return checkCudaResult(cuModuleLoadDataExNative(phMod, p, numOptions, options, optionValues));
+	}
+
+	private static native int cuModuleLoadDataExNative(CUmodule phMod, Pointer p, int numOptions, int options[],
+		Pointer optionValues);
+
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) {
+		return checkCudaResult(cuMemAllocNative(dptr, bytesize));
+	}
+
+	private static native int cuMemAllocNative(CUdeviceptr dptr, long bytesize);
+
+	public static int cuModuleUnload(CUmodule hmod) {
+		return checkCudaResult(cuModuleUnloadNative(hmod));
+	}
+
+	private static native int cuModuleUnloadNative(CUmodule hmod);
+
+	public static int cuCtxDestroy(CUcontext ctx) {
+		return checkCudaResult(cuCtxDestroyNative(ctx));
+	}
+
+	private static native int cuCtxDestroyNative(CUcontext ctx);
+
+	public static int cuMemFree(CUdeviceptr dptr) {
+		return checkCudaResult(cuMemFreeNative(dptr));
+	}
+
+	private static native int cuMemFreeNative(CUdeviceptr dptr);
+
+
+	public static int cuMemcpyDtoH(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount) {
+		return checkCudaResult(cuMemcpyDtoHNative(dstHost, srcDevice, ByteCount));
+	}
+
+	private static native int cuMemcpyDtoHNative(Pointer dstHost, CUdeviceptr srcDevice, long ByteCount);
+
+	public static int cuCtxSynchronize() {
+		return checkCudaResult(cuCtxSynchronizeNative());
+	}
+
+	private static native int cuCtxSynchronizeNative();
+
+	public static int cuDeviceGetAttribute(int pi[], int attrib, CUdevice dev) {
+		return checkCudaResult(cuDeviceGetAttributeNative(pi, attrib, dev));
+	}
+
+	private static native int cuDeviceGetAttributeNative(int pi[], int attrib, CUdevice dev);
+}
diff --git a/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java b/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java
new file mode 100644
index 00000000000..0fbb4b014cd
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/interop/JCudaAdapter.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.interop;
+
+import java.lang.reflect.Field;
+
+/** Bridges cuJava pointers to JCuda pointers by writing two declared JCuda fields through reflection. */
+public class JCudaAdapter {
+	private JCudaAdapter() {}
+
+	/** Returns a new jcuda.Pointer whose nativePointer and byteOffset are copied from p; nothing else is copied. */
+	public static jcuda.Pointer toJCuda(org.apache.sysds.cujava.Pointer p) {
+		try {
+			jcuda.Pointer target = new jcuda.Pointer();
+			openField(jcuda.NativePointerObject.class, "nativePointer").setLong(target, p.getNativePointer());
+			openField(jcuda.Pointer.class, "byteOffset").setLong(target, p.getByteOffset());
+			return target;
+		} catch (ReflectiveOperationException e) {
+			throw new IllegalStateException("cuJava→JCuda pointer adaptation failed", e);
+		}
+	}
+
+	/** Looks up the field declared on owner under fieldName and makes it accessible for reflective writes. */
+	private static Field openField(Class<?> owner, String fieldName) throws NoSuchFieldException {
+		Field field = owner.getDeclaredField(fieldName);
+		field.setAccessible(true);
+		return field;
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java b/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java
new file mode 100644
index 00000000000..24f7246a943
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CuJava.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+import org.apache.sysds.cujava.CuJavaLibLoader;
+import org.apache.sysds.cujava.CudaException;
+import org.apache.sysds.cujava.Pointer;
+
+public class CuJava {
+
+	private static boolean exceptionsEnabled = false;
+
+	private static final String LIB_BASE = "cujava_runtime";
+
+	public static final int cudaMemAttachGlobal = 1;
+
+	public static final int cudaDeviceScheduleBlockingSync = 4;
+
+	private CuJava() {
+
+	}
+
+	static {
+		CuJavaLibLoader.load(LIB_BASE);
+	}
+
+	private static int checkCudaError(int result) {
+		if(exceptionsEnabled && result != CudaError.cudaSuccess) {
+			throw new CudaException(CudaError.errorString(result));
+		}
+		return result;
+	}
+
+	public static int cudaMemcpy(Pointer dst, Pointer src, long count, int cudaMemcpyKind_kind) {
+		return checkCudaError(cudaMemcpyNative(dst, src, count, cudaMemcpyKind_kind));
+	}
+
+	private static native int cudaMemcpyNative(Pointer dst, Pointer src, long count, int cudaMemcpyKind_kind);
+
+	public static int cudaMalloc(Pointer devPtr, long size) {
+		return checkCudaError(cudaMallocNative(devPtr, size));
+	}
+
+	private static native int cudaMallocNative(Pointer devPtr, long size);
+
+	public static int cudaFree(Pointer devPtr) {
+		return checkCudaError(cudaFreeNative(devPtr));
+	}
+
+	private static native int cudaFreeNative(Pointer devPtr);
+
+	public static int cudaMemset(Pointer mem, int c, long count) {
+		return checkCudaError(cudaMemsetNative(mem, c, count));
+	}
+
+	private static native int cudaMemsetNative(Pointer mem, int c, long count);
+
+	public static int cudaDeviceSynchronize() {
+		return checkCudaError(cudaDeviceSynchronizeNative());
+	}
+
+	private static native int cudaDeviceSynchronizeNative();
+
+	public static void setExceptionsEnabled(boolean enabled) {
+		exceptionsEnabled = enabled;
+	}
+
+	public static int cudaMallocManaged(Pointer devPtr, long size, int flags) {
+		return checkCudaError(cudaMallocManagedNative(devPtr, size, flags));
+	}
+
+	private static native int cudaMallocManagedNative(Pointer devPtr, long size, int flags);
+
+	public static int cudaMemGetInfo(long free[], long total[]) {
+		return checkCudaError(cudaMemGetInfoNative(free, total));
+	}
+
+	private static native int cudaMemGetInfoNative(long free[], long total[]);
+
+	public static int cudaGetDeviceCount(int count[]) {
+		return checkCudaError(cudaGetDeviceCountNative(count));
+	}
+
+	private static native int cudaGetDeviceCountNative(int count[]);
+
+	public static int cudaSetDevice(int device) {
+		return checkCudaError(cudaSetDeviceNative(device));
+	}
+
+	private static native int cudaSetDeviceNative(int device);
+
+	public static int cudaSetDeviceFlags(int flags) {
+		return checkCudaError(cudaSetDeviceFlagsNative(flags));
+	}
+
+	private static native int cudaSetDeviceFlagsNative(int flags);
+
+	public static int cudaGetDevice(int device[]) {
+		return checkCudaError(cudaGetDeviceNative(device));
+	}
+
+	private static native int cudaGetDeviceNative(int device[]);
+
+	public static int cudaGetDeviceProperties(CudaDeviceProp prop, int device) {
+		return checkCudaError(cudaGetDevicePropertiesNative(prop, device));
+	}
+
+	private static native int cudaGetDevicePropertiesNative(CudaDeviceProp prop, int device);
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java
new file mode 100644
index 00000000000..18c5e1ea7bc
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaDeviceProp.java
@@ -0,0 +1,503 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+/**
+ * Java mirror of the CUDA device properties (cudaDeviceProp): public mutable fields, array fields pre-allocated.
+ * The descriptions are taken directly from the documentation:
+ * https://docs.nvidia.com/cuda/archive/12.8.0/pdf/CUDA_Runtime_API.pdf
+ */
+public class CudaDeviceProp {
+
+	/**
+	 * The maximum value of cudaAccessPolicyWindow::num_bytes.
+	 */
+	public int accessPolicyMaxWindowSize;
+
+	/**
+	 * Number of asynchronous engines
+	 */
+	public int asyncEngineCount;
+
+	/**
+	 * Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+	 */
+	public int canMapHostMemory;
+
+	/**
+	 * Device can access host registered memory at the same virtual address as the CPU
+	 */
+	public int canUseHostPointerForRegisteredMem;
+
+	/**
+	 * @deprecated in CUDA 12. Clock frequency in kilohertz.
+	 */
+	public int clockRate;
+
+	/**
+	 * Indicates device supports cluster launch
+	 */
+	public int clusterLaunch;
+
+	/**
+	 * @deprecated Compute mode (see cudaComputeMode).
+	 */
+	public int computeMode;
+
+	/**
+	 * Device supports Compute Preemption
+	 */
+	public int computePreemptionSupported;
+
+	/**
+	 * Device can possibly execute multiple kernels concurrently
+	 */
+	public int concurrentKernels;
+
+	/**
+	 * Device can coherently access managed memory concurrently with the CPU
+	 */
+	public int concurrentManagedAccess;
+
+	/**
+	 * Device supports launching cooperative kernels via cudaLaunchCooperativeKernel
+	 */
+	public int cooperativeLaunch;
+
+	/**
+	 * @deprecated cudaLaunchCooperativeKernelMultiDevice is deprecated.
+	 */
+	public int cooperativeMultiDeviceLaunch;
+
+	/**
+	 * 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
+	 */
+	public int deferredMappingCudaArraySupported;
+
+	/**
+	 * Device can concurrently copy memory and execute a kernel. Deprecated; use asyncEngineCount instead.
+	 */
+	public int deviceOverlap;
+
+	/**
+	 * Host can directly access managed memory on the device without migration.
+	 */
+	public int directManagedMemAccessFromHost;
+
+	/**
+	 * Device has ECC support enabled
+	 */
+	public int ECCEnabled;
+
+	/**
+	 * Device supports caching globals in L1
+	 */
+	public int globalL1CacheSupported;
+
+	/**
+	 * Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum
+	 */
+	public int gpuDirectRDMAFlushWritesOptions;
+
+	/**
+	 * 1 if the device supports GPUDirect RDMA APIs, 0 otherwise
+	 */
+	public int gpuDirectRDMASupported;
+
+	/**
+	 * See the cudaGPUDirectRDMAWritesOrdering enum for numerical values
+	 */
+	public int gpuDirectRDMAWritesOrdering;
+
+	/**
+	 * Link between the device and the host supports native atomic operations
+	 */
+	public int hostNativeAtomicSupported;
+
+	/**
+	 * Device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped
+	 * as read-only to the GPU
+	 */
+	public int hostRegisterReadOnlySupported;
+
+	/**
+	 * Device supports host memory registration via cudaHostRegister.
+	 */
+	public int hostRegisterSupported;
+
+	/**
+	 * Device is integrated as opposed to discrete
+	 */
+	public int integrated;
+
+	/**
+	 * Device supports IPC Events.
+	 */
+	public int ipcEventSupported;
+
+	/**
+	 * Device is on a multi-GPU board
+	 */
+	public int isMultiGpuBoard;
+
+	/**
+	 * @deprecated Specified whether there is a run time limit on kernels.
+	 */
+	public int kernelExecTimeoutEnabled;
+
+	/**
+	 * Size of L2 cache in bytes
+	 */
+	public int l2CacheSize;
+
+	/**
+	 * Device supports caching locals in L1
+	 */
+	public int localL1CacheSupported;
+
+	/**
+	 * 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms
+	 */
+	public byte[] luid = new byte[8];
+
+	/**
+	 * LUID device node mask. Value is undefined on TCC and non-Windows platforms
+	 */
+	public int luidDeviceNodeMask;
+
+	/**
+	 * Major compute capability
+	 */
+	public int major;
+
+	/**
+	 * Device supports allocating managed memory on this system
+	 */
+	public int managedMemory;
+
+	/**
+	 * Maximum number of resident blocks per multiprocessor
+	 */
+	public int maxBlocksPerMultiProcessor;
+
+	/**
+	 * Maximum size of each dimension of a grid
+	 */
+	public int[] maxGridSize = new int[3];
+
+	/**
+	 * Maximum 1D surface size
+	 */
+	public int maxSurface1D;
+
+	/**
+	 * Maximum 1D layered surface dimensions
+	 */
+	public int[] maxSurface1DLayered = new int[2];
+
+	/**
+	 * Maximum 2D surface dimensions
+	 */
+	public int[] maxSurface2D = new int[2];
+
+	/**
+	 * Maximum 2D layered surface dimensions
+	 */
+	public int[] maxSurface2DLayered = new int[3];
+
+	/**
+	 * Maximum 3D surface dimensions
+	 */
+	public int[] maxSurface3D = new int[3];
+
+	/**
+	 * Maximum Cubemap surface dimensions
+	 */
+	public int maxSurfaceCubemap;
+
+	/**
+	 * Maximum Cubemap layered surface dimensions
+	 */
+	public int[] maxSurfaceCubemapLayered = new int[2];
+
+	/**
+	 * Maximum 1D texture size
+	 */
+	public int maxTexture1D;
+
+	/**
+	 * Maximum 1D layered texture dimensions
+	 */
+	public int[] maxTexture1DLayered = new int[2];
+
+	/**
+	 * @deprecated Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead.
+	 */
+	public int maxTexture1DLinear;
+
+	/**
+	 * Maximum 1D mipmapped texture size
+	 */
+	public int maxTexture1DMipmap;
+
+	/**
+	 * Maximum 2D texture dimensions
+	 */
+	public int[] maxTexture2D = new int[2];
+
+	/**
+	 * Maximum 2D texture dimensions if texture gather operations have to be performed
+	 */
+	public int[] maxTexture2DGather = new int[2];
+
+	/**
+	 * Maximum 2D layered texture dimensions
+	 */
+	public int[] maxTexture2DLayered = new int[3];
+
+	/**
+	 * Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+	 */
+	public int[] maxTexture2DLinear = new int[3];
+
+	/**
+	 * Maximum 2D mipmapped texture dimensions
+	 */
+	public int[] maxTexture2DMipmap = new int[2];
+
+	/**
+	 * Maximum 3D texture dimensions
+	 */
+	public int[] maxTexture3D = new int[3];
+
+	/**
+	 * Contains the maximum alternate 3D texture dimensions
+	 */
+	public int[] maxTexture3DAlt = new int[3];
+
+	/**
+	 * Maximum Cubemap texture dimensions
+	 */
+	public int maxTextureCubemap;
+
+	/**
+	 * Maximum Cubemap layered texture dimensions
+	 */
+	public int[] maxTextureCubemapLayered = new int[2];
+
+	/**
+	 * The maximum size of each dimension of a block.
+	 */
+	public int[] maxThreadsDim = new int[3];
+
+	/**
+	 * The maximum number of threads per block.
+	 */
+	public int maxThreadsPerBlock;
+
+	/**
+	 * The number of maximum resident threads per multiprocessor.
+	 */
+	public int maxThreadsPerMultiProcessor;
+
+	/**
+	 * The memory bus width in bits
+	 */
+	public int memoryBusWidth;
+
+	/**
+	 * @deprecated The peak memory clock frequency in kilohertz.
+	 */
+	public int memoryClockRate;
+
+	/**
+	 * 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise
+	 */
+	public int memoryPoolsSupported;
+
+	/**
+	 * Bitmask of handle types supported with mempool-based IPC
+	 */
+	public int memoryPoolSupportedHandleTypes;
+
+	/**
+	 * The maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through
+	 * cudaMallocPitch().
+	 */
+	public long memPitch;
+
+	/**
+	 * Minor compute capability
+	 */
+	public int minor;
+
+	/**
+	 * Unique identifier for a group of devices on the same multi-GPU board
+	 */
+	public int multiGpuBoardGroupID;
+
+	/**
+	 * Number of multiprocessors on device
+	 */
+	public int multiProcessorCount;
+
+	/**
+	 * An ASCII string identifying the device, held in a 256-byte buffer (presumably NUL-terminated -- TODO confirm).
+	 */
+	public byte[] name = new byte[256];
+
+	/**
+	 * Device supports coherently accessing pageable memory without calling cudaHostRegister on it
+	 */
+	public int pageableMemoryAccess;
+
+	/**
+	 * Device accesses pageable memory via the host's page tables
+	 */
+	public int pageableMemoryAccessUsesHostPageTables;
+
+	/**
+	 * PCI bus ID of the device
+	 */
+	public int pciBusID;
+
+	/**
+	 * PCI device ID of the device
+	 */
+	public int pciDeviceID;
+
+	/**
+	 * PCI domain ID of the device
+	 */
+	public int pciDomainID;
+
+	/**
+	 * Device's maximum l2 persisting lines capacity setting in bytes
+	 */
+	public int persistingL2CacheMaxSize;
+
+	/**
+	 * The maximum number of 32-bit registers available to a thread block; this number is shared by all thread blocks
+	 * simultaneously resident on a multiprocessor.
+	 */
+	public int regsPerBlock;
+
+	/**
+	 * 32-bit registers available per multiprocessor
+	 */
+	public int regsPerMultiprocessor;
+
+	/**
+	 * Reserved for future use
+	 */
+	public int reserved;
+
+	/**
+	 * Shared memory reserved by CUDA driver per block in bytes
+	 */
+	public long reservedSharedMemPerBlock;
+
+	/**
+	 * The maximum amount of shared memory available to a thread block in bytes; this amount is shared by all thread
+	 * blocks simultaneously resident on a multiprocessor.
+	 */
+	public long sharedMemPerBlock;
+
+	/**
+	 * Per device maximum shared memory per block usable by special opt in
+	 */
+	public long sharedMemPerBlockOptin;
+
+	/**
+	 * Shared memory available per multiprocessor in bytes
+	 */
+	public long sharedMemPerMultiprocessor;
+
+	/**
+	 * @deprecated Ratio of single precision performance (in floating-point operations per second) to double precision
+	 * performance
+	 */
+	public int singleToDoublePrecisionPerfRatio;
+
+	/**
+	 * 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise
+	 */
+	public int sparseCudaArraySupported;
+
+	/**
+	 * Is 1 if the device supports stream priorities, or 0 if it is not supported
+	 */
+	public int streamPrioritiesSupported;
+
+	/**
+	 * Alignment requirements for surfaces
+	 */
+	public long surfaceAlignment;
+
+	/**
+	 * 1 if device is a Tesla device using TCC driver, 0 otherwise
+	 */
+	public int tccDriver;
+
+	/**
+	 * The alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an
+	 * offset applied to texture fetches.
+	 */
+	public long textureAlignment;
+
+	/**
+	 * Pitch alignment requirement for texture references bound to pitched memory
+	 */
+	public long texturePitchAlignment;
+
+	/**
+	 * External timeline semaphore interop is supported on the device
+	 */
+	public int timelineSemaphoreInteropSupported;
+
+	/**
+	 * The total amount of constant memory available on the device in bytes.
+	 */
+	public long totalConstMem;
+
+	/**
+	 * The total amount of global memory available on the device in bytes.
+	 */
+	public long totalGlobalMem;
+
+	/**
+	 * 1 if the device shares a unified address space with the host and 0 otherwise.
+	 */
+	public int unifiedAddressing;
+
+	/**
+	 * Indicates device supports unified pointers
+	 */
+	public int unifiedFunctionPointers;
+
+	/**
+	 * The warp size in threads.
+	 */
+	public int warpSize;
+
+	// Creates an instance holding only default values: zero scalars and zero-filled arrays of the sizes declared above.
+	public CudaDeviceProp() {
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java
new file mode 100644
index 00000000000..d8c6fab7d29
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaError.java
@@ -0,0 +1,996 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+/**
+ * This class replicates the Cuda error types from the CUDA runtime API.
+ * The descriptions are directly taken from the Documentation:
+ * https://docs.nvidia.com/cuda/archive/12.8.0/pdf/CUDA_Runtime_API.pdf
+ */
+public class CudaError {
+
+	/**
+	 * The API call returned with no errors. In the case of query calls, this also means that the operation being
+	 * queried is complete
+	 */
+	public static final int cudaSuccess = 0;
+
+	/**
+	 * This indicates that one or more of the parameters passed to the API call is not within an acceptable range of
+	 * values.
+	 */
+	public static final int cudaErrorInvalidValue = 1;
+
+	/**
+	 * The API call failed because it was unable to allocate enough memory or other resources to perform the requested
+	 * operation
+	 */
+	public static final int cudaErrorMemoryAllocation = 2;
+
+	/**
+	 * The API call failed because the CUDA driver and runtime could not be initialized.
+	 */
+	public static final int cudaErrorInitializationError = 3;
+
+	/**
+	 * This indicates that a CUDA Runtime API call cannot be executed because it is being called during process shut
+	 * down, at a point in time after CUDA driver has been unloaded.
+	 */
+	public static final int cudaErrorCudartUnloading = 4;
+
+	/**
+	 * This indicates profiler is not initialized for this run. This can happen when the application is running with
+	 * external profiling tools like visual profiler.
+	 */
+	public static final int cudaErrorProfilerDisabled = 5;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the
+	 * profiling via cudaProfilerStart or cudaProfilerStop without initialization.
+	 */
+	public static final int cudaErrorProfilerNotInitialized = 6;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStart() when
+	 * profiling is already enabled.
+	 */
+	public static final int cudaErrorProfilerAlreadyStarted = 7;
+
+	/**
+	 * @deprecated
+	 * This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStop() when
+	 * profiling is already disabled.
+	 */
+	public static final int cudaErrorProfilerAlreadyStopped = 8;
+
+	/**
+	 * This indicates that a kernel launch is requesting resources that can never be satisfied by the current device.
+	 * Requesting more shared memory per block than the device supports will trigger this error, as will requesting too
+	 * many threads or blocks. See cudaDeviceProp for more device limitations.
+	 */
+	public static final int cudaErrorInvalidConfiguration = 9;
+
+	/**
+	 * This indicates that one or more of the pitch-related parameters passed to the API call is not within the
+	 * acceptable range for pitch.
+	 */
+	public static final int cudaErrorInvalidPitchValue = 12;
+
+	/**
+	 * This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier.
+	 */
+	public static final int cudaErrorInvalidSymbol = 13;
+
+	/**
+	 * @deprecated
+	 * This indicates that at least one host pointer passed to the API call is not a valid host pointer. This error
+	 * return is deprecated as of CUDA 10.1.
+	 */
+	public static final int cudaErrorInvalidHostPointer = 16;
+
+	/**
+	 * @deprecated
+	 * This indicates that at least one device pointer passed to the API call is not a valid device pointer. This error
+	 * return is deprecated as of CUDA 10.1.
+	 */
+	public static final int cudaErrorInvalidDevicePointer = 17;
+
+	/**
+	 * This indicates that the texture passed to the API call is not a valid texture.
+	 */
+	public static final int cudaErrorInvalidTexture = 18;
+
+	/**
+	 * This indicates that the texture binding is not valid. This occurs if you call cudaGetTextureAlignmentOffset()
+	 * with an unbound texture.
+	 */
+	public static final int cudaErrorInvalidTextureBinding = 19;
+
+	/**
+	 * This indicates that the channel descriptor passed to the API call is not valid. This occurs if the format is not
+	 * one of the formats specified by cudaChannelFormatKind, or if one of the dimensions is invalid.
+	 */
+	public static final int cudaErrorInvalidChannelDescriptor = 20;
+
+	/**
+	 * This indicates that the direction of the memcpy passed to the API call is not one of the types specified by
+	 * cudaMemcpyKind.
+	 */
+	public static final int cudaErrorInvalidMemcpyDirection = 21;
+
+	/**
+	 * @deprecated
+	 * This indicated that the user has taken the address of a constant variable, which was forbidden up until the CUDA
+	 * 3.1 release. This error return is deprecated as of CUDA 3.1. Variables in constant memory may now have their
+	 * address taken by the runtime via cudaGetSymbolAddress().
+	 */
+	public static final int cudaErrorAddressOfConstant = 22;
+
+	/**
+	 * @deprecated
+	 * This indicated that a texture fetch was not able to be performed. This was previously used for device emulation
+	 * of texture operations. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the
+	 * CUDA 3.1 release.
+	 */
+	public static final int cudaErrorTextureFetchFailed = 23;
+
+	/**
+	 * @deprecated
+	 * This indicated that a texture was not bound for access. This was previously used for device emulation of texture
+	 * operations. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1
+	 * release.
+	 */
+	public static final int cudaErrorTextureNotBound = 24;
+
+	/**
+	 * @deprecated
+	 * This indicated that a synchronization operation had failed. This was previously used for some device emulation
+	 * functions. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1
+	 * release.
+	 */
+	public static final int cudaErrorSynchronizationError = 25;
+
+	/**
+	 * This indicates that a non-float texture was being accessed with linear filtering. This is not supported by CUDA.
+	 */
+	public static final int cudaErrorInvalidFilterSetting = 26;
+
+	/**
+	 * This indicates that an attempt was made to read an unsupported data type as a normalized float. This is not
+	 * supported by CUDA.
+	 */
+	public static final int cudaErrorInvalidNormSetting = 27;
+
+	/**
+	 * @deprecated
+	 * Mixing of device and device emulation code was not allowed. This error return is deprecated as of CUDA 3.1.
+	 * Device emulation mode was removed with the CUDA 3.1 release.
+	 */
+	public static final int cudaErrorMixedDeviceExecution = 28;
+
+	/**
+	 * @deprecated
+	 * This indicates that the API call is not yet implemented. Production releases of CUDA will never return this
+	 * error. This error return is deprecated as of CUDA 4.1.
+	 */
+	public static final int cudaErrorNotYetImplemented = 31;
+
+	/**
+	 * This indicated that an emulated device pointer exceeded the 32-bit address range. This error return is
+	 * deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.
+	 */
+	public static final int cudaErrorMemoryValueTooLarge = 32;
+
+	/**
+	 * This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with
+	 * the stub rather than a real driver loaded will result in CUDA API returning this error.
+	 */
+	public static final int cudaErrorStubLibrary = 34;
+
+	/**
+	 * This indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library. This is not a
+	 * supported configuration. Users should install an updated NVIDIA display driver to allow the application to run
+	 */
+	public static final int cudaErrorInsufficientDriver = 35;
+
+	/**
+	 * This indicates that the API call requires a newer CUDA driver than the one currently installed. Users should
+	 * install an updated NVIDIA CUDA driver to allow the API call to succeed.
+	 */
+	public static final int cudaErrorCallRequiresNewerDriver = 36;
+
+	/**
+	 * This indicates that the surface passed to the API call is not a valid surface.
+	 */
+	public static final int cudaErrorInvalidSurface = 37;
+
+	/**
+	 * This indicates that multiple global or constant variables (across separate CUDA source files in the application)
+	 * share the same string name.
+	 */
+	public static final int cudaErrorDuplicateVariableName = 43;
+
+	/**
+	 * This indicates that multiple textures (across separate CUDA source files in the application) share the same
+	 * string name.
+	 */
+	public static final int cudaErrorDuplicateTextureName = 44;
+
+	/**
+	 * This indicates that multiple surfaces (across separate CUDA source files in the application) share the same
+	 * string name.
+	 */
+	public static final int cudaErrorDuplicateSurfaceName = 45;
+
+	/**
+	 * This indicates that all CUDA devices are busy or unavailable at the current time. Devices are often
+	 * busy/unavailable due to use of cudaComputeModeProhibited, cudaComputeModeExclusiveProcess, or when long-running
+	 * CUDA kernels have filled up the GPU and are blocking new work from starting. They can also be unavailable due to
+	 * memory constraints on a device that already has active CUDA work being performed.
+	 */
+	public static final int cudaErrorDevicesUnavailable = 46;
+
+	/**
+	 * This indicates that the current context is not compatible with the CUDA Runtime. This can only occur if you
+	 * are using CUDA Runtime/Driver interoperability and have created an existing Driver context using the driver API.
+	 * The Driver context may be incompatible either because the Driver context was created using an older version of
+	 * the API, because the Runtime API call expects a primary driver context and the Driver context is not primary, or
+	 * because the Driver context has been destroyed. Please see "Interactions with the CUDA Driver API" for more
+	 * information.
+	 */
+	public static final int cudaErrorIncompatibleDriverContext = 49;
+
+	/**
+	 * The device function being invoked (usually via cudaLaunchKernel()) was not previously configured via the
+	 * cudaConfigureCall() function.
+	 */
+	public static final int cudaErrorMissingConfiguration = 52;
+
+	/**
+	 * @deprecated
+	 * This indicated that a previous kernel launch failed. This was previously used for device emulation of kernel
+	 * launches. This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1
+	 * release.
+	 */
+	public static final int cudaErrorPriorLaunchFailure = 53;
+
+	/**
+	 * This error indicates that a device runtime grid launch did not occur because the depth of the child grid would
+	 * exceed the maximum supported number of nested grid launches.
+	 */
+	public static final int cudaErrorLaunchMaxDepthExceeded = 65;
+
+	/**
+	 * This error indicates that a grid launch did not occur because the kernel uses file-scoped textures which are
+	 * unsupported by the device runtime. Kernels launched via the device runtime only support textures created with the
+	 * Texture Object API's.
+	 */
+	public static final int cudaErrorLaunchFileScopedTex = 66;
+
+	/**
+	 * This error indicates that a grid launch did not occur because the kernel uses file-scoped surfaces which are
+	 * unsupported by the device runtime. Kernels launched via the device runtime only support surfaces created with the
+	 * Surface Object API's.
+	 */
+	public static final int cudaErrorLaunchFileScopedSurf = 67;
+
+	/**
+	 * This error indicates that a call to cudaDeviceSynchronize made from the device runtime failed because the call
+	 * was made at grid depth greater than either the default (2 levels of grids) or user specified device limit
+	 * cudaLimitDevRuntimeSyncDepth. To be able to synchronize on launched grids at a greater depth successfully, the
+	 * maximum nested depth at which cudaDeviceSynchronize will be called must be specified with the
+	 * cudaLimitDevRuntimeSyncDepth limit to the cudaDeviceSetLimit api before the host-side launch of a kernel using
+	 * the device runtime. Keep in mind that additional levels of sync depth require the runtime to reserve large
+	 * amounts of device memory that cannot be used for user allocations. Note that cudaDeviceSynchronize made from
+	 * device runtime is only supported on devices of compute capability less than 9.0.
+	 */
+	public static final int cudaErrorSyncDepthExceeded = 68;
+
+	/**
+	 * This error indicates that a device runtime grid launch failed because the launch would exceed the limit
+	 * cudaLimitDevRuntimePendingLaunchCount. For this launch to proceed successfully, cudaDeviceSetLimit must be called
+	 * to set the cudaLimitDevRuntimePendingLaunchCount to be higher than the upper bound of outstanding launches that
+	 * can be issued to the device runtime. Keep in mind that raising the limit of pending device runtime launches will
+	 * require the runtime to reserve device memory that cannot be used for user allocations.
+	 */
+	public static final int cudaErrorLaunchPendingCountExceeded = 69;
+
+	/**
+	 * The requested device function does not exist or is not compiled for the proper device architecture.
+	 */
+	public static final int cudaErrorInvalidDeviceFunction = 98;
+
+	/**
+	 * This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
+	 */
+	public static final int cudaErrorNoDevice = 100;
+
+	/**
+	 * This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that
+	 * the action requested is invalid for the specified device.
+	 */
+	public static final int cudaErrorInvalidDevice = 101;
+
+	/**
+	 * This indicates that the device doesn't have a valid Grid License.
+	 */
+	public static final int cudaErrorDeviceNotLicensed = 102;
+
+	/**
+	 * By default, the CUDA runtime may perform a minimal set of self-tests, as well as CUDA driver tests, to establish
+	 * the validity of both. Introduced in CUDA 11.2, this error return indicates that at least one of these tests has
+	 * failed and the validity of either the runtime or the driver could not be established.
+	 */
+	public static final int cudaErrorSoftwareValidityNotEstablished = 103;
+
+	/**
+	 * This indicates an internal startup failure in the CUDA runtime.
+	 */
+	public static final int cudaErrorStartupFailure = 127;
+
+	/**
+	 * This indicates that the device kernel image is invalid.
+	 */
+	public static final int cudaErrorInvalidKernelImage = 200;
+
+	/**
+	 * This most frequently indicates that there is no context bound to the current thread. This can also be returned if
+	 * the context passed to an API call is not a valid handle (such as a context that has had cuCtxDestroy() invoked on
+	 * it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls).
+	 * See cuCtxGetApiVersion() for more details.
+	 */
+	public static final int cudaErrorDeviceUninitialized = 201;
+
+	/**
+	 * This indicates that the buffer object could not be mapped.
+	 */
+	public static final int cudaErrorMapBufferObjectFailed = 205;
+
+	/**
+	 * This indicates that the buffer object could not be unmapped.
+	 */
+	public static final int cudaErrorUnmapBufferObjectFailed = 206;
+
+	/**
+	 * This indicates that the specified array is currently mapped and thus cannot be destroyed.
+	 */
+	public static final int cudaErrorArrayIsMapped = 207;
+
+	/**
+	 * This indicates that the resource is already mapped.
+	 */
+	public static final int cudaErrorAlreadyMapped = 208;
+
+	/**
+	 * This indicates that there is no kernel image available that is suitable for the device. This can occur when a
+	 * user specifies code generation options for a particular CUDA source file that do not include the corresponding
+	 * device configuration.
+	 */
+	public static final int cudaErrorNoKernelImageForDevice = 209;
+
+	/**
+	 * This indicates that a resource has already been acquired.
+	 */
+	public static final int cudaErrorAlreadyAcquired = 210;
+
+	/**
+	 * This indicates that a resource is not mapped.
+	 */
+	public static final int cudaErrorNotMapped = 211;
+
+	/**
+	 * This indicates that a mapped resource is not available for access as an array.
+	 */
+	public static final int cudaErrorNotMappedAsArray = 212;
+
+	/**
+	 * This indicates that a mapped resource is not available for access as a pointer.
+	 */
+	public static final int cudaErrorNotMappedAsPointer = 213;
+
+	/**
+	 * This indicates that an uncorrectable ECC error was detected during execution.
+	 */
+	public static final int cudaErrorECCUncorrectable = 214;
+
+	/**
+	 * This indicates that the cudaLimit passed to the API call is not supported by the active device.
+	 */
+	public static final int cudaErrorUnsupportedLimit = 215;
+
+	/**
+	 * This indicates that a call tried to access an exclusive-thread device that is already in use by a different
+	 * thread.
+	 */
+	public static final int cudaErrorDeviceAlreadyInUse = 216;
+
+	/**
+	 * This error indicates that P2P access is not supported across the given devices.
+	 */
+	public static final int cudaErrorPeerAccessUnsupported = 217;
+
+	/**
+	 * A PTX compilation failed. The runtime may fall back to compiling PTX if an application does not contain a
+	 * suitable binary for the current device.
+	 */
+	public static final int cudaErrorInvalidPtx = 218;
+
+	/**
+	 * This indicates an error with the OpenGL or DirectX context.
+	 */
+	public static final int cudaErrorInvalidGraphicsContext = 219;
+
+	/**
+	 * This indicates that an uncorrectable NVLink error was detected during the execution.
+	 */
+	public static final int cudaErrorNvlinkUncorrectable = 220;
+
+	/**
+	 * This indicates that the PTX JIT compiler library was not found. The JIT Compiler library is used for PTX
+	 * compilation. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for
+	 * the current device.
+	 */
+	public static final int cudaErrorJitCompilerNotFound = 221;
+
+	/**
+	 * This indicates that the provided PTX was compiled with an unsupported toolchain. The most common reason for this,
+	 * is the PTX was generated by a compiler newer than what is supported by the CUDA driver and PTX JIT compiler.
+	 */
+	public static final int cudaErrorUnsupportedPtxVersion = 222;
+
+	/**
+	 * This indicates that the JIT compilation was disabled. The JIT compilation compiles PTX. The runtime may fall back
+	 * to compiling PTX if an application does not contain a suitable binary for the current device.
+	 */
+	public static final int cudaErrorJitCompilationDisabled = 223;
+
+	/**
+	 * This indicates that the provided execution affinity is not supported by the device.
+	 */
+	public static final int cudaErrorUnsupportedExecAffinity = 224;
+
+	/**
+	 * This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize.
+	 */
+	public static final int cudaErrorUnsupportedDevSideSync = 225;
+
+	/**
+	 * This indicates that an exception occurred on the device that is now contained by the GPU's error containment
+	 * capability. Common causes are: a. certain types of invalid accesses of peer GPU memory over NVLink; b. certain
+	 * classes of hardware errors. This leaves the process in an inconsistent state and any further CUDA work will return
+	 * the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorContained = 226;
+
+	/**
+	 * This indicates that the device kernel source is invalid.
+	 */
+	public static final int cudaErrorInvalidSource = 300;
+
+	/**
+	 * This indicates that the file specified was not found.
+	 */
+	public static final int cudaErrorFileNotFound = 301;
+
+	/**
+	 * This indicates that a link to a shared object failed to resolve.
+	 */
+	public static final int cudaErrorSharedObjectSymbolNotFound = 302;
+
+	/**
+	 * This indicates that initialization of a shared object failed.
+	 */
+	public static final int cudaErrorSharedObjectInitFailed = 303;
+
+	/**
+	 * This error indicates that an OS call failed.
+	 */
+	public static final int cudaErrorOperatingSystem = 304;
+
+	/**
+	 * This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types
+	 * like cudaStream_t and cudaEvent_t.
+	 */
+	public static final int cudaErrorInvalidResourceHandle = 400;
+
+	/**
+	 * This indicates that a resource required by the API call is not in a valid state to perform the requested
+	 * operation.
+	 */
+	public static final int cudaErrorIllegalState = 401;
+
+	/**
+	 * This indicates an attempt was made to introspect an object in a way that would discard semantically important
+	 * information. This is either due to the object using functionality newer than the API version used to introspect it
+	 * or omission of optional return arguments.
+	 */
+	public static final int cudaErrorLossyQuery = 402;
+
+	/**
+	 * This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver
+	 * function names, texture names, and surface names.
+	 */
+	public static final int cudaErrorSymbolNotFound = 500;
+
+	/**
+	 * This indicates that asynchronous operations issued previously have not completed yet. This result is not actually
+	 * an error, but must be indicated differently than cudaSuccess (which indicates completion). Calls that may return
+	 * this value include cudaEventQuery() and cudaStreamQuery().
+	 */
+	public static final int cudaErrorNotReady = 600;
+
+	/**
+	 * The device encountered a load or store instruction on an invalid memory address. This leaves the process in an
+	 * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must
+	 * be terminated and relaunched.
+	 */
+	public static final int cudaErrorIllegalAddress = 700;
+
+	/**
+	 * This indicates that a launch did not occur because it did not have appropriate resources. Although this error is
+	 * similar to cudaErrorInvalidConfiguration, this error usually indicates that the user has attempted to pass too
+	 * many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register
+	 * count.
+	 */
+	public static final int cudaErrorLaunchOutOfResources = 701;
+
+	/**
+	 * This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see
+	 * the device property kernelExecTimeoutEnabled for more information. This leaves the process in an inconsistent
+	 * state and any further CUDA work will return the same error. To continue using CUDA, the process must be
+	 * terminated and relaunched.
+	 */
+	public static final int cudaErrorLaunchTimeout = 702;
+
+	/**
+	 * This error indicates a kernel launch that uses an incompatible texturing mode.
+	 */
+	public static final int cudaErrorLaunchIncompatibleTexturing = 703;
+
+	/**
+	 * This error indicates that a call to cudaDeviceEnablePeerAccess() is trying to re-enable peer addressing from a
+	 * context which has already had peer addressing enabled.
+	 */
+	public static final int cudaErrorPeerAccessAlreadyEnabled = 704;
+
+	/**
+	 * This error indicates that cudaDeviceDisablePeerAccess() is trying to disable peer addressing which has not been
+	 * enabled yet via cudaDeviceEnablePeerAccess()
+	 */
+	public static final int cudaErrorPeerAccessNotEnabled = 705;
+
+	/**
+	 * This indicates that the user has called cudaSetValidDevices(), cudaSetDeviceFlags(), cudaD3D9SetDirect3DDevice(),
+	 * cudaD3D10SetDirect3DDevice, cudaD3D11SetDirect3DDevice(), or cudaVDPAUSetVDPAUDevice() after initializing the
+	 * CUDA runtime by calling non-device management operations (allocating memory and launching kernels are examples of
+	 * non-device management operations). This error can also be returned if using runtime/driver interoperability and
+	 * there is an existing CUcontext active on the host thread.
+	 */
+	public static final int cudaErrorSetOnActiveProcess = 708;
+
+	/**
+	 * This error indicates that the context current to the calling thread has been destroyed using cuCtxDestroy, or is
+	 * a primary context which has not yet been initialized.
+	 */
+	public static final int cudaErrorContextIsDestroyed = 709;
+
+	/**
+	 * An assert triggered in device code during kernel execution. The device cannot be used again. All existing
+	 * allocations are invalid. To continue using CUDA, the process must be terminated and relaunched
+	 */
+	public static final int cudaErrorAssert = 710;
+
+	/**
+	 * This error indicates that the hardware resources required to enable peer access have been exhausted for one or
+	 * more of the devices passed to cudaEnablePeerAccess().
+	 */
+	public static final int cudaErrorTooManyPeers = 711;
+
+	/**
+	 * This error indicates that the memory range passed to cudaHostRegister() has already been registered.
+	 */
+	public static final int cudaErrorHostMemoryAlreadyRegistered = 712;
+
+	/**
+	 * This error indicates that the pointer passed to cudaHostUnregister() does not correspond to any currently
+	 * registered memory region.
+	 */
+	public static final int cudaErrorHostMemoryNotRegistered = 713;
+
+	/**
+	 * Device encountered an error in the call stack during kernel execution, possibly due to stack corruption or
+	 * exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will
+	 * return the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorHardwareStackError = 714;
+
+	/**
+	 * The device encountered an illegal instruction during kernel execution. This leaves the process in an inconsistent
+	 * state and any further CUDA work will return the same error. To continue using CUDA, the process must be
+	 * terminated and relaunched.
+	 */
+	public static final int cudaErrorIllegalInstruction = 715;
+
+	/**
+	 * The device encountered a load or store instruction on a memory address which is not aligned. This leaves the
+	 * process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA,
+	 * the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorMisalignedAddress = 716;
+
+	/**
+	 * While executing a kernel, the device encountered an instruction which can only operate on memory locations in
+	 * certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed
+	 * address space. This leaves the process in an inconsistent state and any further CUDA work will return the same
+	 * error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorInvalidAddressSpace = 717;
+
+	/**
+	 * The device encountered an invalid program counter. This leaves the process in an inconsistent state and any
+	 * further CUDA work will return the same error. To continue using CUDA, the process must be terminated and
+	 * relaunched.
+	 */
+	public static final int cudaErrorInvalidPc = 718;
+
+	/**
+	 * An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid
+	 * device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more
+	 * information about these cases can be found in the system specific user guide. This leaves the process in an
+	 * inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must
+	 * be terminated and relaunched.
+	 */
+	public static final int cudaErrorLaunchFailure = 719;
+
+	/**
+	 * This error indicates that the number of blocks launched per grid for a kernel that was launched via either
+	 * cudaLaunchCooperativeKernel or cudaLaunchCooperativeKernelMultiDevice exceeds the maximum number of blocks as
+	 * allowed by cudaOccupancyMaxActiveBlocksPerMultiprocessor or
+	 * cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as specified by the
+	 * device attribute cudaDevAttrMultiProcessorCount.
+	 */
+	public static final int cudaErrorCooperativeLaunchTooLarge = 720;
+
+	/**
+	 * An exception occurred on the device while exiting a kernel using tensor memory: the tensor memory was not
+	 * completely deallocated. This leaves the process in an inconsistent state and any further CUDA work will return
+	 * the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorTensorMemoryLeak = 721;
+
+	/**
+	 * This error indicates the attempted operation is not permitted.
+	 */
+	public static final int cudaErrorNotPermitted = 800;
+
+	/**
+	 * This error indicates the attempted operation is not supported on the current system or device.
+	 */
+	public static final int cudaErrorNotSupported = 801;
+
+	/**
+	 * This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the
+	 * system configuration is in a valid state and all required driver daemons are actively running. More information
+	 * about this error can be found in the system specific user guide.
+	 */
+	public static final int cudaErrorSystemNotReady = 802;
+
+	/**
+	 * This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver.
+	 * Refer to the compatibility documentation for supported versions.
+	 */
+	public static final int cudaErrorSystemDriverMismatch = 803;
+
+	/**
+	 * This error indicates that the system was upgraded to run with forward compatibility but the visible hardware
+	 * detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported
+	 * hardware matrix or ensure that only supported hardware is visible during initialization via the
+	 * CUDA_VISIBLE_DEVICES environment variable.
+	 */
+	public static final int cudaErrorCompatNotSupportedOnDevice = 804;
+
+	/**
+	 * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
+	 */
+	public static final int cudaErrorMpsConnectionFailed = 805;
+
+	/**
+	 * This error indicates that the remote procedural call between the MPS server and the MPS client failed.
+	 */
+	public static final int cudaErrorMpsRpcFailure = 806;
+
+	/**
+	 * This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be
+	 * returned when the MPS server is in the process of recovering from a fatal failure.
+	 */
+	public static final int cudaErrorMpsServerNotReady = 807;
+
+	/**
+	 * This error indicates that the hardware resources required to create MPS client have been exhausted.
+	 */
+	public static final int cudaErrorMpsMaxClientsReached = 808;
+
+	/**
+	 * This error indicates the hardware resources required to support device connections have been exhausted.
+	 */
+	public static final int cudaErrorMpsMaxConnectionsReached = 809;
+
+	/**
+	 * This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process
+	 * must be terminated and relaunched.
+	 */
+	public static final int cudaErrorMpsClientTerminated = 810;
+
+	/**
+	 * This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like
+	 * MPS, does not support it.
+	 */
+	public static final int cudaErrorCdpNotSupported = 811;
+
+	/**
+	 * This error indicates, that the program contains an unsupported interaction between different versions of CUDA
+	 * Dynamic Parallelism.
+	 */
+	public static final int cudaErrorCdpVersionMismatch = 812;
+
+	/**
+	 * The operation is not permitted when the stream is capturing.
+	 */
+	public static final int cudaErrorStreamCaptureUnsupported = 900;
+
+	/**
+	 * The current capture sequence on the stream has been invalidated due to a previous error.
+	 */
+	public static final int cudaErrorStreamCaptureInvalidated = 901;
+
+	/**
+	 * The operation would have resulted in a merge of two independent capture sequences.
+	 */
+	public static final int cudaErrorStreamCaptureMerge = 902;
+
+	/**
+	 * The capture was not initiated in this stream.
+	 */
+	public static final int cudaErrorStreamCaptureUnmatched = 903;
+
+	/**
+	 * The capture sequence contains a fork that was not joined to the primary stream.
+	 */
+	public static final int cudaErrorStreamCaptureUnjoined = 904;
+
+	/**
+	 * A dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream
+	 * ordering dependencies are allowed to cross the boundary.
+	 */
+	public static final int cudaErrorStreamCaptureIsolation = 905;
+
+	/**
+	 * The operation would have resulted in a disallowed implicit dependency on a current capture sequence from
+	 * {@code cudaStreamLegacy}.
+	 */
+	public static final int cudaErrorStreamCaptureImplicit = 906;
+
+	/**
+	 * The operation is not permitted on an event which was last recorded in a capturing stream.
+	 */
+	public static final int cudaErrorCapturedEvent = 907;
+
+	/**
+	 * A stream capture sequence not initiated with the {@code cudaStreamCaptureModeRelaxed} argument to
+	 * {@code cudaStreamBeginCapture} was passed to {@code cudaStreamEndCapture} in a different thread.
+	 */
+	public static final int cudaErrorStreamCaptureWrongThread = 908;
+
+	/**
+	 * This indicates that the wait operation has timed out.
+	 */
+	public static final int cudaErrorTimeout = 909;
+
+	/**
+	 * This error indicates that the graph update was not performed because it included changes which violated
+	 * constraints specific to instantiated graph update.
+	 */
+	public static final int cudaErrorGraphExecUpdateFailure = 910;
+
+	/**
+	 * This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external
+	 * device's signal before consuming shared data, the external device signaled an error indicating that the data is
+	 * not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return
+	 * the same error. To continue using CUDA, the process must be terminated and relaunched.
+	 */
+	public static final int cudaErrorExternalDevice = 911;
+
+	/**
+	 * This indicates that a kernel launch error has occurred due to cluster misconfiguration.
+	 */
+	public static final int cudaErrorInvalidClusterSize = 912;
+
+	/**
+	 * Indicates that a function handle is not loaded when calling an API that requires a loaded function.
+	 */
+	public static final int cudaErrorFunctionNotLoaded = 913;
+
+	/**
+	 * This error indicates one or more resources passed in are not valid resource types for the operation.
+	 */
+	public static final int cudaErrorInvalidResourceType = 914;
+
+	/**
+	 * This error indicates one or more resources are insufficient or non-applicable for the operation.
+	 */
+	public static final int cudaErrorInvalidResourceConfiguration = 915;
+
+	/**
+	 * This indicates that an unknown internal error has occurred.
+	 */
+	public static final int cudaErrorUnknown = 999;
+
+	public static final int cudaErrorApiFailureBase = 10000;
+
+	/**
+	 * Returns the name of the constant matching the passed error code, or "Invalid error" if no case matches.
+	 */
+	public static String errorString(int err){
+		return switch(err){
+			case cudaSuccess -> "cudaSuccess";
+			case cudaErrorInvalidValue -> "cudaErrorInvalidValue";
+			case cudaErrorMemoryAllocation -> "cudaErrorMemoryAllocation";
+			case cudaErrorInitializationError -> "cudaErrorInitializationError";
+			case cudaErrorCudartUnloading -> "cudaErrorCudartUnloading";
+			case cudaErrorProfilerDisabled -> "cudaErrorProfilerDisabled";
+			case cudaErrorProfilerNotInitialized -> "cudaErrorProfilerNotInitialized";
+			case cudaErrorProfilerAlreadyStarted -> "cudaErrorProfilerAlreadyStarted";
+			case cudaErrorProfilerAlreadyStopped -> "cudaErrorProfilerAlreadyStopped";
+			case cudaErrorInvalidConfiguration -> "cudaErrorInvalidConfiguration";
+			case cudaErrorInvalidPitchValue -> "cudaErrorInvalidPitchValue";
+			case cudaErrorInvalidSymbol -> "cudaErrorInvalidSymbol";
+			case cudaErrorInvalidHostPointer -> "cudaErrorInvalidHostPointer";
+			case cudaErrorInvalidDevicePointer -> "cudaErrorInvalidDevicePointer";
+			case cudaErrorInvalidTexture -> "cudaErrorInvalidTexture";
+			case cudaErrorInvalidTextureBinding -> "cudaErrorInvalidTextureBinding";
+			case cudaErrorInvalidChannelDescriptor -> "cudaErrorInvalidChannelDescriptor";
+			case cudaErrorInvalidMemcpyDirection -> "cudaErrorInvalidMemcpyDirection";
+			case cudaErrorAddressOfConstant -> "cudaErrorAddressOfConstant";
+			case cudaErrorTextureFetchFailed -> "cudaErrorTextureFetchFailed";
+			case cudaErrorTextureNotBound -> "cudaErrorTextureNotBound";
+			case cudaErrorSynchronizationError -> "cudaErrorSynchronizationError";
+			case cudaErrorInvalidFilterSetting -> "cudaErrorInvalidFilterSetting";
+			case cudaErrorInvalidNormSetting -> "cudaErrorInvalidNormSetting";
+			case cudaErrorMixedDeviceExecution -> "cudaErrorMixedDeviceExecution";
+			case cudaErrorNotYetImplemented -> "cudaErrorNotYetImplemented";
+			case cudaErrorMemoryValueTooLarge -> "cudaErrorMemoryValueTooLarge";
+			case cudaErrorStubLibrary -> "cudaErrorStubLibrary";
+			case cudaErrorInsufficientDriver -> "cudaErrorInsufficientDriver";
+			case cudaErrorCallRequiresNewerDriver -> "cudaErrorCallRequiresNewerDriver";
+			case cudaErrorInvalidSurface -> "cudaErrorInvalidSurface";
+			case cudaErrorDuplicateVariableName -> "cudaErrorDuplicateVariableName";
+			case cudaErrorDuplicateTextureName -> "cudaErrorDuplicateTextureName";
+			case cudaErrorDuplicateSurfaceName -> "cudaErrorDuplicateSurfaceName";
+			case cudaErrorDevicesUnavailable -> "cudaErrorDevicesUnavailable";
+			case cudaErrorIncompatibleDriverContext -> "cudaErrorIncompatibleDriverContext";
+			case cudaErrorMissingConfiguration -> "cudaErrorMissingConfiguration";
+			case cudaErrorPriorLaunchFailure -> "cudaErrorPriorLaunchFailure";
+			case cudaErrorLaunchMaxDepthExceeded -> "cudaErrorLaunchMaxDepthExceeded";
+			case cudaErrorLaunchFileScopedTex -> "cudaErrorLaunchFileScopedTex";
+			case cudaErrorLaunchFileScopedSurf -> "cudaErrorLaunchFileScopedSurf";
+			case cudaErrorSyncDepthExceeded -> "cudaErrorSyncDepthExceeded";
+			case cudaErrorLaunchPendingCountExceeded -> "cudaErrorLaunchPendingCountExceeded";
+			case cudaErrorInvalidDeviceFunction -> "cudaErrorInvalidDeviceFunction";
+			case cudaErrorNoDevice -> "cudaErrorNoDevice";
+			case cudaErrorInvalidDevice -> "cudaErrorInvalidDevice";
+			case cudaErrorDeviceNotLicensed -> "cudaErrorDeviceNotLicensed";
+			case cudaErrorSoftwareValidityNotEstablished -> "cudaErrorSoftwareValidityNotEstablished";
+			case cudaErrorStartupFailure -> "cudaErrorStartupFailure";
+			case cudaErrorInvalidKernelImage -> "cudaErrorInvalidKernelImage";
+			case cudaErrorDeviceUninitialized -> "cudaErrorDeviceUninitialized";
+			case cudaErrorMapBufferObjectFailed -> "cudaErrorMapBufferObjectFailed";
+			case cudaErrorUnmapBufferObjectFailed -> "cudaErrorUnmapBufferObjectFailed";
+			case cudaErrorArrayIsMapped -> "cudaErrorArrayIsMapped";
+			case cudaErrorAlreadyMapped -> "cudaErrorAlreadyMapped";
+			case cudaErrorNoKernelImageForDevice -> "cudaErrorNoKernelImageForDevice";
+			case cudaErrorAlreadyAcquired -> "cudaErrorAlreadyAcquired";
+			case cudaErrorNotMapped -> "cudaErrorNotMapped";
+			case cudaErrorNotMappedAsArray -> "cudaErrorNotMappedAsArray";
+			case cudaErrorNotMappedAsPointer -> "cudaErrorNotMappedAsPointer";
+			case cudaErrorECCUncorrectable -> "cudaErrorECCUncorrectable";
+			case cudaErrorUnsupportedLimit -> "cudaErrorUnsupportedLimit";
+			case cudaErrorDeviceAlreadyInUse -> "cudaErrorDeviceAlreadyInUse";
+			case cudaErrorPeerAccessUnsupported -> "cudaErrorPeerAccessUnsupported";
+			case cudaErrorInvalidPtx -> "cudaErrorInvalidPtx";
+			case cudaErrorInvalidGraphicsContext -> "cudaErrorInvalidGraphicsContext";
+			case cudaErrorNvlinkUncorrectable -> "cudaErrorNvlinkUncorrectable";
+			case cudaErrorJitCompilerNotFound -> "cudaErrorJitCompilerNotFound";
+			case cudaErrorUnsupportedPtxVersion -> "cudaErrorUnsupportedPtxVersion";
+			case cudaErrorJitCompilationDisabled -> "cudaErrorJitCompilationDisabled";
+			case cudaErrorUnsupportedExecAffinity -> "cudaErrorUnsupportedExecAffinity";
+			case cudaErrorUnsupportedDevSideSync -> "cudaErrorUnsupportedDevSideSync";
+			case cudaErrorContained -> "cudaErrorContained";
+			case cudaErrorInvalidSource -> "cudaErrorInvalidSource";
+			case cudaErrorFileNotFound -> "cudaErrorFileNotFound";
+			case cudaErrorSharedObjectSymbolNotFound -> "cudaErrorSharedObjectSymbolNotFound";
+			case cudaErrorSharedObjectInitFailed -> "cudaErrorSharedObjectInitFailed";
+			case cudaErrorOperatingSystem -> "cudaErrorOperatingSystem";
+			case cudaErrorInvalidResourceHandle -> "cudaErrorInvalidResourceHandle";
+			case cudaErrorIllegalState -> "cudaErrorIllegalState";
+			case cudaErrorLossyQuery -> "cudaErrorLossyQuery";
+			case cudaErrorSymbolNotFound -> "cudaErrorSymbolNotFound";
+			case cudaErrorNotReady -> "cudaErrorNotReady";
+			case cudaErrorIllegalAddress -> "cudaErrorIllegalAddress";
+			case cudaErrorLaunchOutOfResources -> "cudaErrorLaunchOutOfResources";
+			case cudaErrorLaunchTimeout -> "cudaErrorLaunchTimeout";
+			case cudaErrorLaunchIncompatibleTexturing -> "cudaErrorLaunchIncompatibleTexturing";
+			case cudaErrorPeerAccessAlreadyEnabled -> "cudaErrorPeerAccessAlreadyEnabled";
+			case cudaErrorPeerAccessNotEnabled -> "cudaErrorPeerAccessNotEnabled";
+			case cudaErrorSetOnActiveProcess -> "cudaErrorSetOnActiveProcess";
+			case cudaErrorContextIsDestroyed -> "cudaErrorContextIsDestroyed";
+			case cudaErrorAssert -> "cudaErrorAssert";
+			case cudaErrorTooManyPeers -> "cudaErrorTooManyPeers";
+			case cudaErrorHostMemoryAlreadyRegistered -> "cudaErrorHostMemoryAlreadyRegistered";
+			case cudaErrorHostMemoryNotRegistered -> "cudaErrorHostMemoryNotRegistered";
+			case cudaErrorHardwareStackError -> "cudaErrorHardwareStackError";
+			case cudaErrorIllegalInstruction -> "cudaErrorIllegalInstruction";
+			case cudaErrorMisalignedAddress -> "cudaErrorMisalignedAddress";
+			case cudaErrorInvalidAddressSpace -> "cudaErrorInvalidAddressSpace";
+			case cudaErrorInvalidPc -> "cudaErrorInvalidPc";
+			case cudaErrorLaunchFailure -> "cudaErrorLaunchFailure";
+			case cudaErrorCooperativeLaunchTooLarge -> "cudaErrorCooperativeLaunchTooLarge";
+			case cudaErrorTensorMemoryLeak -> "cudaErrorTensorMemoryLeak";
+			case cudaErrorNotPermitted -> "cudaErrorNotPermitted";
+			case cudaErrorNotSupported -> "cudaErrorNotSupported";
+			case cudaErrorSystemNotReady -> "cudaErrorSystemNotReady";
+			case cudaErrorSystemDriverMismatch -> "cudaErrorSystemDriverMismatch";
+			case cudaErrorCompatNotSupportedOnDevice -> "cudaErrorCompatNotSupportedOnDevice";
+			case cudaErrorMpsConnectionFailed -> "cudaErrorMpsConnectionFailed";
+			case cudaErrorMpsRpcFailure -> "cudaErrorMpsRpcFailure";
+			case cudaErrorMpsServerNotReady -> "cudaErrorMpsServerNotReady";
+			case cudaErrorMpsMaxClientsReached -> "cudaErrorMpsMaxClientsReached";
+			case cudaErrorMpsMaxConnectionsReached -> "cudaErrorMpsMaxConnectionsReached";
+			case cudaErrorMpsClientTerminated -> "cudaErrorMpsClientTerminated";
+			case cudaErrorCdpNotSupported -> "cudaErrorCdpNotSupported";
+			case cudaErrorCdpVersionMismatch -> "cudaErrorCdpVersionMismatch";
+			case cudaErrorStreamCaptureUnsupported -> "cudaErrorStreamCaptureUnsupported";
+			case cudaErrorStreamCaptureInvalidated -> "cudaErrorStreamCaptureInvalidated";
+			case cudaErrorStreamCaptureMerge -> "cudaErrorStreamCaptureMerge";
+			case cudaErrorStreamCaptureUnmatched -> "cudaErrorStreamCaptureUnmatched";
+			case cudaErrorStreamCaptureUnjoined -> "cudaErrorStreamCaptureUnjoined";
+			case cudaErrorStreamCaptureIsolation -> "cudaErrorStreamCaptureIsolation";
+			case cudaErrorStreamCaptureImplicit -> "cudaErrorStreamCaptureImplicit";
+			case cudaErrorCapturedEvent -> "cudaErrorCapturedEvent";
+			case cudaErrorStreamCaptureWrongThread -> "cudaErrorStreamCaptureWrongThread";
+			case cudaErrorTimeout -> "cudaErrorTimeout";
+			case cudaErrorGraphExecUpdateFailure -> "cudaErrorGraphExecUpdateFailure";
+			case cudaErrorExternalDevice -> "cudaErrorExternalDevice";
+			case cudaErrorInvalidClusterSize -> "cudaErrorInvalidClusterSize";
+			case cudaErrorFunctionNotLoaded -> "cudaErrorFunctionNotLoaded";
+			case cudaErrorInvalidResourceType -> "cudaErrorInvalidResourceType";
+			case cudaErrorInvalidResourceConfiguration -> "cudaErrorInvalidResourceConfiguration";
+			case cudaErrorUnknown -> "cudaErrorUnknown";
+			case cudaErrorApiFailureBase -> "cudaErrorApiFailureBase";
+			default -> "Invalid error"; // fallback for any code that has no case in this switch
+		};
+	}
+
+	private CudaError() {
+		// Private and empty: blocks instantiation from outside; only the static members are meant to be used.
+	}
+
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java b/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java
new file mode 100644
index 00000000000..900cc153e9a
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/CudaMemcpyKind.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+/** Holder for the {@code int} constants that name the direction of a memory copy; not instantiable. */
+public class CudaMemcpyKind {
+
+	/**
+	 * Copy from host memory to host memory.
+	 */
+	public static final int cudaMemcpyHostToHost = 0;
+
+	/**
+	 * Copy from host memory to device memory.
+	 */
+	public static final int cudaMemcpyHostToDevice = 1;
+
+	/**
+	 * Copy from device memory to host memory.
+	 */
+	public static final int cudaMemcpyDeviceToHost = 2;
+
+	/**
+	 * Copy from device memory to device memory.
+	 */
+	public static final int cudaMemcpyDeviceToDevice = 3;
+
+	/**
+	 * The copy direction (between host and device, or between devices) is inferred from the source and destination
+	 * pointers. Requires Unified Virtual Addressing (UVA).
+	 */
+	public static final int cudaMemcpyDefault = 4;
+
+	private CudaMemcpyKind() {
+		// Private and empty: this class only exposes static constants and is never instantiated.
+	}
+}
diff --git a/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java b/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java
new file mode 100644
index 00000000000..60a6f6e2abe
--- /dev/null
+++ b/src/main/java/org/apache/sysds/cujava/runtime/cudaStream_t.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.cujava.runtime;
+
+import org.apache.sysds.cujava.NativePointerObject;
+import org.apache.sysds.cujava.driver.CUstream;
+/** Handle for a CUDA runtime stream, backed by {@link NativePointerObject}; can be built from a {@link CUstream}. */
+public class cudaStream_t extends NativePointerObject {
+	/** Creates a stream handle; only the implicit no-argument superclass constructor runs. */
+	public cudaStream_t() {
+	}
+	/** Creates a runtime stream handle from the driver-API handle {@code stream} by passing it to the superclass. */
+	public cudaStream_t(CUstream stream) {
+		super(stream);
+	}
+	/** Package-private: creates a handle from {@code value} (presumably a native address) via the superclass. */
+	cudaStream_t(long value) {
+		super(value);
+	}
+}