Add CI for inline-c-cuda

junjihashimoto · junjihashimoto · commit 6d4f4541eac2 · 2023-09-30T06:14:20.000+09:00
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -37,16 +37,19 @@ jobs:
       - name: Build
         run: |
           if [ ${{ matrix.os }} == "ubuntu-latest" ] ; then
-            stack build --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c:gsl-example --flag inline-c-cpp:std-vector-example
+            stack build --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c:gsl-example --flag inline-c-cpp:std-vector-example --flag inline-c-cuda:test-without-cuda
           else
-            stack build --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c-cpp:std-vector-example
+            stack build --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c-cpp:std-vector-example --flag inline-c-cuda:test-without-cuda
           fi
       - name: Test
         run: |
           if [ ${{ matrix.os }} == "ubuntu-latest" ] ; then
-            stack test --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c:gsl-example --flag inline-c-cpp:std-vector-example
+            stack test --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c:gsl-example --flag inline-c-cpp:std-vector-example --flag inline-c-cuda:test-without-cuda
             ./inline-c-cpp/test-error-message-line-numbers.sh --stack-yaml stack-${{ matrix.stackage }}.yaml
           else
-            stack test --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c-cpp:std-vector-example
+            stack test --stack-yaml stack-${{ matrix.stackage }}.yaml --flag inline-c-cpp:std-vector-example --flag inline-c-cuda:test-without-cuda
             ./inline-c-cpp/test-error-message-line-numbers.sh --stack-yaml stack-${{ matrix.stackage }}.yaml
           fi
+        env:
+          INLINE_C_CUDA_SUFFIX: cc
+          INLINE_C_CUDA_COMPILER: g++
diff --git a/inline-c-cuda/inline-c-cuda.cabal b/inline-c-cuda/inline-c-cuda.cabal
@@ -6,17 +6,21 @@ description:         Utilities to inline CUDA code into Haskell using inline-c.
                      tests for example on how to build.
 license:             MIT
 license-file:        LICENSE
-author:              Francesco Mazzoli
-maintainer:          f@mazzo.li
-copyright:           (c) 2015-2016 FP Complete Corporation, (c) 2017-2019 Francesco Mazzoli
+author:              Junji Hashimoto
+maintainer:          junji.hashimoto@gmail.com
+copyright:           (c) 2015-2016 FP Complete Corporation, (c) 2023 Junji Hashimoto
 category:            FFI
-tested-with:         GHC == 9.2.2
+tested-with:         GHC == 9.2.8, GHC == 9.4.7, GHC == 9.6.2
 build-type:          Simple
 
 source-repository head
   type:     git
   location: https://github.com/fpco/inline-c
 
+flag test-without-cuda
+  description:         Test without cuda
+  default:             False
+
 library
   exposed-modules:     Language.C.Inline.Cuda
   build-depends:       base >=4.7 && <5
@@ -31,7 +35,11 @@ library
   hs-source-dirs:      src
   default-language:    Haskell2010
   ghc-options:         -Wall
-  extra-libraries:     cudart
+  if flag(test-without-cuda)
+    cpp-options:       -DTEST_WITHOUT_CUDA
+  else
+    extra-libraries:   cudart
+
 
 test-suite tests
   type:                exitcode-stdio-1.0
@@ -48,3 +56,5 @@ test-suite tests
                      , template-haskell
                      , vector
   default-language:    Haskell2010
+  if flag(test-without-cuda)
+    cpp-options:       -DTEST_WITHOUT_CUDA
diff --git a/inline-c-cuda/src/Language/C/Inline/Cuda.hs b/inline-c-cuda/src/Language/C/Inline/Cuda.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE QuasiQuotes #-}
 {-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE OverloadedStrings #-}
@@ -25,12 +26,21 @@ import qualified Data.Map as Map
 import           Control.Monad.IO.Class (liftIO)
 import           System.Exit (ExitCode(..))
 import           System.Process (readProcessWithExitCode)
+import           System.Environment (lookupEnv)
+import           Data.Maybe (fromMaybe)
 
 compileCuda :: String -> TH.Q FilePath
 compileCuda src = do
-  cuFile <- TH.addTempFile "cu"
+#ifdef TEST_WITHOUT_CUDA
+  nvcc <- fromMaybe "g++" <$> TH.runIO (lookupEnv "INLINE_C_CUDA_COMPILER")
+  cu <- fromMaybe "cc" <$> TH.runIO (lookupEnv "INLINE_C_CUDA_SUFFIX")
+#else
+  nvcc <- fromMaybe "nvcc" <$> TH.runIO (lookupEnv "INLINE_C_CUDA_COMPILER")
+  cu <- fromMaybe "cu" <$> TH.runIO (lookupEnv "INLINE_C_CUDA_SUFFIX")
+#endif
   oFile <- TH.addTempFile "o"
-  let (cmd,args) = ("nvcc", ["-c","-o",oFile, cuFile])
+  cuFile <- TH.addTempFile cu
+  let (cmd,args) = (nvcc, ["-c", "-o", oFile, cuFile])
   (code, stdout, stderr) <- liftIO $ do
     writeFile cuFile src
     readProcessWithExitCode cmd args ""
diff --git a/inline-c-cuda/test/tests.hs b/inline-c-cuda/test/tests.hs
@@ -37,6 +37,57 @@ C.context $ C.cudaCtx
 
 C.include "<iostream>"
 C.include "<stdexcept>"
+C.include "<cstring>"
+
+#ifdef TEST_WITHOUT_CUDA
+
+[C.emitBlock|
+
+void
+vectorAdd(int blocksPerGrid, int threadsPerBlock, const float *A, const float *B, float *C, int numElements)
+{
+    for(int blockIdx = 0; blockIdx < blocksPerGrid ; blockIdx++){
+      int blockDim = threadsPerBlock;
+      for(int threadIdx = 0; threadIdx < threadsPerBlock ; threadIdx++){
+        int i = blockDim * blockIdx + threadIdx;
+        
+        if (i < numElements)
+        {
+            C[i] = A[i] + B[i];
+        }
+      }
+    }
+}
+
+
+typedef int cudaError_t;
+const int cudaSuccess = 1;
+
+cudaError_t cudaMalloc(void** dst, size_t size){
+  *dst = malloc(size);
+  return cudaSuccess;
+}
+
+cudaError_t cudaFree(void* dst){
+  free(dst);
+  return cudaSuccess;
+}
+
+const int cudaMemcpyHostToDevice = 0;
+const int cudaMemcpyDeviceToHost = 1;
+
+cudaError_t cudaMemcpy(void *dst, void *src, size_t nbytes, int direction){
+  memcpy(dst, src, nbytes);
+  return cudaSuccess;
+}
+
+char* cudaGetErrorString(cudaError_t err){
+  return "";
+}
+
+|]
+
+#else
 
 [C.emitBlock|
 __global__ void
@@ -51,6 +102,8 @@ vectorAdd(const float *A, const float *B, float *C, int numElements)
 }
 |]
 
+#endif
+
 cudaAllocaArray :: forall b. Int -> (Ptr C.CFloat -> IO b) -> IO b
 cudaAllocaArray size func = do
   let csize = fromIntegral size
@@ -121,11 +174,19 @@ main = Hspec.hspec $ do
                   } |]
                   cudaMemcpyHostToDevice numElements h_A d_A
                   cudaMemcpyHostToDevice numElements h_B d_B
+#ifdef TEST_WITHOUT_CUDA
+                  [C.block| void {
+                    const int threadsPerBlock = 256;
+                    const int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock;
+                    vectorAdd(blocksPerGrid, threadsPerBlock, $(float* d_A), $(float* d_B), $(float* d_C), $(int cNumElements));
+                  } |]
+#else
                   [C.block| void {
-                    int threadsPerBlock = 256;
-                    int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock;
+                    const int threadsPerBlock = 256;
+                    const int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock;
                     vectorAdd<<<blocksPerGrid, threadsPerBlock>>>($(float* d_A), $(float* d_B), $(float* d_C), $(int cNumElements));
                   } |]
+#endif
                   cudaMemcpyDeviceToHost numElements d_C h_C
                   lA <- peekArray numElements h_A
                   lB <- peekArray numElements h_B
diff --git a/stack-lts-20.yaml b/stack-lts-20.yaml
@@ -3,4 +3,5 @@ packages:
 - inline-c
 - inline-c-cpp
 - inline-c-objc
+- inline-c-cuda
 - sample-cabal-project
diff --git a/stack-lts-21.yaml b/stack-lts-21.yaml
@@ -3,4 +3,5 @@ packages:
 - inline-c
 - inline-c-cpp
 - inline-c-objc
+- inline-c-cuda
 - sample-cabal-project
diff --git a/stack-nightly.yaml b/stack-nightly.yaml
@@ -3,4 +3,5 @@ packages:
 - inline-c
 - inline-c-cpp
 - inline-c-objc
+- inline-c-cuda
 - sample-cabal-project