Skip to content

Commit cc60d93

Browse files
Merge pull request #147 from junjihashimoto/feature/cuda
Add inline-c-cuda package for CUDA
2 parents cc71806 + 63ac4ff commit cc60d93

File tree

9 files changed

+282
-2
lines changed

9 files changed

+282
-2
lines changed

inline-c-cuda/LICENSE

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2015 FP Complete Corporation.
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be
12+
included in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

inline-c-cuda/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Small set of utilities to inline CUDA code. See tests for example.

inline-c-cuda/Setup.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import Distribution.Simple
2+
main = defaultMain

inline-c-cuda/inline-c-cuda.cabal

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
cabal-version: 2.2
2+
name: inline-c-cuda
3+
version: 0.1.0.0
4+
synopsis: Lets you embed CUDA code into Haskell.
5+
description: Utilities to inline CUDA code into Haskell using inline-c. See
6+
tests for example on how to build.
7+
license: MIT
8+
license-file: LICENSE
9+
author: Francesco Mazzoli
10+
maintainer: [email protected]
11+
copyright: (c) 2015-2016 FP Complete Corporation, (c) 2017-2019 Francesco Mazzoli
12+
category: FFI
13+
tested-with: GHC == 9.2.2
14+
build-type: Simple
15+
16+
source-repository head
17+
type: git
18+
location: https://github.com/fpco/inline-c
19+
20+
library
21+
exposed-modules: Language.C.Inline.Cuda
22+
build-depends: base >=4.7 && <5
23+
, bytestring
24+
, inline-c >= 0.9.0.0
25+
, inline-c-cpp
26+
, template-haskell
27+
, text
28+
, safe-exceptions
29+
, containers
30+
, process
31+
hs-source-dirs: src
32+
default-language: Haskell2010
33+
ghc-options: -Wall
34+
extra-libraries: cudart
35+
36+
test-suite tests
37+
type: exitcode-stdio-1.0
38+
hs-source-dirs: test
39+
main-is: tests.hs
40+
build-depends: base >=4 && <5
41+
, bytestring
42+
, inline-c
43+
, inline-c-cpp
44+
, inline-c-cuda
45+
, safe-exceptions
46+
, hspec
47+
, containers
48+
, template-haskell
49+
, vector
50+
default-language: Haskell2010
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{-# LANGUAGE QuasiQuotes #-}
2+
{-# LANGUAGE TemplateHaskell #-}
3+
{-# LANGUAGE OverloadedStrings #-}
4+
5+
-- | Module exposing a 'Context' to inline CUDA code. We only have used
6+
-- this for experiments, so use with caution. See the CUDA tests to see
7+
-- how to build inline CUDA code.
8+
module Language.C.Inline.Cuda
9+
( module Language.C.Inline
10+
, cudaCtx
11+
, Cpp.cppTypePairs
12+
, Cpp.using
13+
, Cpp.AbstractCppExceptionPtr
14+
) where
15+
16+
import qualified Language.Haskell.TH as TH
17+
import qualified Language.Haskell.TH.Syntax as TH
18+
19+
import Language.C.Inline
20+
import Language.C.Inline.Context
21+
import qualified Language.C.Types as CT
22+
import qualified Language.C.Inline.Cpp as Cpp
23+
24+
import qualified Data.Map as Map
25+
import Control.Monad.IO.Class (liftIO)
26+
import System.Exit (ExitCode(..))
27+
import System.Process (readProcessWithExitCode)
28+
29+
compileCuda :: String -> TH.Q FilePath
30+
compileCuda src = do
31+
cuFile <- TH.addTempFile "cu"
32+
oFile <- TH.addTempFile "o"
33+
let (cmd,args) = ("nvcc", ["-c","-o",oFile, cuFile])
34+
(code, stdout, stderr) <- liftIO $ do
35+
writeFile cuFile src
36+
readProcessWithExitCode cmd args ""
37+
case code of
38+
ExitFailure _ -> fail $ "Compile Command: " ++ (foldl (\a b -> a ++ " " ++ b) " " (cmd : args)) ++ "\n" ++ " Output: " ++ stdout ++ "\n" ++ " Error: " ++ stderr
39+
ExitSuccess -> return oFile
40+
41+
-- | The equivalent of 'C.baseCtx' for CUDA. It specifies the @.cu@
42+
-- file extension for the CUDA file, so that nvcc will decide to build CUDA
43+
-- instead of C. See the @.cabal@ test target for an example on how to
44+
-- build.
45+
cudaCtx :: Context
46+
cudaCtx = Cpp.cppCtx <> mempty
47+
{ ctxForeignSrcLang = Just TH.RawObject
48+
, ctxOutput = Just $ \s -> "extern \"C\" {\n" ++ s ++ "\n}"
49+
, ctxEnableCpp = True
50+
, ctxRawObjectCompile = Just compileCuda
51+
, ctxTypesTable = Map.singleton (CT.TypeName "std::exception_ptr") [t|Cpp.AbstractCppExceptionPtr|]
52+
}
53+

inline-c-cuda/test/tests.hs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE ConstraintKinds #-}
3+
{-# LANGUAGE DataKinds #-}
4+
{-# LANGUAGE FlexibleContexts #-}
5+
{-# LANGUAGE FlexibleInstances #-}
6+
{-# LANGUAGE GADTs #-}
7+
{-# LANGUAGE KindSignatures #-}
8+
{-# LANGUAGE MultiParamTypeClasses #-}
9+
{-# LANGUAGE OverloadedStrings #-}
10+
{-# LANGUAGE PolyKinds #-}
11+
{-# LANGUAGE QuasiQuotes #-}
12+
{-# LANGUAGE RankNTypes #-}
13+
{-# LANGUAGE ScopedTypeVariables #-}
14+
{-# LANGUAGE TemplateHaskell #-}
15+
{-# LANGUAGE TypeFamilies #-}
16+
{-# LANGUAGE TypeInType #-}
17+
{-# LANGUAGE TypeOperators #-}
18+
{-# LANGUAGE UndecidableInstances #-}
19+
{-# LANGUAGE TypeApplications #-}
20+
{-# OPTIONS_GHC -Wno-deprecations #-}
21+
22+
import Control.Exception.Safe
23+
import Control.Monad
24+
import qualified Language.C.Inline.Context as CC
25+
import qualified Language.C.Types as CT
26+
import qualified Language.C.Inline.Cuda as C
27+
import qualified Test.Hspec as Hspec
28+
import Test.Hspec (shouldBe)
29+
import Foreign.Ptr (Ptr)
30+
import Data.Monoid
31+
import Foreign.Marshal.Array
32+
import Foreign.Marshal.Alloc
33+
import Foreign.Storable
34+
35+
36+
C.context $ C.cudaCtx
37+
38+
C.include "<iostream>"
39+
C.include "<stdexcept>"
40+
41+
[C.emitBlock|
42+
__global__ void
43+
vectorAdd(const float *A, const float *B, float *C, int numElements)
44+
{
45+
int i = blockDim.x * blockIdx.x + threadIdx.x;
46+
47+
if (i < numElements)
48+
{
49+
C[i] = A[i] + B[i];
50+
}
51+
}
52+
|]
53+
54+
cudaAllocaArray :: forall b. Int -> (Ptr C.CFloat -> IO b) -> IO b
55+
cudaAllocaArray size func = do
56+
let csize = fromIntegral size
57+
alloca $ \(ptr_d_A :: Ptr (Ptr C.CFloat)) -> do
58+
[C.block| void {
59+
cudaError_t err = cudaMalloc((void **)$(float** ptr_d_A), $(int csize) * sizeof(float));
60+
if (err != cudaSuccess)
61+
{
62+
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n", cudaGetErrorString(err));
63+
exit(EXIT_FAILURE);
64+
}
65+
} |]
66+
d_A <- peekElemOff ptr_d_A 0
67+
ret <- func d_A
68+
[C.block| void {
69+
cudaError_t err = cudaFree($(float* d_A));
70+
if (err != cudaSuccess)
71+
{
72+
fprintf(stderr, "Failed to free device vector A (error code %s)!\n", cudaGetErrorString(err));
73+
exit(EXIT_FAILURE);
74+
}
75+
} |]
76+
return ret
77+
78+
cudaMemcpyHostToDevice :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO ()
79+
cudaMemcpyHostToDevice num host device = do
80+
let cnum = fromIntegral num
81+
[C.block| void {
82+
cudaError_t err = cudaMemcpy($(float* device), $(float* host), $(int cnum) * sizeof(float), cudaMemcpyHostToDevice);
83+
if (err != cudaSuccess)
84+
{
85+
fprintf(stderr, "Failed to copy vector from host to device (error code %s)!\n", cudaGetErrorString(err));
86+
exit(EXIT_FAILURE);
87+
}
88+
} |]
89+
90+
cudaMemcpyDeviceToHost :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO ()
91+
cudaMemcpyDeviceToHost num device host = do
92+
let cnum = fromIntegral num
93+
[C.block| void {
94+
cudaError_t err = cudaMemcpy($(float* host), $(float* device), $(int cnum) * sizeof(float), cudaMemcpyDeviceToHost);
95+
if (err != cudaSuccess)
96+
{
97+
fprintf(stderr, "Failed to copy vector C from device to host (error code %s)!\n", cudaGetErrorString(err));
98+
exit(EXIT_FAILURE);
99+
}
100+
} |]
101+
102+
103+
main :: IO ()
104+
main = Hspec.hspec $ do
105+
Hspec.describe "Basic CUDA" $ do
106+
Hspec.it "Add vectors on device" $ do
107+
let numElements = 50000
108+
cNumElements = fromIntegral numElements
109+
allocaArray numElements $ \(h_A :: Ptr C.CFloat) -> do
110+
allocaArray numElements $ \(h_B :: Ptr C.CFloat) -> do
111+
allocaArray numElements $ \(h_C :: Ptr C.CFloat) -> do
112+
cudaAllocaArray numElements $ \(d_A :: Ptr C.CFloat) -> do
113+
cudaAllocaArray numElements $ \(d_B :: Ptr C.CFloat) -> do
114+
cudaAllocaArray numElements $ \(d_C :: Ptr C.CFloat) -> do
115+
[C.block| void {
116+
for (int i = 0; i < $(int cNumElements); ++i)
117+
{
118+
$(float* h_A)[i] = rand()/(float)RAND_MAX;
119+
$(float* h_B)[i] = rand()/(float)RAND_MAX;
120+
}
121+
} |]
122+
cudaMemcpyHostToDevice numElements h_A d_A
123+
cudaMemcpyHostToDevice numElements h_B d_B
124+
[C.block| void {
125+
int threadsPerBlock = 256;
126+
int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock;
127+
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>($(float* d_A), $(float* d_B), $(float* d_C), $(int cNumElements));
128+
} |]
129+
cudaMemcpyDeviceToHost numElements d_C h_C
130+
lA <- peekArray numElements h_A
131+
lB <- peekArray numElements h_B
132+
lC <- peekArray numElements h_C
133+
all (< 1e-5) (map (\((a,b),c) -> abs(a + b - c)) (zip (zip lA lB) lC)) `shouldBe` True

inline-c/src/Language/C/Inline.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ module Language.C.Inline
4242
, block
4343
, include
4444
, verbatim
45+
, emitBlock
4546

4647
-- * 'Ptr' utils
4748
, withPtr

inline-c/src/Language/C/Inline/Context.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ data Context = Context
161161
, ctxForeignSrcLang :: Maybe TH.ForeignSrcLang
162162
-- ^ TH.LangC by default
163163
, ctxEnableCpp :: Bool
164+
-- ^ Compile source code to raw object.
165+
, ctxRawObjectCompile :: Maybe (String -> TH.Q FilePath)
164166
}
165167

166168

@@ -172,6 +174,7 @@ instance Semigroup Context where
172174
, ctxOutput = ctxOutput ctx1 <|> ctxOutput ctx2
173175
, ctxForeignSrcLang = ctxForeignSrcLang ctx1 <|> ctxForeignSrcLang ctx2
174176
, ctxEnableCpp = ctxEnableCpp ctx1 || ctxEnableCpp ctx2
177+
, ctxRawObjectCompile = ctxRawObjectCompile ctx1 <|> ctxRawObjectCompile ctx2
175178
}
176179
#endif
177180

@@ -182,6 +185,7 @@ instance Monoid Context where
182185
, ctxOutput = Nothing
183186
, ctxForeignSrcLang = Nothing
184187
, ctxEnableCpp = False
188+
, ctxRawObjectCompile = Nothing
185189
}
186190

187191
#if !MIN_VERSION_base(4,11,0)
@@ -191,6 +195,7 @@ instance Monoid Context where
191195
, ctxOutput = ctxOutput ctx1 <|> ctxOutput ctx2
192196
, ctxForeignSrcLang = ctxForeignSrcLang ctx1 <|> ctxForeignSrcLang ctx2
193197
, ctxEnableCpp = ctxEnableCpp ctx1 || ctxEnableCpp ctx2
198+
, ctxRawObjectCompile = ctxRawObjectCompile ctx1 <|> ctxRawObjectCompile ctx2
194199
}
195200
#endif
196201

inline-c/src/Language/C/Inline/Internal.hs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ module Language.C.Inline.Internal
3232

3333
-- ** Emitting C code
3434
, emitVerbatim
35+
, emitBlock
3536

3637
-- ** Inlining C code
3738
-- $embedding
@@ -168,11 +169,16 @@ initialiseModuleState mbContext = do
168169
Nothing -> fail "inline-c: ModuleState not present (initialiseModuleState)"
169170
Just ms -> return ms
170171
let lang = fromMaybe TH.LangC (ctxForeignSrcLang context)
172+
addForeignSource =
171173
#if MIN_VERSION_base(4,12,0)
172-
TH.addForeignSource lang (concat (reverse (msFileChunks ms)))
174+
TH.addForeignSource
173175
#else
174-
TH.addForeignFile lang (concat (reverse (msFileChunks ms)))
176+
TH.addForeignFile
175177
#endif
178+
src = (concat (reverse (msFileChunks ms)))
179+
case (lang, ctxRawObjectCompile context) of
180+
(TH.RawObject, Just compile) -> compile src >>= TH.addForeignFilePath lang
181+
(_, _) -> addForeignSource lang src
176182
let moduleState = ModuleState
177183
{ msContext = context
178184
, msGeneratedNames = 0
@@ -234,6 +240,15 @@ emitVerbatim s = do
234240
(ms{msFileChunks = chunk : msFileChunks ms}, ())
235241
return []
236242

243+
-- | Simply appends some string of block to the module's C file. Use with care.
244+
emitBlock :: TH.QuasiQuoter
245+
emitBlock = TH.QuasiQuoter
246+
{ TH.quoteExp = const $ fail "inline-c: quoteExp not implemented (quoteCode)"
247+
, TH.quotePat = const $ fail "inline-c: quotePat not implemented (quoteCode)"
248+
, TH.quoteType = const $ fail "inline-c: quoteType not implemented (quoteCode)"
249+
, TH.quoteDec = emitVerbatim
250+
}
251+
237252
------------------------------------------------------------------------
238253
-- Inlining
239254

0 commit comments

Comments
 (0)