diff --git a/vector/tests-inspect/Inspect/Alloc.hs b/vector/tests-inspect/Inspect/Alloc.hs new file mode 100644 index 00000000..be399e5f --- /dev/null +++ b/vector/tests-inspect/Inspect/Alloc.hs @@ -0,0 +1,102 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{- | +Here we test that GHC is able to optimize well construction of vector +using monadic\/applicative actions. Well is understood as able to +generate code which does not allocate except for buffer and some +constant overhead. +-} +module Inspect.Alloc where + +import Control.Monad.ST +import Data.Int +-- import Data.Monoid +import Data.Functor.Identity +import Test.Tasty +import Test.Tasty.HUnit +import System.Mem +import Test.Alloc + +import qualified Data.Vector.Unboxed as VU + + +tests :: TestTree +tests = testGroup "allocations" + [ testGroup "traversable" + [ testCase "IO" + $ checkAllocations (linear 8) + $ whnfIO (VU.traverse (\_ -> getAllocationCounter) vector) + +#if MIN_VERSION_base(4,17,0) + -- GHC<9.4 doesn't optimize well. + , testCase "ST" + $ checkAllocations (linear 8) + $ (\v -> runST $ VU.traverse (pureST . fromIntegral) v) `whnf` vector +#endif + +#if MIN_VERSION_base(4,15,0) + -- GHC<9.0 doesn't optimize this well. And there's no appetite + -- for finding out why. Thus it's disabled for them. We'll still + -- catch regression going forward. + , testCase "Identity" + $ checkAllocations (linear 8) + $ VU.traverse (\n -> Identity (10*n)) `whnf` vector +#endif + + -- NOTE: Naive traversal is lazy and allocated 2 words per element + -- + -- , testCase "Const Sum" + -- $ checkAllocations constant + -- $ whnf (VU.traverse (Const @_ @() . Sum)) vector + ] + , testGroup "unstreamM" + [ testCase "IO" + $ checkAllocations (linear 8) + $ whnfIO (VU.replicateM size getAllocationCounter) + +#if MIN_VERSION_base(4,17,0) + -- GHC<9.4 doesn't optimize well. + , testCase "ST" + $ checkAllocations (linear 8) + $ (\sz -> runST $ VU.generateM sz pureST) `whnf` size +#endif + + -- , testCase "Identity" + -- $ checkAllocations (linear 8) + -- $ (\sz -> VU.generateM sz (\n -> Identity (fromIntegral n :: Int64))) `whnf` size + ] + ] + + +pureST :: Int -> ST s Int64 +{-# NOINLINE pureST #-} +pureST i = pure $! fromIntegral i + +-- | Constant overhead. Measurement precision is 4k +overhead :: Int64 +overhead = 4096*2 + +-- | Vector size. It should be large so 1byte per element will be +-- large than page. +size :: Int +size = 100000 + +vector :: VU.Vector Int64 +{-# NOINLINE vector #-} +vector = VU.generate size fromIntegral + +-- | N bytes per element + constant overhead. We also check that bound +-- is tight. +linear :: Int -> Range +linear n = Range + { allocHi = fromIntegral (n * size) + overhead + , allocLo = fromIntegral (n * size) + } + +-- | Only constant overhead +constant :: Range +constant = Range { allocHi = overhead + , allocLo = 0 + } diff --git a/vector/tests-inspect/Test/Alloc.hs b/vector/tests-inspect/Test/Alloc.hs new file mode 100644 index 00000000..6720cbc4 --- /dev/null +++ b/vector/tests-inspect/Test/Alloc.hs @@ -0,0 +1,59 @@ +{-# LANGUAGE RecordWildCards #-} +-- | +-- Test that function allocates is in range. This is good way to test +-- that GHC produces tight non-allocating loops. +module Test.Alloc where + +import Control.Exception +import Data.Int +import System.Mem +import Test.Tasty.HUnit +import Text.Printf + +---------------------------------------------------------------- +-- Benchmarking machinery copied from tasty-bench +---------------------------------------------------------------- + +newtype Benchmarkable = Benchmarkable (IO ()) + +whnf :: (a -> b) -> a -> Benchmarkable +{-# NOINLINE whnf #-} +whnf f a = Benchmarkable $ do _ <- evaluate (f a) + return () + +whnfIO :: IO a -> Benchmarkable +{-# NOINLINE whnfIO #-} +whnfIO io = Benchmarkable $ do _ <- evaluate =<< io + return () + + +-- | Measure allocations. Measurements use 'getAllocationCounter' so +-- it's accurate up to 4k bytes. +allocations :: Benchmarkable -> IO Int64 +allocations (Benchmarkable io) = do + -- We need to run `io` twice in order to ensure that all constant + -- parameters are evaluated. + io + n1 <- getAllocationCounter + io + n2 <- getAllocationCounter + return $! n1 - n2 + + +-- | Expected allocations range +data Range = Range { allocLo :: !Int64 + , allocHi :: !Int64 + } + deriving Show + +-- | Check that computation's allocations lie in range +checkAllocations :: Range -> Benchmarkable -> IO () +checkAllocations Range{..} bench = do + alloc <- allocations bench + let msg = unlines [ printf "allocated = %12d" alloc + , printf "Low bound = %12d" allocLo + , printf "Hi bound = %12d" allocHi + ] + assertBool msg $ alloc <= allocHi + && alloc >= allocLo + diff --git a/vector/tests-inspect/main.hs b/vector/tests-inspect/main.hs index 52dd6db0..0a67eb9f 100644 --- a/vector/tests-inspect/main.hs +++ b/vector/tests-inspect/main.hs @@ -1,6 +1,7 @@ module Main (main) where import qualified Inspect +import qualified Inspect.Alloc import qualified Inspect.DerivingVia import Test.Tasty (defaultMain,testGroup) @@ -8,4 +9,5 @@ main :: IO () main = defaultMain $ testGroup "tests" [ Inspect.tests , Inspect.DerivingVia.tests + , Inspect.Alloc.tests ] diff --git a/vector/tests/Utilities.hs b/vector/tests/Utilities.hs index 3c88926c..e4e6dc2d 100644 --- a/vector/tests/Utilities.hs +++ b/vector/tests/Utilities.hs @@ -266,8 +266,6 @@ xs // ps = go xs ps' 0 go [] _ _ = [] --- withIndexFirst :: (Int -> a -> [a]) -> [a] -> [a] - withIndexFirst :: (((Int, a) -> b) -> [(Int, a)] -> c) -> ((Int -> a -> b) -> [a] -> c) withIndexFirst m f = m (uncurry f) . zip [0::Int ..] diff --git a/vector/vector.cabal b/vector/vector.cabal index 62a4369b..479b35fe 100644 --- a/vector/vector.cabal +++ b/vector/vector.cabal @@ -252,13 +252,16 @@ test-suite vector-inspection main-is: main.hs default-language: Haskell2010 Other-modules: Inspect + Inspect.Alloc Inspect.DerivingVia Inspect.DerivingVia.OtherFoo + Test.Alloc build-depends: base -any , primitive >= 0.6.4.0 && < 0.10 , vector -any , tasty + , tasty-hunit , tasty-inspection-testing >= 0.1 library benchmarks-O2