Skip to content

Commit

Permalink
Use unicode-data's faster versions of Data.Char functions.
Browse files Browse the repository at this point in the history
This speeds up the benchmarks for tokenize considerably;
it makes little difference in the other benchmarks.

unicode-data is already a transitive dependency, via
unicode-transforms.
  • Loading branch information
jgm committed Jan 16, 2022
1 parent 3a63680 commit e04e1d6
Show file tree
Hide file tree
Showing 8 changed files with 16 additions and 11 deletions.
1 change: 1 addition & 0 deletions commonmark/commonmark.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ library
, transformers
, parsec
, unicode-transforms
, unicode-data
exposed-modules:
Commonmark
Commonmark.Parser
Expand Down
3 changes: 2 additions & 1 deletion commonmark/src/Commonmark/Blocks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ import Control.Monad (foldM, guard, mzero, void, unless,
when)
import Control.Monad.Trans.Class (lift)
import Data.Foldable (foldrM)
import Data.Char (isAsciiUpper, isDigit, isSpace)
import Unicode.Char (isAsciiUpper, isDigit)
import Unicode.Char.General.Compat (isSpace)
import Data.Dynamic
import Data.Text (Text)
import qualified Data.Map.Strict as M
Expand Down
2 changes: 1 addition & 1 deletion commonmark/src/Commonmark/Entity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Text.Read as TR
import Control.Monad (guard, mzero)
import Data.Char (isDigit, isHexDigit, chr)
import Unicode.Char (chr, isDigit, isHexDigit)
import Data.Maybe (isJust)

-- | Lookup an entity, using 'lookupNumericEntity' if it starts with
Expand Down
3 changes: 2 additions & 1 deletion commonmark/src/Commonmark/Html.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ import Data.Text.Encoding (encodeUtf8)
import qualified Data.ByteString.Char8 as B
import qualified Data.Set as Set
import Text.Printf (printf)
import Data.Char (ord, isAlphaNum, isAscii, isSpace)
import Unicode.Char (ord, isAlphaNum, isAscii)
import Unicode.Char.General.Compat (isSpace)
import Data.Maybe (fromMaybe)

data ElementType =
Expand Down
4 changes: 2 additions & 2 deletions commonmark/src/Commonmark/Inlines.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import Commonmark.Types
import Control.Monad (guard, mzero)
import Control.Monad.Trans.State.Strict
import Data.List (foldl')
import Data.Char (isAscii, isLetter)
import Unicode.Char (isAscii, isAlpha)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.Map.Strict as M
import Data.Maybe (isJust, mapMaybe, listToMaybe)
Expand Down Expand Up @@ -489,7 +489,7 @@ pScheme :: Monad m => InlineParser m Text
pScheme = do
t <- satisfyWord (\t -> case T.uncons t of
Nothing -> False
Just (c,rest) -> isAscii c && isLetter c &&
Just (c,rest) -> isAscii c && isAlpha c &&
T.all isAscii rest)
ts <- many $ oneOfToks [WordChars, Symbol '+', Symbol '.', Symbol '-']
let s = untokenize (t:ts)
Expand Down
8 changes: 4 additions & 4 deletions commonmark/src/Commonmark/Tag.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import Commonmark.TokParsers
import Control.Monad (liftM2, guard)
import Control.Monad.Trans.State.Strict
import Control.Monad.Trans.Class (lift)
import Data.Char (isAscii, isLetter)
import Unicode.Char (isAscii, isAlpha)
import qualified Data.Text as T
import Text.Parsec hiding (State)

Expand All @@ -38,7 +38,7 @@ defaultEnders = Enders { scannedForCDATA = False
htmlTagName :: Monad m => ParsecT [Tok] s m [Tok]
htmlTagName = try $ do
let isTagText = T.all isAscii
let startsWithLetter t' = not (T.null t') && isLetter (T.head t')
let startsWithLetter t' = not (T.null t') && isAlpha (T.head t')
t <- satisfyWord (isTagText .&&. startsWithLetter)
rest <- many (symbol '-' <|> satisfyWord isTagText)
return (t:rest)
Expand All @@ -49,7 +49,7 @@ htmlTagName = try $ do
htmlAttributeName :: Monad m => ParsecT [Tok] s m [Tok]
htmlAttributeName = try $ do
let isTagText t' = T.all isAscii t'
let startsWithLetter t' = not (T.null t') && isLetter (T.head t')
let startsWithLetter t' = not (T.null t') && isAlpha (T.head t')
t <- satisfyWord (startsWithLetter .&&. isTagText) <|>
symbol '_' <|>
symbol ':'
Expand Down Expand Up @@ -184,7 +184,7 @@ htmlDeclaration = try $ do
op <- symbol '!'
alreadyScanned <- lift $ gets scannedForDeclaration
guard $ not alreadyScanned
let isDeclName t = not (T.null t) && T.all (isAscii .&&. isLetter) t
let isDeclName t = not (T.null t) && T.all (isAscii .&&. isAlpha) t
name <- satisfyWord isDeclName
ws <- whitespace
contents <- many (satisfyTok (not . hasType (Symbol '>')))
Expand Down
3 changes: 2 additions & 1 deletion commonmark/src/Commonmark/Tokens.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ module Commonmark.Tokens
, untokenize
) where

import Data.Char (isAlphaNum, isSpace)
import Unicode.Char (isAlphaNum)
import Unicode.Char.General.Compat (isSpace)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Data (Data, Typeable)
Expand Down
3 changes: 2 additions & 1 deletion stack.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
resolver: lts-18.10
extra-deps: []
extra-deps:
- unicode-data-0.3.0
ghc-options:
"$locals": -fhide-source-paths
packages:
Expand Down

0 comments on commit e04e1d6

Please sign in to comment.