Skip to content

Commit 83e6bc6

Browse files
committed
2 parents a3c84b1 + 9b474dc commit 83e6bc6

22 files changed

+588
-292
lines changed

lichen.cabal

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ library
2929
, Lichen.Plagiarism.Walk
3030
, Lichen.Plagiarism.Concatenate
3131
, Lichen.Plagiarism.Highlight
32+
, Lichen.Plagiarism.Report
3233
, Lichen.Plagiarism.AssignmentSettings
3334
, Lichen.Plagiarism.Render
3435
, Lichen.Plagiarism.Render.Index
3536
, Lichen.Plagiarism.Render.Compare
3637
, Lichen.Count.Main
38+
, Lichen.Count.Counters
3739
build-depends: base >= 4.7 && < 5
3840
, containers
3941
, split
@@ -44,12 +46,14 @@ library
4446
, process
4547
, directory
4648
, filepath
47-
, json
49+
, aeson
4850
, megaparsec
4951
, optparse-applicative
5052
, language-python
5153
, blaze-html
5254
, blaze-markup
55+
, clay
56+
, jmacro
5357
ghc-options: -Wall -Werror -fwarn-incomplete-patterns
5458
default-language: Haskell2010
5559

src/Lichen/Config/Count.hs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
1-
{-# LANGUAGE OverloadedStrings #-}
1+
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}
22

33
module Lichen.Config.Count where
44

5-
import Control.Monad.Except
5+
import Data.Maybe
6+
import Data.Aeson
67

7-
import Lichen.Error
88
import Lichen.Config
99
import Lichen.Config.Languages
10-
11-
type Counter = Language -> String -> FilePath -> Erring Integer
12-
13-
counterDummy :: Counter
14-
counterDummy _ _ _ = throwError $ InvocationError "Invalid counting method specified"
10+
import Lichen.Count.Counters
1511

1612
data Config = Config
17-
{ language :: Language
18-
, method :: Counter
13+
{ dataDir :: FilePath
14+
, language :: Language
15+
, counter :: Counter
1916
, toCount :: Maybe String
2017
, sourceFiles :: [FilePath]
2118
}
19+
instance FromJSON Config where
20+
parseJSON = withObject "config_count" $ \o -> do
21+
dataDir <- fromMaybe (dataDir defaultConfig) <$> o .:? "data_dir"
22+
language <- fromMaybe (language defaultConfig) <$> o .:? "language"
23+
counter <- fromMaybe (counter defaultConfig) <$> o .:? "counter"
24+
toCount <- fromMaybe (toCount defaultConfig) <$> o .:? "to_count"
25+
sourceFiles <- fromMaybe (sourceFiles defaultConfig) <$> o .:? "source_files"
26+
return Config{..}
2227

2328
defaultConfig :: Config
24-
defaultConfig = Config { language = langDummy
25-
, method = counterDummy
29+
defaultConfig = Config { dataDir = ".lichen"
30+
, language = langDummy
31+
, counter = counterDummy
2632
, toCount = Nothing
2733
, sourceFiles = []
2834
}

src/Lichen/Config/Languages.hs

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1-
{-# LANGUAGE OverloadedStrings, GADTs #-}
1+
{-# LANGUAGE OverloadedStrings, GADTs, DeriveGeneric, StandaloneDeriving #-}
22

33
module Lichen.Config.Languages where
44

5+
import GHC.Generics
6+
57
import Data.Hashable
8+
import Data.Aeson
9+
import qualified Data.Text as T
610

711
import Control.Monad.Except
812

13+
import Text.Read (readMaybe)
14+
915
import Lichen.Error
1016
import Lichen.Lexer
1117
import Lichen.Parser
@@ -19,7 +25,8 @@ import qualified Lichen.Parser.Python as Python
1925
data WinnowConfig = WinnowConfig
2026
{ signalThreshold :: Int
2127
, noiseThreshold :: Int
22-
}
28+
} deriving Generic
29+
instance FromJSON WinnowConfig
2330

2431
-- Configuration for a given language. Should typically not need to be
2532
-- modified, but can be overwritten in the case of unexpected instructor
@@ -30,27 +37,28 @@ data Language where
3037
Language :: (Hashable a, Show a) => { exts :: [FilePath]
3138
, lexer :: Lexer a
3239
, winnowConfig :: WinnowConfig
33-
, readToken :: String -> a
40+
, readToken :: String -> Erring a
3441
, parser :: Parser Node
3542
} -> Language
43+
instance FromJSON Language where
44+
parseJSON (String s) = pure $ languageChoice langDummy (Just $ T.unpack s)
45+
parseJSON _ = pure langDummy
3646

3747
dummy :: a -> b -> Erring c
3848
dummy _ _ = throwError $ InvocationError "Specified analysis method is undefined for language"
3949

40-
langDummy :: Language
41-
langDummy = Language [] dummy (WinnowConfig 0 0) (const ()) dummy
50+
smartRead :: Read a => String -> Erring a
51+
smartRead s = case readMaybe s of Just t -> pure t
52+
Nothing -> throwError . InvalidTokenError $ T.pack s
4253

43-
readC :: String -> C.Tok
44-
readC = read
54+
langDummy :: Language
55+
langDummy = Language [] dummy (WinnowConfig 0 0) (const $ pure ()) dummy
4556

4657
langC :: Language
47-
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 9 5) readC dummy
48-
49-
readPython :: String -> Python.Tok
50-
readPython = read
58+
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 9 5) (smartRead :: String -> Erring C.Tok) dummy
5159

5260
langPython :: Language
53-
langPython = Language [".py"] Python.lex (WinnowConfig 9 5) readPython Python.parse
61+
langPython = Language [".py"] Python.lex (WinnowConfig 9 5) (smartRead :: String -> Erring Python.Tok) Python.parse
5462

5563
languageChoice :: Language -> Maybe String -> Language
5664
languageChoice d Nothing = d

src/Lichen/Config/Plagiarism.hs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,40 @@
1+
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}
2+
13
module Lichen.Config.Plagiarism where
24

5+
import Data.Maybe
6+
import Data.Aeson
7+
import qualified Data.Text as T
8+
39
import Lichen.Config
410
import Lichen.Config.Languages
511

612
data Config = Config
713
{ dataDir :: FilePath
814
, concatDir :: FilePath
915
, highlightDir :: FilePath
16+
, reportDir :: FilePath
17+
, reportTitle :: T.Text
1018
, language :: Language
1119
, sourceDir :: Maybe FilePath
1220
}
21+
instance FromJSON Config where
22+
parseJSON = withObject "config_plagiarism" $ \o -> do
23+
dataDir <- fromMaybe (dataDir defaultConfig) <$> o .:? "data_dir"
24+
concatDir <- fromMaybe (concatDir defaultConfig) <$> o .:? "concat_dir"
25+
highlightDir <- fromMaybe (highlightDir defaultConfig) <$> o .:? "highlight_dir"
26+
reportDir <- fromMaybe (reportDir defaultConfig) <$> o .:? "report_dir"
27+
reportTitle <- fromMaybe (reportTitle defaultConfig) <$> o .:? "report_tkitle"
28+
language <- fromMaybe (language defaultConfig) <$> o .:? "language"
29+
sourceDir <- fromMaybe (sourceDir defaultConfig) <$> o .:? "source_dir"
30+
return Config{..}
1331

1432
defaultConfig :: Config
1533
defaultConfig = Config { dataDir = ".lichen"
1634
, concatDir = "concatenated"
1735
, highlightDir = "highlighted"
36+
, reportDir = "report"
37+
, reportTitle = "Plagiarism Detection"
1838
, language = langDummy
1939
, sourceDir = Nothing
2040
}

src/Lichen/Count/Counters.hs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{-# LANGUAGE OverloadedStrings #-}
2+
3+
module Lichen.Count.Counters where
4+
5+
import Data.Hashable
6+
import Data.Aeson
7+
import qualified Data.Text as T
8+
import qualified Data.ByteString as BS
9+
10+
import Control.Monad.Except
11+
12+
import Lichen.Error
13+
import Lichen.Config.Languages
14+
import Lichen.Lexer
15+
import qualified Lichen.Parser as P
16+
17+
newtype Counter = Counter { runCounter :: Language -> String -> FilePath -> Erring Integer }
18+
instance FromJSON Counter where
19+
parseJSON (String s) = pure $ counterChoice counterDummy (Just $ T.unpack s)
20+
parseJSON _ = pure counterDummy
21+
22+
counterDummy :: Counter
23+
counterDummy = Counter $ \_ _ _ -> throwError $ InvocationError "Invalid counting method specified"
24+
25+
counterToken :: Counter
26+
counterToken = Counter $ \(Language _ l _ readTok _) t p -> do
27+
src <- liftIO $ BS.readFile p
28+
tokens <- l p src
29+
rt <- readTok t
30+
return . fromIntegral . length . filter (hash rt ==) . fmap (hash . tdata) $ tokens
31+
32+
counterNode :: Counter
33+
counterNode = Counter $ \l t p -> do
34+
src <- liftIO $ BS.readFile p
35+
tree <- parser l p src
36+
return $ P.countTag (T.pack t) tree
37+
38+
counterCall :: Counter
39+
counterCall = Counter $ \l t p -> do
40+
src <- liftIO $ BS.readFile p
41+
tree <- parser l p src
42+
return $ P.countCall (T.pack t) tree
43+
44+
counterChoice :: Counter -> Maybe String -> Counter
45+
counterChoice d Nothing = d
46+
counterChoice _ (Just "token") = counterToken
47+
counterChoice _ (Just "node") = counterNode
48+
counterChoice _ (Just "call") = counterCall
49+
counterChoice _ _ = counterDummy

src/Lichen/Count/Main.hs

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,46 @@
33
module Lichen.Count.Main where
44

55
import System.Directory
6+
import System.FilePath
67

7-
import Data.Hashable
8+
import Data.Aeson
89
import Data.Semigroup ((<>))
910
import qualified Data.Text as T
10-
import qualified Data.ByteString as BS
11+
import qualified Data.ByteString.Lazy as BS
1112

1213
import Control.Monad.Reader
1314
import Control.Monad.Except
1415

1516
import Options.Applicative
1617

18+
import Lichen.Util
1719
import Lichen.Error
1820
import Lichen.Config
1921
import Lichen.Config.Languages
2022
import Lichen.Config.Count
21-
import qualified Lichen.Parser as P
22-
23-
countToken :: Language -> String -> FilePath -> Erring Integer
24-
countToken (Language _ l _ readTok _) t p = do
25-
src <- liftIO $ BS.readFile p
26-
tokens <- l p src
27-
return . fromIntegral . length . filter (hash (readTok t) ==) . fmap hash $ tokens
28-
29-
countNode :: Language -> String -> FilePath -> Erring Integer
30-
countNode l t p = do
31-
src <- liftIO $ BS.readFile p
32-
tree <- parser l p src
33-
return $ P.countTag (T.pack t) tree
34-
35-
countCall :: Language -> String -> FilePath -> Erring Integer
36-
countCall l t p = do
37-
src <- liftIO $ BS.readFile p
38-
tree <- parser l p src
39-
return $ P.countCall (T.pack t) tree
40-
41-
dispatchCount :: String -> Language -> String -> FilePath -> Erring Integer
42-
dispatchCount "token" = countToken
43-
dispatchCount "node" = countNode
44-
dispatchCount "call" = countCall
45-
dispatchCount "function" = countCall
46-
dispatchCount _ = counterDummy
23+
import Lichen.Count.Counters
4724

4825
parseOptions :: Config -> Parser Config
4926
parseOptions dc = Config
50-
<$> (languageChoice (language dc) <$> (optional . strOption $ long "language" <> short 'l' <> metavar "LANG" <> help "Language of student code"))
51-
<*> fmap dispatchCount (argument str (metavar "COUNTER"))
27+
<$> strOption (long "data-dir" <> short 'd' <> metavar "DIR" <> showDefault <> value (dataDir dc) <> help "Directory to store internal data")
28+
<*> (languageChoice (language dc) <$> (optional . strOption $ long "language" <> short 'l' <> metavar "LANG" <> help "Language of student code"))
29+
<*> (counterChoice (counter dc) <$> (optional . strOption $ long "counter" <> short 'c' <> metavar "COUNTER" <> help "Counting method"))
5230
<*> optional (argument str (metavar "ELEMENT"))
5331
<*> many (argument str (metavar "SOURCE"))
5432

5533
realMain :: Config -> IO ()
56-
realMain c = do
57-
options <- liftIO $ execParser opts
34+
realMain ic = do
35+
iopts <- liftIO . execParser $ opts ic
36+
mcsrc <- readSafe BS.readFile Nothing (dataDir iopts </> "config_count.json")
37+
options <- case mcsrc of Just csrc -> do
38+
c <- case eitherDecode csrc of Left e -> (printError . JSONDecodingError $ T.pack e) >> pure ic
39+
Right t -> pure t
40+
liftIO . execParser $ opts c
41+
Nothing -> pure iopts
5842
flip runConfigured options $ do
5943
config <- ask
6044
t <- case toCount config of Just t -> return t; Nothing -> throwError $ InvocationError "No countable element specified"
6145
ps <- liftIO . mapM canonicalizePath $ sourceFiles config
62-
counts <- lift $ mapM (method config (language config) t) ps
46+
counts <- lift $ mapM (runCounter (counter config) (language config) t) ps
6347
liftIO . print $ sum counts
64-
where opts = info (helper <*> parseOptions c) (fullDesc <> progDesc "Count occurences of a specific AST node" <> header "lichen-count-node - token counting")
48+
where opts c = info (helper <*> parseOptions c) (fullDesc <> progDesc "Count occurences of a specific AST node" <> header "lichen-count-node - token counting")

src/Lichen/Error.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ type Erring = ExceptT LichenError IO
1818

1919
data LichenError = LexError (ParseError (Token BS.ByteString) Dec)
2020
| ParseError T.Text
21+
| InvalidTokenError T.Text
2122
| InvocationError T.Text
2223
| JSONDecodingError T.Text
2324
deriving Show
2425

2526
printError :: LichenError -> IO ()
2627
printError (LexError e) = T.IO.hPutStrLn stderr "Lexer error: " >> putStrLn (parseErrorPretty e)
2728
printError (ParseError t) = T.IO.hPutStrLn stderr ("Parser error: " <> t)
29+
printError (InvalidTokenError t) = T.IO.hPutStrLn stderr ("Invalid token error: " <> t)
2830
printError (InvocationError t) = T.IO.hPutStrLn stderr ("Invocation error: " <> t)
2931
printError (JSONDecodingError t) = T.IO.hPutStrLn stderr ("JSON decoding error: " <> t)

src/Lichen/Lexer.hs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
module Lichen.Lexer where
22

3+
import Data.Foldable()
4+
import Data.Semigroup ((<>))
5+
import qualified Data.List.NonEmpty as NE
36
import qualified Data.ByteString as BS
47

58
import Text.Megaparsec
@@ -8,7 +11,25 @@ import qualified Text.Megaparsec.Lexer as L
811

912
import Lichen.Error
1013

11-
type Lexer a = FilePath -> BS.ByteString -> Erring [a]
14+
type Lexer a = FilePath -> BS.ByteString -> Erring [Tagged a]
15+
16+
data TokPos = TokPos
17+
{ startLine :: !Pos
18+
, endLine :: !Pos
19+
, startCol :: !Pos
20+
, endCol :: !Pos
21+
} deriving (Show, Eq, Ord)
22+
data Tagged a = Tagged { tdata :: a, tpos :: TokPos } deriving (Show, Eq)
23+
instance Ord a => Ord (Tagged a) where
24+
compare (Tagged x _) (Tagged y _) = compare x y
25+
26+
wrap :: Foldable t => Parser (t a) -> b -> Parser (Tagged b)
27+
wrap p x = do
28+
pos <- NE.head . statePos <$> getParserState
29+
s <- p
30+
--_ <- p
31+
return . Tagged x $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s))
32+
--return (x, TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos 1))
1233

1334
-- Parse a C-style character literal. Ex: 'a', '@'.
1435
charLit :: Parser Char

0 commit comments

Comments
 (0)