Correctly parse ado without any statements in it (#46)

MonoidMusician · natefaubion · web-flow · commit a7ca658b4fee · 2023-03-05T08:19:46.000-08:00
* Correctly parse ado without any statements in it

* Tests for ado/in, with empty cases and recovery

* Inline tweaked layout helper with comments explaining why

* Update src/PureScript/CST/Parser.purs

---------

Co-authored-by: Nathan Faubion <natefaubion@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -67,8 +67,9 @@ slowest parse times along with the mean parse time for the set.
 npm run parse-package-set
 ```
 
-You can also benchmark a single file:
+You can also benchmark or parse a single file:
 
 ```sh
 npm run bench-file MyModule.purs
+npm run parse-file -- MyModule.purs --tokens
 ```
diff --git a/bench/ParseFile.purs b/bench/ParseFile.purs
@@ -25,7 +25,7 @@ import PureScript.CST.Types (SourceToken)
 
 main :: Effect Unit
 main = launchAff_ do
-  args <- Array.drop 2 <$> liftEffect Process.argv
+  args <- Array.drop 1 <$> liftEffect Process.argv
   let printTokens = (elem "--tokens" || elem "-t") args
   case Array.head args of
     Just fileName -> do
diff --git a/package.json b/package.json
@@ -3,6 +3,7 @@
   "scripts": {
     "parse-package-set": "spago -x parse-package-set/parse-package-set.dhall run",
     "bench-file": "spago -x bench/bench.dhall build && node --expose-gc --input-type=\"module\" -e \"import { main } from './output/BenchFile/index.js';main()\"",
+    "parse-file": "spago -x bench/bench.dhall build && node --input-type=\"module\" -e \"import { main } from './output/ParseFile/index.js';main()\" --",
     "format": "purs-tidy format-in-place src test bench parse-package-set",
     "check": "purs-tidy check src test bench parse-package-set"
   },
diff --git a/src/PureScript/CST/Parser.purs b/src/PureScript/CST/Parser.purs
@@ -95,14 +95,6 @@ layoutNonEmpty valueParser = ado
   tail <- many (tokLayoutSep *> valueParser) <* tokLayoutEnd
   in NonEmptyArray.cons' head tail
 
-layout :: forall a. Parser a -> Parser (Array a)
-layout valueParser =
-  tokLayoutStart *> values <* tokLayoutEnd
-  where
-  values = (go =<< valueParser) <|> pure []
-  tail = many (tokLayoutSep *> valueParser)
-  go head = Array.cons head <$> tail
-
 parseModule :: Parser (Recovered Module)
 parseModule = do
   header <- parseModuleHeader
@@ -673,7 +665,20 @@ parseDo = do
 parseAdo :: Parser (Recovered Expr)
 parseAdo = do
   keyword <- tokQualifiedKeyword "ado"
-  statements <- layout (recoverDoStatement parseDoStatement)
+  -- A possibly-empty version of `layoutNonEmpty` to handle empty `ado in`
+  statements <- do
+    let
+      -- `recoverDoStatement` recovers too much if it is immediately
+      -- confronted with `TokLayoutEnd`, since that is associated with a
+      -- `layoutStack` _of the parent_ as opposed to the stuff we actually
+      -- want to recover, which we would correctly guess if we saw a statement
+      -- or two inside the block
+      valueParser = recoverDoStatement parseDoStatement
+      nonEmptyCase =
+        Array.cons <$> valueParser <*> many (tokLayoutSep *> valueParser)
+    _ <- tokLayoutStart
+    -- So we explicitly handle `TokLayoutEnd` ahead of time:
+    [] <$ tokLayoutEnd <|> nonEmptyCase <* tokLayoutEnd
   in_ <- tokKeyword "in"
   result <- parseExpr
   pure $ ExprAdo { keyword, statements, in: in_, result }
diff --git a/test/Main.purs b/test/Main.purs
@@ -77,6 +77,77 @@ main = do
       _ ->
         false
 
+  assertParse "Recovered ado statements"
+    """
+    ado
+      foo <- bar
+      a b c +
+      foo
+      in 5
+    """
+    case _ of
+      ParseSucceededWithErrors (ExprAdo { statements }) _
+        | [ DoBind _ _ _
+          , DoError _
+          , DoDiscard _
+          ] <- statements ->
+            true
+      _ ->
+        false
+
+  assertParse "Recovered ado last statement"
+    """
+    ado
+      foo <- bar
+      a b c +
+      in 5
+    """
+    case _ of
+      ParseSucceededWithErrors (ExprAdo { statements }) _
+        | [ DoBind _ _ _
+          , DoError _
+          ] <- statements ->
+            true
+      _ ->
+        false
+
+  assertParse "Recovered ado first statement"
+    """
+    ado
+      a b c +
+      foo <- bar
+      in 5
+    """
+    case _ of
+      ParseSucceededWithErrors (ExprAdo { statements }) _
+        | [ DoError _
+          , DoBind _ _ _
+          ] <- statements ->
+            true
+      _ ->
+        false
+
+  assertParse "Empty ado in"
+    """
+    ado in 1
+    """
+    case _ of
+      (ParseSucceeded _ :: RecoveredParserResult Expr) ->
+        true
+      _ ->
+        false
+
+  assertParse "Empty ado \\n in"
+    """
+    ado
+      in 1
+    """
+    case _ of
+      (ParseSucceeded _ :: RecoveredParserResult Expr) ->
+        true
+      _ ->
+        false
+
   assertParse "Recovered let bindings"
     """
     let