Browse Source

Added a syntax check in the comments

Chimrod 2 months ago
parent
commit
8b4eb01afa
5 changed files with 107 additions and 21 deletions
  1. 69 12
      lib/qparser/lexer.ml
  2. 3 0
      lib/qparser/lexer.mli
  3. 2 1
      test/qsp_parser_test.ml
  4. 26 8
      test/syntax.ml
  5. 7 0
      test/syntax_error.ml

+ 69 - 12
lib/qparser/lexer.ml

@@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit]
 let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
 let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
 
-let rec read_long_string ?(nested = false) level buf buffer =
+(** Read a quote started with '{'
+
+    The function return the parsed string, but the closing token has been
+    rollbacked, leaving the state in [Lexbuf.EndString _].
+
+    The next call to [main] will call the associated function, effectively
+    leaving the string mode in the parser.
+
+    @param nested tell with started another block of string inside this one *)
+let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token
+    =
+ fun ?(nested = false) level buf buffer ->
   let lexbuf = Lexbuf.buffer buffer in
   match%sedlex lexbuf with
   | "<<" ->
@@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer =
           Lexbuf.leave_state buffer;
           Lexbuf.enter_state buffer
             (Lexbuf.EndString Lex_state.readLongStringWraper);
+          (* rollback the latest token *)
           Lexbuf.rollback buffer;
           LITERAL (Buffer.contents buf)
       | _ ->
@@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
       (f.wrap (read_quoted_string f)) buf buffer
   | _ -> raise Not_found
 
+(** Track the kind of nested string inside a multiline string inside a
+    comment.
+
+    Some constructions are not allowed in this specific case (see later)
+*)
+type commentedString = None | Quote | DQuote
+
 let rec skip_comment buffer =
   (* Simplified way to skip the content of a string until the end marker.
      (expect the string to be well formed) *)
-  let rec parse_until_end f =
-    let _ = wait_balance f buffer in
+  let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token =
+   fun f ->
+    let token = wait_balance f buffer in
     match Lexbuf.state buffer with
     | Some Lexbuf.Token ->
         Lexbuf.leave_state buffer;
         parse_until_end f
-    | Some (Lexbuf.EndString _) -> ()
+    | Some (Lexbuf.EndString _) -> token
     | _ -> parse_until_end f
   in
   let lexbuf = Lexbuf.buffer buffer in
   match%sedlex lexbuf with
   | '{' ->
-      parse_until_end (read_long_string 0);
-      let _ = Lex_state.readLongStringWraper.end_string buffer in
+      (* There are illegal constructions inside a comment containing {}
+         block.
+
+         Every opening text marker shall have the corresponding closing token
+         (but inside a nested block we can have unmatched double quote).
+
+         [! { ' }] this gives an error
+         [! { ' ' }] is ok
+      *)
+      let token = parse_until_end (read_long_string 0) in
+      let () =
+        match token with
+        | Tokens.LITERAL (content : string) -> (
+            (* Ensure every opening quote is closed before the end of the
+               comment *)
+            let string_state =
+              String.fold_left content ~init:None ~f:(fun state c ->
+                  match (state, c) with
+                  | None, '\'' -> Quote
+                  | None, '"' -> DQuote
+                  | Quote, '\'' -> None
+                  | DQuote, '"' -> None
+                  | _ -> state)
+            in
+            match string_state with
+            | None -> ()
+            | _ ->
+                Lexbuf.leave_state buffer;
+                raise UnclosedQuote)
+        | _ -> ()
+      in
+      let () =
+        try ignore (Lex_state.readLongStringWraper.end_string buffer)
+        with _ -> ()
+      in
       skip_comment buffer
   | '\'' ->
-      parse_until_end
-        (Lex_state.quotedStringWraper.wrap
-           (read_quoted_string Lex_state.quotedStringWraper));
+      let _ =
+        parse_until_end
+          (Lex_state.quotedStringWraper.wrap
+             (read_quoted_string Lex_state.quotedStringWraper))
+      in
       let _ = Lex_state.quotedStringWraper.end_string buffer in
       skip_comment buffer
   | '"' ->
-      parse_until_end
-        (Lex_state.dQuotedStringWraper.wrap
-           (read_quoted_string Lex_state.dQuotedStringWraper));
+      let _ =
+        parse_until_end
+          (Lex_state.dQuotedStringWraper.wrap
+             (read_quoted_string Lex_state.dQuotedStringWraper))
+      in
       let _ = Lex_state.dQuotedStringWraper.end_string buffer in
       skip_comment buffer
   | eol ->

+ 3 - 0
lib/qparser/lexer.mli

@@ -6,7 +6,10 @@
     working. *)
 
 exception EOF
+
 exception UnclosedQuote
+(** Error reported when a string is not closed properly *)
+
 exception LexError of string
 
 val discard : Lexbuf.t -> unit

+ 2 - 1
test/qsp_parser_test.ml

@@ -1,7 +1,8 @@
 let () =
   Alcotest.run "qsp_parser"
     [
-      Syntax.test;
+      Syntax.test_syntax;
+      Syntax.test_comments;
       Literals.test;
       Syntax_error.test;
       Get_type.test;

+ 26 - 8
test/syntax.ml

@@ -400,6 +400,18 @@ life is unfair." Oh yeah, {curly brackets
 also count}. This is still the same comment. |}
     [ Comment _position ]
 
+let test_comment_string () =
+  _test_instruction {|! {}|} [ Comment _position ];
+  _test_instruction {|! ''|} [ Comment _position ];
+  _test_instruction {|! ""|} [ Comment _position ];
+  _test_instruction {|! {''}|} [ Comment _position ];
+  _test_instruction {|! {""}|} [ Comment _position ];
+  _test_instruction {|! "{"|} [ Comment _position ];
+  _test_instruction {|! '{'|} [ Comment _position ];
+  _test_instruction {|! "'"|} [ Comment _position ];
+  _test_instruction {|! '"'|} [ Comment _position ];
+  ()
+
 (** This test ensure that the unary operator is applied to the whole expression
  *)
 let test_precedence () =
@@ -882,7 +894,7 @@ let test_stattxt () =
             { Tree.Ast.pos = _position; name = "$STATTXT"; index = None } );
     ]
 
-let test =
+let test_syntax =
   ( "Syntax",
     [
       Alcotest.test_case "Location" `Quick test_empty_location;
@@ -915,13 +927,6 @@ let test =
       Alcotest.test_case "Plus_litt" `Quick test_plus_litt;
       Alcotest.test_case "PlusChained" `Quick test_concat;
       Alcotest.test_case "Mod operator" `Quick test_mod;
-      Alcotest.test_case "Comment" `Quick test_comment;
-      Alcotest.test_case "Comment2" `Quick test_comment2;
-      Alcotest.test_case "Comment3" `Quick test_comment3;
-      Alcotest.test_case "Comment4" `Quick test_comment4;
-      Alcotest.test_case "Comment5" `Quick test_comment5;
-      Alcotest.test_case "Comment6" `Quick test_comment6;
-      Alcotest.test_case "Multiline Comment" `Quick test_long_comment;
       Alcotest.test_case "If" `Quick test_if;
       Alcotest.test_case "If_chained" `Quick test_if_chained;
       Alcotest.test_case "If_equality" `Quick test_if_equality;
@@ -952,3 +957,16 @@ let test =
       Alcotest.test_case "Precedence8" `Quick test_precedence8;
       Alcotest.test_case "stattxt" `Quick test_stattxt;
     ] )
+
+let test_comments =
+  ( "Comments",
+    [
+      Alcotest.test_case "Simple Comment" `Quick test_comment;
+      Alcotest.test_case "& Comment" `Quick test_comment2;
+      Alcotest.test_case "Double Comment" `Quick test_comment3;
+      Alcotest.test_case "Comment vs operation" `Quick test_comment4;
+      Alcotest.test_case "Comment5" `Quick test_comment5;
+      Alcotest.test_case "Comment6" `Quick test_comment6;
+      Alcotest.test_case "Multiline Comment" `Quick test_long_comment;
+      Alcotest.test_case "Comments with strings" `Quick test_comment_string;
+    ] )

+ 7 - 0
test/syntax_error.ml

@@ -94,6 +94,12 @@ let unclose_comment () =
   _test_instruction {| ! that's it|}
     { level = Error; loc = _position; message = "Unclosed string" }
 
+(* Same but with nested string *)
+
+let unclose_comment2 () =
+  _test_instruction {|  !{ that's it }|}
+    { level = Error; loc = _position; message = "Unclosed string" }
+
 let syntax_error () =
   _test_instruction {|*clr $ cla|}
     { level = Error; loc = _position; message = "Unexpected character \"\"" }
@@ -275,6 +281,7 @@ let test =
       Alcotest.test_case "act 1" `Quick act_no_column;
       Alcotest.test_case "no &" `Quick missing_ampersand;
       Alcotest.test_case "unclose_comment" `Quick unclose_comment;
+      Alcotest.test_case "unclose_comment2" `Quick unclose_comment2;
       Alcotest.test_case "Syntax error $" `Quick syntax_error;
       Alcotest.test_case "Missing operand" `Quick missing_operand;
       Alcotest.test_case "Unknown function" `Quick unknow_function;