QSLexer.fsl 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. {
  2. module QSLexer
  3. open System
  4. open QSParser
  5. open Microsoft.FSharp.Text.Lexing
  6. //
  7. let keywords =
  8. [
  9. "end", END;
  10. "and", AND;
  11. "or", OR; "else", ELSE; "elseif", ELSEIF; "if", IF; "act", ACT;
  12. "set", SET; "let", LET; "no", NO; "obj", OBJ; "mod", MOD;
  13. ] |> Map.ofList
  14. let ops =
  15. [
  16. "+=", EQP; "=+", EQP;
  17. "-=", EQM; "=-", EQM; // x =-7 -> x = x - 7 или x = -7 ?
  18. "++", INC; "--", DECR;
  19. "=", EQ;
  20. "<", LT;
  21. "<=", LE; "=<", LE;
  22. ">", GT;
  23. ">=", GE; "=>", GE; // точно есть
  24. "!", NEQ; "<>", NEQ;
  25. "{", LBRACE;
  26. "}", RBRACE;
  27. "[", LBRACK;
  28. "]", RBRACK;
  29. "(", LPAREN;
  30. ")", RPAREN;
  31. "*", TIMES;
  32. "/", DIVIDE;
  33. "+", PLUS;
  34. "-", MINUS;
  35. ] |> Map.ofList
  36. let brace_depth = ref 0
  37. let comment_depth = ref 0
  38. let in_pattern () = !brace_depth = 0 && !comment_depth = 0
  39. exception Lexical_error of string * string * int * int
  40. let string_buff = new System.Text.StringBuilder(256)
  41. let reset_string_buffer () = string_buff.Clear() |> ignore
  42. let store_string_char (c:char []) = string_buff.Append(c) |> ignore
  43. let store_string_chars (s:string) = string_buff.Append(s) |> ignore
  44. let get_stored_string () = string_buff.ToString() //Buffer.contents string_buff
  45. let char_for_backslash = function
  46. | 'n' -> '\010'
  47. | 'r' -> '\013'
  48. | 'b' -> '\008'
  49. | 't' -> '\009'
  50. | c -> c
  51. let raise_lexical_error (lexbuf:LexBuffer<_>) msg =
  52. let p = lexbuf.StartPos in
  53. raise (Lexical_error (msg,
  54. p.pos_fname,
  55. p.pos_lnum,
  56. p.pos_cnum - p.pos_bol + 1))
  57. let handle_lexical_error fn (lexbuf:LexBuffer<_>) =
  58. let p = lexbuf.StartPos in
  59. let line = p.pos_lnum
  60. let column = p.pos_cnum - p.pos_bol + 1
  61. let file = p.pos_fname
  62. try fn lexbuf
  63. with Lexical_error (msg, "", 0, 0) -> raise(Lexical_error(msg, file, line, column))
  64. let warning (lexbuf:LexBuffer<_>) msg =
  65. let p = lexbuf.StartPos in
  66. Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol + 1) msg;
  67. //flush stderr
  68. let inline decimal_code c d u =
  69. 100 * (int c - 48) + 10 * (int d - 48) + (int u - 48)
  70. let char_for_hexadecimal_code d u =
  71. let d1 = int d in
  72. let val1 = if d1 >= 97 then d1 - 87
  73. else if d1 >= 65 then d1 - 55
  74. else d1 - 48
  75. in
  76. let d2 = int u in
  77. let val2 = if d2 >= 97 then d2 - 87
  78. else if d2 >= 65 then d2 - 55
  79. else d2 - 48
  80. in
  81. int (val1 * 16 + val2)
  82. let incr_loc (lexbuf:LexBuffer<_>) delta =
  83. //let pos = lexbuf
  84. (*
  85. let incr_loc lexbuf delta =
  86. let pos = lexbuf.Lexing.lex_curr_p in
  87. lexbuf.Lexing.lex_curr_p <- { pos with
  88. Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
  89. Lexing.pos_bol = pos.Lexing.pos_cnum - delta;
  90. }*)
  91. let pos = lexbuf.StartPos
  92. printfn "curr %d" pos.pos_lnum
  93. lexbuf.StartPos <- { pos with Lexing.pos_lnum = pos.pos_lnum + 1; Lexing.pos_bol = pos.pos_cnum - delta; }
  94. printfn "CURR %d" lexbuf.StartPos.pos_lnum
  95. let update_loc (lexbuf:LexBuffer<_>) opt_file line =
  96. let pos = lexbuf.StartPos
  97. let new_file = match opt_file with
  98. | None -> pos.pos_fname
  99. | Some f -> f
  100. lexbuf.StartPos <- { pos with Lexing.pos_fname = new_file; Lexing.pos_lnum = line; Lexing.pos_bol = pos.pos_cnum; }
  101. let decr (lexbuf:LexBuffer<_>) a =
  102. let pos = lexbuf.StartPos
  103. lexbuf.StartPos <- { pos with Lexing.pos_cnum = pos.pos_cnum - Array.length a; }
// True while '!' at the current point should start a comment (i.e. we are
// at the start of a logical line or just after '&'); see the tokenize rule.
let comment = ref true
// Advance EndPos one line (fslex position bookkeeping).
let newlineL (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine
// Count line breaks in the current lexeme and advance EndPos once per break.
// NOTE(review): counts (#'\r' + #'\n') / 2, which is only correct for CRLF
// endings; LF-only input under-counts (a lone "\n" registers zero lines).
// A naive fix (count '\n' only) would double-count the CRLF pair that is
// split between the comm rule (which consumes the '\r') and tokenize's
// newline rule — confirm the intended line-ending policy before changing.
let newlinen (lexbuf:LexBuffer<_>) = lexbuf.Lexeme |> Array.sumBy(function '\r' | '\n' -> 1 | _ -> 0) |> fun x -> x / 2 |> fun x -> for i = 1 to x do newlineL lexbuf
  107. }
// Named regular expressions for the QSP source language.
// Letters cover both Latin and Cyrillic ranges.
let char = ['a'-'z' 'A'-'Z' 'а'-'я' 'А'-'Я']
let digit = ['0'-'9']
//let int = '-'?digit+ // 1 - 1 -> [Int 1; Int -1]
let int = digit+
//let float = '-'?digit+ '.' digit+
let float = digit+ '.' digit+
//let identifier = char (char|digit|['-' '_' '.'])* // too greedy: also swallowed "chars--"
let identifier = char (char|digit|['_' '.']|('-'(char|digit)+))* // inner '-' only when followed by a letter/digit, so "chars--" is not one identifier
let whitespace = [' ' '\t']
let newline = "\r\n" | '\n' //"\n\r" | '\n' | '\r'
// Every operator spelling that the ops map knows about.
let operator = "!" | "(" | ")" | "*" | "+" | "++" | "+=" | "-" | "--" | "-=" | "/" | "<" | "<=" | "<>" | "=" | "=+" | "=-" | "=<" | "=>" | ">" | ">=" | "[" | "]" | "{" | "}"
// Characters that may legally follow a backslash inside strings.
let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
  120. rule tokenize = parse
  121. | '!' { if !comment then // если перед ним было ('&'|nl) {whitespace}
  122. comment := false; reset_string_buffer(); handle_lexical_error comm lexbuf; COMMENT(get_stored_string())
  123. else NEQ }
  124. | whitespace { tokenize lexbuf }
  125. | newline(newline|whitespace)* { comment := true; newlinen lexbuf; NEWLINE }
  126. | int { comment := false; INT(Int32.Parse(LexBuffer<_>.LexemeString lexbuf)) }
  127. | float { comment := false; FLOAT(Double.Parse(LexBuffer<_>.LexemeString lexbuf)) }
  128. //| '$' { comment := false; DOLLAR }
  129. | '&' { comment := true; AMP }
  130. | '#' [^'\r''\n']+ { comment := false;
  131. let join sep (xs:seq<char>) = System.String.Join(sep, xs)
  132. let r = lexbuf.Lexeme |> Seq.skip 2 |> join "" |> STARTLOC
  133. //printfn "%A" r;
  134. r}
  135. | '\'' { comment := false; reset_string_buffer(); handle_lexical_error string lexbuf; TSTRING(get_stored_string()) }
  136. | '"' { comment := false; reset_string_buffer(); handle_lexical_error string2 lexbuf; TSTRING(get_stored_string()) }
  137. | ',' { comment := false; COMMA }
  138. | operator { comment := false; ops.[LexBuffer<_>.LexemeString lexbuf] }
  139. | identifier { comment := false;
  140. match keywords.TryFind((LexBuffer<_>.LexemeString lexbuf).ToLower()) with
  141. | Some(token) -> token
  142. | None -> ID(LexBuffer<_>.LexemeString lexbuf) }
  143. | '$' identifier { comment := false; ID(LexBuffer<_>.LexemeString lexbuf) }
  144. //| "--- " identifier " ---------------------------------" { comment := true; ENDLOC }
  145. | "--- " [^'\r''\n']+ { comment := true; ENDLOC }
  146. | ':' { comment := false; COLON }
  147. | eof { EOF }
  148. | _ { failwith "tokenize error" }
  149. (*
  150. and nl = parse
  151. | newline { newlineL lexbuf; nl lexbuf }
  152. | whitespace { nl lexbuf }
  153. | _ { decr lexbuf [|1..10|]; () } *)
  154. and comm = parse
  155. | '\'' { handle_lexical_error string lexbuf; comm lexbuf }
  156. | '"' { handle_lexical_error string2 lexbuf; comm lexbuf }
  157. | '{' { handle_lexical_error stringBrace lexbuf; comm lexbuf }
  158. //| newline { decr lexbuf lexbuf.Lexeme; () }
  159. //| eof { printfn "eof"; decr lexbuf lexbuf.Lexeme; () }
  160. | [^'\n''\r'] { store_string_char lexbuf.Lexeme; comm lexbuf }
  161. | '\r' { newlineL lexbuf; () }
and stringBrace = parse
// Body of a { ... } brace string: accumulate characters until the closing
// '}'. NOTE(review): nested '{' is not tracked, so an inner brace pair
// would terminate the string early — confirm nesting cannot occur here.
| '}' { () }
// A backslash escape stores the single character it denotes.
| '\\' backslash_escapes
{ store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; stringBrace lexbuf }
| eof
{ raise(Lexical_error("unterminated string", "", 0, 0)) }
// Line breaks are allowed inside; the break itself is NOT stored in the
// string body, only the position is advanced.
| newline { newlineL lexbuf; stringBrace lexbuf }
| _
{ store_string_char lexbuf.Lexeme; stringBrace lexbuf }
and string = parse
// Body of a '...' string. A doubled '' is the escape for a literal quote;
// both characters are stored verbatim, so any un-doubling must happen
// downstream — TODO confirm against the parser/consumer.
| "''" { store_string_char lexbuf.Lexeme; string lexbuf }
| '\'' { () }
// A backslash escape stores the single character it denotes.
| '\\' backslash_escapes
{ store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; string lexbuf }
| eof
{ raise(Lexical_error("unterminated string", "", 0, 0)) }
// Strings may span lines; the break characters are not stored.
| newline { newlineL lexbuf; string lexbuf }
| _
{ store_string_char lexbuf.Lexeme; string lexbuf }
and string2 = parse
// Body of a "..." string; mirror of the single-quote rule above.
// A doubled "" escapes a literal quote and is stored verbatim — any
// un-doubling must happen downstream — TODO confirm.
| "\"\"" { store_string_char lexbuf.Lexeme; string2 lexbuf }
| '"' { () }
// A backslash escape stores the single character it denotes.
| '\\' backslash_escapes
{ store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; string2 lexbuf }
| eof
{ raise(Lexical_error("unterminated string", "", 0, 0)) }
// Strings may span lines; the break characters are not stored.
| newline { newlineL lexbuf; string2 lexbuf }
| _
{ store_string_char lexbuf.Lexeme; string2 lexbuf }
  191. (*
  192. and location = parse
  193. | newline { newlineL lexbuf; string lexbuf }
  194. | whitespace { location lexbuf }
  195. | '#' identifier { reset_string_buffer(); store_string_char lexbuf.Lexeme; handle_lexical_error locCounter lexbuf; LOCATIONRAW(get_stored_string()) }
  196. | _ { failwithf "перед началом локации обнаружено это %A" lexbuf.Lexeme }
  197. and locCounter = parse
  198. | "--- " identifier " ---------------------------------" { store_string_char lexbuf.Lexeme }
  199. | eof { raise(Lexical_error("конец файла наступил раньше чем конец локации", "", 0, 0)) }
  200. | _ { store_string_char lexbuf.Lexeme; locCounter lexbuf } *)