123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- {
- module QSLexer
- open System
- open QSParser
- open Microsoft.FSharp.Text.Lexing
- //
- let keywords =
- [
- "end", END;
- "and", AND;
- "or", OR; "else", ELSE; "elseif", ELSEIF; "if", IF; "act", ACT;
- "set", SET; "let", LET; "no", NO; "obj", OBJ; "mod", MOD;
- ] |> Map.ofList
- let ops =
- [
- "+=", EQP; "=+", EQP;
- "-=", EQM; "=-", EQM; // x =-7 -> x = x - 7 или x = -7 ?
- "++", INC; "--", DECR;
- "=", EQ;
- "<", LT;
- "<=", LE; "=<", LE;
- ">", GT;
- ">=", GE; "=>", GE; // точно есть
- "!", NEQ; "<>", NEQ;
- "{", LBRACE;
- "}", RBRACE;
- "[", LBRACK;
- "]", RBRACK;
- "(", LPAREN;
- ")", RPAREN;
- "*", TIMES;
- "/", DIVIDE;
- "+", PLUS;
- "-", MINUS;
-
- ] |> Map.ofList
- let brace_depth = ref 0
- let comment_depth = ref 0
- let in_pattern () = !brace_depth = 0 && !comment_depth = 0
- exception Lexical_error of string * string * int * int
- let string_buff = new System.Text.StringBuilder(256)
- let reset_string_buffer () = string_buff.Clear() |> ignore
- let store_string_char (c:char []) = string_buff.Append(c) |> ignore
- let store_string_chars (s:string) = string_buff.Append(s) |> ignore
- let get_stored_string () = string_buff.ToString() //Buffer.contents string_buff
- let char_for_backslash = function
- | 'n' -> '\010'
- | 'r' -> '\013'
- | 'b' -> '\008'
- | 't' -> '\009'
- | c -> c
- let raise_lexical_error (lexbuf:LexBuffer<_>) msg =
- let p = lexbuf.StartPos in
- raise (Lexical_error (msg,
- p.pos_fname,
- p.pos_lnum,
- p.pos_cnum - p.pos_bol + 1))
- let handle_lexical_error fn (lexbuf:LexBuffer<_>) =
- let p = lexbuf.StartPos in
- let line = p.pos_lnum
- let column = p.pos_cnum - p.pos_bol + 1
- let file = p.pos_fname
- try fn lexbuf
- with Lexical_error (msg, "", 0, 0) -> raise(Lexical_error(msg, file, line, column))
- let warning (lexbuf:LexBuffer<_>) msg =
- let p = lexbuf.StartPos in
- Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol + 1) msg;
- //flush stderr
- let inline decimal_code c d u =
- 100 * (int c - 48) + 10 * (int d - 48) + (int u - 48)
- let char_for_hexadecimal_code d u =
- let d1 = int d in
- let val1 = if d1 >= 97 then d1 - 87
- else if d1 >= 65 then d1 - 55
- else d1 - 48
- in
- let d2 = int u in
- let val2 = if d2 >= 97 then d2 - 87
- else if d2 >= 65 then d2 - 55
- else d2 - 48
- in
- int (val1 * 16 + val2)
- let incr_loc (lexbuf:LexBuffer<_>) delta =
- //let pos = lexbuf
- (*
- let incr_loc lexbuf delta =
- let pos = lexbuf.Lexing.lex_curr_p in
- lexbuf.Lexing.lex_curr_p <- { pos with
- Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
- Lexing.pos_bol = pos.Lexing.pos_cnum - delta;
- }*)
- let pos = lexbuf.StartPos
- printfn "curr %d" pos.pos_lnum
- lexbuf.StartPos <- { pos with Lexing.pos_lnum = pos.pos_lnum + 1; Lexing.pos_bol = pos.pos_cnum - delta; }
- printfn "CURR %d" lexbuf.StartPos.pos_lnum
- let update_loc (lexbuf:LexBuffer<_>) opt_file line =
- let pos = lexbuf.StartPos
- let new_file = match opt_file with
- | None -> pos.pos_fname
- | Some f -> f
- lexbuf.StartPos <- { pos with Lexing.pos_fname = new_file; Lexing.pos_lnum = line; Lexing.pos_bol = pos.pos_cnum; }
- let decr (lexbuf:LexBuffer<_>) a =
- let pos = lexbuf.StartPos
- lexbuf.StartPos <- { pos with Lexing.pos_cnum = pos.pos_cnum - Array.length a; }
- let comment = ref true
- let newlineL (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine
- let newlinen (lexbuf:LexBuffer<_>) = lexbuf.Lexeme |> Array.sumBy(function '\r' | '\n' -> 1 | _ -> 0) |> fun x -> x / 2 |> fun x -> for i = 1 to x do newlineL lexbuf
- }
- let char = ['a'-'z' 'A'-'Z' 'а'-'я' 'А'-'Я']
- let digit = ['0'-'9']
- //let int = '-'?digit+ // 1 - 1 -> [Int 1; Int -1]
- let int = digit+
- //let float = '-'?digit+ '.' digit+
- let float = digit+ '.' digit+
- //let identifier = char (char|digit|['-' '_' '.'])* // ловит "chars--"
- let identifier = char (char|digit|['_' '.']|('-'(char|digit)+))* // ловит "chars--"
- let whitespace = [' ' '\t']
- let newline = "\r\n" | '\n' //"\n\r" | '\n' | '\r'
- let operator = "!" | "(" | ")" | "*" | "+" | "++" | "+=" | "-" | "--" | "-=" | "/" | "<" | "<=" | "<>" | "=" | "=+" | "=-" | "=<" | "=>" | ">" | ">=" | "[" | "]" | "{" | "}"
- let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
- rule tokenize = parse
- | '!' { if !comment then // если перед ним было ('&'|nl) {whitespace}
- comment := false; reset_string_buffer(); handle_lexical_error comm lexbuf; COMMENT(get_stored_string())
- else NEQ }
- | whitespace { tokenize lexbuf }
- | newline(newline|whitespace)* { comment := true; newlinen lexbuf; NEWLINE }
- | int { comment := false; INT(Int32.Parse(LexBuffer<_>.LexemeString lexbuf)) }
- | float { comment := false; FLOAT(Double.Parse(LexBuffer<_>.LexemeString lexbuf)) }
- //| '$' { comment := false; DOLLAR }
- | '&' { comment := true; AMP }
- | '#' [^'\r''\n']+ { comment := false;
- let join sep (xs:seq<char>) = System.String.Join(sep, xs)
- let r = lexbuf.Lexeme |> Seq.skip 2 |> join "" |> STARTLOC
- //printfn "%A" r;
- r}
- | '\'' { comment := false; reset_string_buffer(); handle_lexical_error string lexbuf; TSTRING(get_stored_string()) }
- | '"' { comment := false; reset_string_buffer(); handle_lexical_error string2 lexbuf; TSTRING(get_stored_string()) }
- | ',' { comment := false; COMMA }
- | operator { comment := false; ops.[LexBuffer<_>.LexemeString lexbuf] }
- | identifier { comment := false;
- match keywords.TryFind((LexBuffer<_>.LexemeString lexbuf).ToLower()) with
- | Some(token) -> token
- | None -> ID(LexBuffer<_>.LexemeString lexbuf) }
- | '$' identifier { comment := false; ID(LexBuffer<_>.LexemeString lexbuf) }
- //| "--- " identifier " ---------------------------------" { comment := true; ENDLOC }
- | "--- " [^'\r''\n']+ { comment := true; ENDLOC }
- | ':' { comment := false; COLON }
- | eof { EOF }
- | _ { failwith "tokenize error" }
- (*
- and nl = parse
- | newline { newlineL lexbuf; nl lexbuf }
- | whitespace { nl lexbuf }
- | _ { decr lexbuf [|1..10|]; () } *)
- and comm = parse
- | '\'' { handle_lexical_error string lexbuf; comm lexbuf }
- | '"' { handle_lexical_error string2 lexbuf; comm lexbuf }
- | '{' { handle_lexical_error stringBrace lexbuf; comm lexbuf }
- //| newline { decr lexbuf lexbuf.Lexeme; () }
- //| eof { printfn "eof"; decr lexbuf lexbuf.Lexeme; () }
- | [^'\n''\r'] { store_string_char lexbuf.Lexeme; comm lexbuf }
- | '\r' { newlineL lexbuf; () }
- and stringBrace = parse
- | '}' { () }
- | '\\' backslash_escapes
- { store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; stringBrace lexbuf }
- | eof
- { raise(Lexical_error("unterminated string", "", 0, 0)) }
- | newline { newlineL lexbuf; stringBrace lexbuf }
- | _
- { store_string_char lexbuf.Lexeme; stringBrace lexbuf }
- and string = parse
- | "''" { store_string_char lexbuf.Lexeme; string lexbuf }
- | '\'' { () }
- | '\\' backslash_escapes
- { store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; string lexbuf }
- | eof
- { raise(Lexical_error("unterminated string", "", 0, 0)) }
- | newline { newlineL lexbuf; string lexbuf }
- | _
- { store_string_char lexbuf.Lexeme; string lexbuf }
- and string2 = parse
- | "\"\"" { store_string_char lexbuf.Lexeme; string2 lexbuf }
- | '"' { () }
- | '\\' backslash_escapes
- { store_string_char [| char_for_backslash lexbuf.Lexeme.[1] |]; string2 lexbuf }
- | eof
- { raise(Lexical_error("unterminated string", "", 0, 0)) }
- | newline { newlineL lexbuf; string2 lexbuf }
- | _
- { store_string_char lexbuf.Lexeme; string2 lexbuf }
- (*
- and location = parse
- | newline { newlineL lexbuf; string lexbuf }
- | whitespace { location lexbuf }
- | '#' identifier { reset_string_buffer(); store_string_char lexbuf.Lexeme; handle_lexical_error locCounter lexbuf; LOCATIONRAW(get_stored_string()) }
- | _ { failwithf "перед началом локации обнаружено это %A" lexbuf.Lexeme }
- and locCounter = parse
- | "--- " identifier " ---------------------------------" { store_string_char lexbuf.Lexeme }
- | eof { raise(Lexical_error("конец файла наступил раньше чем конец локации", "", 0, 0)) }
- | _ { store_string_char lexbuf.Lexeme; locCounter lexbuf } *)
|