123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- open StdLabels
- type t = {
- buffer : Sedlexing.lexbuf;
- mutable start_p : Lexing.position option;
- state : state Stack.t;
- reset_line : bool;
- mutable recovering : bool;
- }
- and lexer = t -> Tokens.token
- and buffer_builder = ?nested:bool -> Buffer.t -> lexer
- and stringWraper = {
- start_string : lexer -> lexer;
- (** Start a new string. This function is used insed the token lexer, in
- order to identify how to start a new string *)
- wrap : buffer_builder -> buffer_builder;
- (** function used to escape the character and add it to the buffer. This
- function is used inside the string lexer. *)
- end_string : lexer;
- (** Function used to match the end of the string. This function is used
- after the string lexer, in order to identify the end patten for a
- string *)
- }
- and state =
- | Token
- | String of stringWraper
- | MString of int
- | EndString of stringWraper
- | Expression
- let pp_state format = function
- | Token -> Format.fprintf format "Token"
- | String _ -> Format.fprintf format "String"
- | MString _ -> Format.fprintf format "MString"
- | EndString _ -> Format.fprintf format "EndString"
- | Expression -> Format.fprintf format "Expression"
- let state : t -> state option = fun t -> Stack.top_opt t.state
- let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
- let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
- let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
- let start : t -> unit =
- fun t ->
- let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
- let () =
- if not t.reset_line then
- Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
- in
- Stack.clear t.state;
- t.start_p <- None;
- t.recovering <- false
- let positions : t -> Lexing.position * Lexing.position =
- fun t ->
- let default, end_p = Sedlexing.lexing_positions t.buffer in
- let start_p = Option.value ~default t.start_p in
- (start_p, end_p)
- let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
- let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =
- fun ?(reset_line = true) t ->
- {
- buffer = t;
- start_p = None;
- reset_line;
- state = Stack.create ();
- recovering = false;
- }
- let set_start_position : t -> Lexing.position -> unit =
- fun t position ->
- match t.start_p with
- | None -> t.start_p <- Some position
- | _ ->
- (* We are already inside a block code, don’t stack it *)
- ()
- let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
- =
- fun f t ->
- let lexer () =
- (* Clear the previous registered start position if any *)
- t.start_p <- None;
- let token = f t in
- let default, curr_p = positions t in
- let start_p = Option.value ~default t.start_p in
- t.start_p <- None;
- (token, start_p, curr_p)
- in
- lexer
- let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
- let overlay : t -> lexer -> lexer =
- fun t lexer ->
- let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
- List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
- match layer with
- | String wraper | EndString wraper -> wraper.start_string acc
- | _ -> acc)
- let start_recovery : t -> unit = fun t -> t.recovering <- true
- let is_recovery : t -> bool = fun t -> t.recovering
|