Chimrod
/
qsp_parser


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
							open StdLabels

type t = {
  buffer : Sedlexing.lexbuf;
  mutable start_p : Lexing.position option;
  state : state Stack.t;
  reset_line : bool;
  mutable recovering : bool;
}

and lexer = t -> Tokens.token
and buffer_builder = ?nested:bool -> Buffer.t -> lexer

and stringWraper = {
  start_string : lexer -> lexer;
      (** Start a new string. This function is used insed the token lexer, in
          order to identify how to start a new string *)
  wrap : buffer_builder -> buffer_builder;
      (** function used to escape the character and add it to the buffer. This
          function is used inside the string lexer. *)
  end_string : lexer;
      (** Function used to match the end of the string. This function is used
          after the string lexer, in order to identify the end patten for a
          string *)
}

and state =
  | Token
  | String of stringWraper
  | MString of int
  | EndString of stringWraper
  | Expression

let pp_state format = function
  | Token -> Format.fprintf format "Token"
  | String _ -> Format.fprintf format "String"
  | MString _ -> Format.fprintf format "MString"
  | EndString _ -> Format.fprintf format "EndString"
  | Expression -> Format.fprintf format "Expression"

let state : t -> state option = fun t -> Stack.top_opt t.state
let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer

let start : t -> unit =
 fun t ->
  let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
  let () =
    if not t.reset_line then
      Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
  in
  Stack.clear t.state;
  t.start_p <- None;
  t.recovering <- false

let positions : t -> Lexing.position * Lexing.position =
 fun t ->
  let default, end_p = Sedlexing.lexing_positions t.buffer in
  let start_p = Option.value ~default t.start_p in
  (start_p, end_p)

let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer

let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =
 fun ?(reset_line = true) t ->
  {
    buffer = t;
    start_p = None;
    reset_line;
    state = Stack.create ();
    recovering = false;
  }

let set_start_position : t -> Lexing.position -> unit =
 fun t position ->
  match t.start_p with
  | None -> t.start_p <- Some position
  | _ ->
      (* We are already inside a block code, don’t stack it *)
      ()

let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
    =
 fun f t ->
  let lexer () =
    (* Clear the previous registered start position if any *)
    t.start_p <- None;
    let token = f t in
    let default, curr_p = positions t in

    let start_p = Option.value ~default t.start_p in
    t.start_p <- None;

    (token, start_p, curr_p)
  in
  lexer

let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer

let overlay : t -> lexer -> lexer =
 fun t lexer ->
  let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
  List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
      match layer with
      | String wraper | EndString wraper -> wraper.start_string acc
      | _ -> acc)

let start_recovery : t -> unit = fun t -> t.recovering <- true
let is_recovery : t -> bool = fun t -> t.recovering