lexbuf.ml 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. open StdLabels
  2. type t = {
  3. buffer : Sedlexing.lexbuf;
  4. mutable start_p : Lexing.position option;
  5. state : state Stack.t;
  6. reset_line : bool;
  7. }
  8. and lexer = t -> Tokens.token
  9. and buffer_builder = ?nested:bool -> Buffer.t -> lexer
  10. and stringWraper = {
  11. start_string : lexer -> lexer;
  12. (** Start a new string. This function is used insed the token lexer, in
  13. order to identify how to start a new string *)
  14. wrap : buffer_builder -> buffer_builder;
  15. (** function used to escape the character and add it to the buffer. This
  16. function is used inside the string lexer. *)
  17. end_string : lexer;
  18. (** Function used to match the end of the string. This function is used
  19. after the string lexer, in order to identify the end patten for a
  20. string *)
  21. }
  22. and state =
  23. | Token
  24. | String of stringWraper
  25. | MString of int
  26. | EndString of stringWraper
  27. | Expression
  28. let pp_state format = function
  29. | Token -> Format.fprintf format "Token"
  30. | String _ -> Format.fprintf format "String"
  31. | MString _ -> Format.fprintf format "MString"
  32. | EndString _ -> Format.fprintf format "EndString"
  33. | Expression -> Format.fprintf format "Expression"
  34. let state : t -> state option = fun t -> Stack.top_opt t.state
  35. let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
  36. let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
  37. let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
  38. let start : t -> unit =
  39. fun t ->
  40. let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
  41. let () =
  42. if not t.reset_line then
  43. Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
  44. in
  45. Stack.clear t.state;
  46. t.start_p <- None
  47. let positions : t -> Lexing.position * Lexing.position =
  48. fun t ->
  49. let default, end_p = Sedlexing.lexing_positions t.buffer in
  50. let start_p = Option.value ~default t.start_p in
  51. (start_p, end_p)
  52. let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
  53. let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =
  54. fun ?(reset_line = true) t ->
  55. { buffer = t; start_p = None; reset_line; state = Stack.create () }
  56. let set_start_position : t -> Lexing.position -> unit =
  57. fun t position ->
  58. match t.start_p with
  59. | None -> t.start_p <- Some position
  60. | _ ->
  61. (* We are already inside a block code, don’t stack it *)
  62. ()
  63. let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
  64. =
  65. fun f t ->
  66. let lexer () =
  67. (* Clear the previous registered start position if any *)
  68. t.start_p <- None;
  69. let token = f t in
  70. let default, curr_p = positions t in
  71. let start_p = Option.value ~default t.start_p in
  72. t.start_p <- None;
  73. (token, start_p, curr_p)
  74. in
  75. lexer
  76. let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
  77. let overlay : t -> lexer -> lexer =
  78. fun t lexer ->
  79. let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
  80. List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
  81. match layer with
  82. | String wraper | EndString wraper -> wraper.start_string acc
  83. | _ -> acc)