lexbuf.ml 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. open StdLabels
  2. type t = {
  3. buffer : Sedlexing.lexbuf;
  4. mutable start_p : Lexing.position option;
  5. state : state Stack.t;
  6. reset_line : bool;
  7. mutable recovering : bool;
  8. }
  9. and lexer = t -> Tokens.token
  10. and buffer_builder = ?nested:bool -> Buffer.t -> lexer
  11. and stringWraper = {
  12. start_string : lexer -> lexer;
  13. (** Start a new string. This function is used insed the token lexer, in
  14. order to identify how to start a new string *)
  15. wrap : buffer_builder -> buffer_builder;
  16. (** function used to escape the character and add it to the buffer. This
  17. function is used inside the string lexer. *)
  18. end_string : lexer;
  19. (** Function used to match the end of the string. This function is used
  20. after the string lexer, in order to identify the end patten for a
  21. string *)
  22. }
  23. and state =
  24. | Token
  25. | String of stringWraper
  26. | MString of int
  27. | EndString of stringWraper
  28. | Expression
  29. let pp_state format = function
  30. | Token -> Format.fprintf format "Token"
  31. | String _ -> Format.fprintf format "String"
  32. | MString _ -> Format.fprintf format "MString"
  33. | EndString _ -> Format.fprintf format "EndString"
  34. | Expression -> Format.fprintf format "Expression"
  35. let state : t -> state option = fun t -> Stack.top_opt t.state
  36. let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
  37. let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
  38. let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
  39. let start : t -> unit =
  40. fun t ->
  41. let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
  42. let () =
  43. if not t.reset_line then
  44. Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
  45. in
  46. Stack.clear t.state;
  47. t.start_p <- None;
  48. t.recovering <- false
  49. let positions : t -> Lexing.position * Lexing.position =
  50. fun t ->
  51. let default, end_p = Sedlexing.lexing_positions t.buffer in
  52. let start_p = Option.value ~default t.start_p in
  53. (start_p, end_p)
  54. let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
  55. let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =
  56. fun ?(reset_line = true) t ->
  57. {
  58. buffer = t;
  59. start_p = None;
  60. reset_line;
  61. state = Stack.create ();
  62. recovering = false;
  63. }
  64. let set_start_position : t -> Lexing.position -> unit =
  65. fun t position ->
  66. match t.start_p with
  67. | None -> t.start_p <- Some position
  68. | _ ->
  69. (* We are already inside a block code, don’t stack it *)
  70. ()
  71. let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
  72. =
  73. fun f t ->
  74. let lexer () =
  75. (* Clear the previous registered start position if any *)
  76. t.start_p <- None;
  77. let token = f t in
  78. let default, curr_p = positions t in
  79. let start_p = Option.value ~default t.start_p in
  80. t.start_p <- None;
  81. (token, start_p, curr_p)
  82. in
  83. lexer
  84. let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
  85. let overlay : t -> lexer -> lexer =
  86. fun t lexer ->
  87. let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
  88. List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
  89. match layer with
  90. | String wraper | EndString wraper -> wraper.start_string acc
  91. | _ -> acc)
  92. let start_recovery : t -> unit = fun t -> t.recovering <- true
  93. let is_recovery : t -> bool = fun t -> t.recovering