diff --git a/README.md b/README.md index 2c5bb99..0612d63 100644 --- a/README.md +++ b/README.md @@ -142,12 +142,14 @@ The syntax follow Perl's syntax: ## Limitations -### No Pattern Guards +### No Pattern Guards for `ppx_tyre` -Pattern guards are not supported. This is due to the fact that all match -cases are combined into a single regular expression, so if one of the -patterns succeed, the match is committed before we can check the guard -condition. +Pattern guards are not supported in `ppx_tyre`. This is due to the fact that all match +cases are combined into a single regular expression, so if one of the patterns succeed, +the match is committed before we can check the guard condition. + +`ppx_regexp` does support pattern guards by grouping cases with identical patterns +and generating monadic handler functions that evaluate guards sequentially after a pattern matches. ### No Exhaustiveness Check diff --git a/ppx_regexp.opam b/ppx_regexp.opam index 2b629e6..0f0b1ba 100644 --- a/ppx_regexp.opam +++ b/ppx_regexp.opam @@ -10,7 +10,7 @@ bug-reports: "https://github.com/paurkedal/ppx_regexp/issues" depends: [ "ocaml" {>= "4.02.3"} "dune" {>= "1.11"} - "ppxlib" {>= "0.9.0"} + "ppxlib" {>= "0.9.0" & <= "0.35.0"} "re" {>= "1.7.2"} "qcheck" {with-test} ] diff --git a/ppx_regexp/ppx_regexp.ml b/ppx_regexp/ppx_regexp.ml index 6b095da..e49dcf3 100644 --- a/ppx_regexp/ppx_regexp.ml +++ b/ppx_regexp/ppx_regexp.ml @@ -137,69 +137,166 @@ let rec wrap_group_bindings ~loc rhs offG = function let [%p ppat_var ~loc varG] = [%e eG] in [%e wrap_group_bindings ~loc rhs offG bs]] +let rec separate_defaults acc = function + | [] -> List.rev acc, [] + | ({ pc_lhs = { ppat_desc = Ppat_any; _ }; _ } as case) :: rest -> acc, case :: rest + | ({ pc_lhs = { ppat_desc = Ppat_var _; _ }; _ } as case) :: rest -> acc, case :: rest + | case :: rest -> separate_defaults (case :: acc) rest + +let make_default_rhs ~loc = function + | [] -> + let open Lexing in + let pos = loc.Location.loc_start in + let e0 = estring ~loc pos.pos_fname in + let e1 = eint ~loc pos.pos_lnum in + let e2 = eint ~loc (pos.pos_cnum - pos.pos_bol) in + let e = [%expr raise (Match_failure ([%e e0], [%e e1], [%e e2]))] in + warn ~loc "A universal case is recommended." e + | default_cases -> + let transformed = + List.map + (fun case -> + match case.pc_lhs.ppat_desc with + | Ppat_var var -> + { + case with + pc_lhs = ppat_any ~loc; + pc_rhs = + [%expr + let [%p ppat_var ~loc var] = _ppx_regexp_v in + [%e case.pc_rhs]]; + } + | _ -> case) + default_cases + in + match transformed with + | [{ pc_lhs = { ppat_desc = Ppat_any; _ }; pc_guard = None; pc_rhs; _ }] -> + pc_rhs + | _ -> + pexp_match ~loc [%expr _ppx_regexp_v] transformed + let transform_cases ~loc cases = let aux case = - if case.pc_guard <> None then - error ~loc "Guards are not implemented for match%%pcre." - else - Ast_pattern.(parse (pstring __')) loc case.pc_lhs - begin fun {txt = re_src; loc = {loc_start; loc_end; _}} -> - let re_offset = - (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 - in - let pos = {loc_start with pos_cnum = loc_start.pos_cnum + re_offset} in - let re, bs, nG = extract_bindings ~pos re_src in - (re, nG, bs, case.pc_rhs) + Ast_pattern.(parse (pstring __')) + loc case.pc_lhs + begin + fun { txt = re_src; loc = { loc_start; loc_end; _ } } -> + let re_offset = (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 in + let pos = { loc_start with pos_cnum = loc_start.pos_cnum + re_offset; pos_lnum = loc_end.pos_lnum } in + let re, bs, nG = extract_bindings ~pos re_src in + re, nG, bs, case.pc_rhs, case.pc_guard + end + in + let group_by_pattern cases = + List.fold_left + begin + fun acc (re, nG, bs, rhs, guard) -> + let found, groups = + List.fold_left + begin + fun (found, acc_groups) (re', cases) -> + if found then found, (re', cases) :: acc_groups + else if re = re' then true, (re', (nG, bs, rhs, guard) :: cases) :: acc_groups + else false, (re', cases) :: acc_groups + end + (false, []) acc + in + if found then groups else (re, [ nG, bs, rhs, guard ]) :: groups end + [] cases in - let cases, default_rhs = - (match List.rev (*_map rewrite_case*) cases with - | {pc_lhs = {ppat_desc = Ppat_any; _}; pc_rhs; pc_guard = None} :: cases -> - (cases, pc_rhs) - | {pc_lhs = {ppat_desc = Ppat_var var; _}; pc_rhs; pc_guard = None} :: - cases -> - let rhs = - [%expr let [%p ppat_var ~loc var] = _ppx_regexp_v in [%e pc_rhs]] in - (cases, rhs) - | cases -> - let open Lexing in - let pos = loc.Location.loc_start in - let e0 = estring ~loc pos.pos_fname in - let e1 = eint ~loc pos.pos_lnum in - let e2 = eint ~loc (pos.pos_cnum - pos.pos_bol) in - let e = [%expr raise (Match_failure ([%e e0], [%e e1], [%e e2]))] in - (cases, warn ~loc "A universal case is recommended for %pcre." e)) + + let compute_offsets l = + let result, _ = + List.fold_left + begin + fun (acc, offG) (re, case_group) -> + let nG = + let n, _, _, _ = List.hd (List.rev case_group) in + n + in + (re, case_group, offG) :: acc, offG + nG + end + ([], 0) l + in + List.rev result in - let cases = List.rev_map aux cases in - let res = pexp_array ~loc (List.map (fun (re, _, _, _) -> re) cases) in - let comp = [%expr - let a = Array.map (fun s -> Re.mark (Re.Perl.re s)) [%e res] in - let marks = Array.map fst a in - let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in - (re, marks) - ] in + + let cases, default_cases = separate_defaults [] cases in + let default_rhs = make_default_rhs ~loc default_cases in + let processed_cases = List.map aux cases |> group_by_pattern |> compute_offsets in + + let res = pexp_array ~loc @@ List.map (fun (re, _, _) -> re) processed_cases in + + let comp = + [%expr + let a = Array.map (fun s -> Re.mark (Re.Perl.re s)) [%e res] in + let marks = Array.map fst a in + let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in + re, marks] + in + let var = fresh_var () in - let re_binding = - value_binding ~loc ~pat:(ppat_var ~loc {txt = var; loc}) ~expr:comp + let re_binding = value_binding ~loc ~pat:(ppat_var ~loc { txt = var; loc }) ~expr:comp in + let e_comp = pexp_ident ~loc { txt = Lident var; loc } in + + let case_handlers = + List.mapi + begin + fun i (_, case_group, offG) -> + let handler_name = Printf.sprintf "_case_%d" i in + let handler_body = + let rec mk_guard_chains = function + | [] -> [%expr None] + | (_, bs, rhs, guard) :: rest -> + let bs = List.rev bs in + begin + match guard with + | None -> [%expr Some [%e wrap_group_bindings ~loc rhs offG bs]] + | Some guard_expr -> + let guarded = [%expr if [%e guard_expr] then Some [%e rhs] else [%e mk_guard_chains rest]] in + wrap_group_bindings ~loc guarded offG bs + end + in + [%expr fun _g -> [%e mk_guard_chains (List.rev case_group)]] + in + handler_name, handler_body + end in - let e_comp = pexp_ident ~loc {txt = Lident var; loc} in - - let rec handle_cases i offG = function - | [] -> [%expr assert false] - | (_, nG, bs, rhs) :: cases -> - [%expr - if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then - [%e wrap_group_bindings ~loc rhs offG bs] - else - [%e handle_cases (i + 1) (offG + nG) cases]] + + let mk_checks cases_with_offsets = + let indexed = List.mapi (fun i x -> i, x) cases_with_offsets in + List.fold_right + begin + fun (i, _) acc -> + let handler_name = Printf.sprintf "_case_%d" i in + [%expr + if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then [%e pexp_ident ~loc { txt = Lident handler_name; loc }] _g + else [%e acc]] + end + indexed [%expr None] in - let cases = + + let handlers = case_handlers processed_cases in + let dispatchers = mk_checks processed_cases in + + let match_expr = [%expr - (match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with - | None -> [%e default_rhs] - | Some _g -> [%e handle_cases 0 0 cases])] + match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with + | None -> [%e default_rhs] + | Some _g -> + [%e + List.fold_left + begin + fun acc (name, body) -> + [%expr + let [%p ppat_var ~loc { txt = name; loc }] = [%e body] in + [%e acc]] + end + [%expr match [%e dispatchers] with Some result -> result | None -> [%e default_rhs]] + handlers]] in - (cases, re_binding) + match_expr, re_binding let transformation = object inherit [value_binding list] Ast_traverse.fold_map as super