Regular Expressions in OCaml

This program demonstrates common regular-expression tasks in OCaml using the Str library that ships with the compiler. Here’s the full source code:

open Str

(* Walks through the Str library: testing for a match, extracting the matched
   text and its position, reading capture groups, collecting every match, and
   replacing matches. Each demonstration prints one line to standard output.

   Str keeps the result of the most recent match in global state.
   [matched_string], [matched_group] and [match_end] read that state, and the
   first two must be given the same string that was just searched. *)
let main () =
  (* This tests whether a pattern matches a string, starting at position 0. *)
  let match_result = string_match (regexp "p[a-z]+ch") "peach" 0 in
  Printf.printf "%b\n" match_result;

  (* Str.regexp compiles a pattern into a reusable Str.regexp value. *)
  let r = regexp "p[a-z]+ch" in

  (* Here's a match test like we saw earlier, using the compiled regexp. *)
  Printf.printf "%b\n" (string_match r "peach" 0);

  let text = "peach punch" in

  (* This finds the first match for the regexp. [search_forward] raises
     [Not_found] when nothing matches; [text] is known to contain a match. *)
  ignore (search_forward r text 0 : int);
  Printf.printf "%s\n" (matched_string text);

  (* This also finds the first match but reports its start and end indexes:
     [search_forward] returns the start and [match_end] gives the position
     just past the last matched character. *)
  let start_idx = search_forward r text 0 in
  let end_idx = match_end () in
  Printf.printf "idx: [%d %d]\n" start_idx end_idx;

  (* Capture groups are written \( ... \) in Str syntax. Group 0 is the whole
     match and group 1 is the first parenthesised submatch. *)
  let grouped = regexp "p\\([a-z]+\\)ch" in
  ignore (search_forward grouped text 0 : int);
  Printf.printf "[%s %s]\n" (matched_string text) (matched_group 1 text);

  (* To find all matches for a regexp, we repeatedly search from the end of
     the previous match, accumulating results tail-recursively. *)
  let find_all_matches str pos =
    let len = String.length str in
    let rec collect acc pos =
      if pos > len then List.rev acc
      else
        match search_forward r str pos with
        | exception Not_found -> List.rev acc
        | start ->
          let stop = match_end () in
          let found = matched_string str in
          (* Resume after this match so matches never overlap; always advance
             by at least one character so an empty match cannot loop. *)
          collect (found :: acc) (max stop (start + 1))
    in
    collect [] pos
  in
  let all_matches = find_all_matches "peach punch pinch" 0 in
  Printf.printf "%s\n" (String.concat " " all_matches);

  (* The Str module can also be used to replace subsets of strings with other
     values. *)
  Printf.printf "%s\n" (global_replace r "<fruit>" "a peach");

  (* We can use a custom function to transform matched text.
     [global_substitute] calls it with the whole subject string, so the
     current match is read back with [matched_string]. *)
  let upper_case_match subject =
    String.uppercase_ascii (matched_string subject)
  in
  Printf.printf "%s\n" (global_substitute r upper_case_match "a peach")

(* Program entry point. Binding to [()] rather than [_] makes the compiler
   check that [main ()] really returns unit. *)
let () = main ()

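To run the program, save it as regular_expressions.ml and compile it with ocamlc. Str is a separate library, so str.cma must be listed on the command line before the source file (on OCaml 5.0 and later, also pass -I +str so the compiler finds the library without a deprecation alert):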

$ ocamlc -o regular_expressions str.cma regular_expressions.ml
$ ./regular_expressions
true
true
peach
idx: [0 5]
[peach ea]
peach punch pinch
a <fruit>
a PEACH

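This example demonstrates various regular expression operations in OCaml using the Str module. Note that OCaml’s regular expression support is not as extensive as some other languages, so some advanced features may not be directly available. In particular, Str uses its own pattern syntax (capture groups are written \( ... \) rather than ( ... )), and it stores the result of the most recent match in global state, so functions such as matched_string and matched_group must be called right after the match, with the same string that was searched.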

For a complete reference on OCaml regular expressions, check the Str module documentation in the OCaml manual.