Date Tags ocaml

A simple module to parse a uri (well, a small subset of the RFC 3986). The Ocamlnet library contains a better implementation, but it also comes with a lot of dependencies…

Grab it, use it, change it, give it away but remember me :) …

(* ref : http://labs.apache.org/webarch/uri/rfc/rfc3986.html#collected-abnf *)
(* ref : http://stackoverflow.com/questions/161738/what-is-the-best-regular-expression-to-check-if-a-string-is-a-valid-url *)

type authority = {
  userinfo : string option;
  host : string;
  port : int option
}

type url = {
  url : string;
  scheme : string;
  authority : authority option;
  path   : string; 
  query  : (string * string) list; 
};;

let try_with f =
  try Some(Lazy.force f) with Not_found -> None
;;

let parse_authority s =
  let re_s = 
    "(?:(?P<userinfo>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?" ^
    "(?P<host>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=]|%[0-9A-Fa-f]{2})+)" ^
    "(?::(?P<port>[0-9]*))?"
  in
  let rex = Pcre.regexp re_s in
  let res = Pcre.exec ~rex s in
  {
    userinfo = try_with (lazy(Pcre.get_substring res 1));
    host = Pcre.get_substring res 2;
    port = try_with (lazy(int_of_string (Pcre.get_substring res 3)))
  }
;;

let parse_query s = 
  let re_s = 
    "(?:\\?(?P<query>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=:@\\\\/?]|%[0-9A-Fa-f]{2})*))?"
  in
  let rex = Pcre.regexp re_s in
  let res = Pcre.exec ~rex s in
  List.map (fun s ->
    let a = Pcre.asplit ~rex:(Pcre.regexp "=") s in
    (a.(0),a.(1))
  )
  (Pcre.split ~rex:(Pcre.regexp ";") (Pcre.get_substring res 1))
;;

(* we parse uri of type schema://user:pass@machine:port/path/to/file?a=1;b=2 *)
let parse_remote_uri s =
  let re_s = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" in
  let rex = Pcre.regexp re_s in
  let res = Pcre.exec ~rex s in
  try
    {
      url = Pcre.get_substring res 1;
      scheme = Pcre.get_substring res 2;
      authority = try_with (lazy(parse_authority (Pcre.get_substring res 4)));
      path = Pcre.get_substring res 5;
      query = try parse_query (Pcre.get_substring res 6) with Not_found -> []
    }
  with Not_found -> failwith (Printf.sprintf "Malformed Url : %s" s)
;;

(* we parse uri of the type schema:/path/to/file *)
let parse_local_uri s =
  let re_s = "^(([^:/?#]+):)?(([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" in
  let rex = Pcre.regexp re_s in
  let res = Pcre.exec ~rex s in
  try
    {
      url = Pcre.get_substring res 1;
      scheme = Pcre.get_substring res 2;
      authority = None;
      path = (Pcre.get_substring res 4)^(Pcre.get_substring res 5);
      query = []
    }
  with Not_found -> failwith (Printf.sprintf "Malformed Url : %s" s)
;;

let parse_uri ?(local=true) s =
  if local then parse_local_uri s
  else parse_remote_uri s
;;
# parse_url "sqlite://user:secret@machine:42/a:c/b/c?a=1;b=2";;
- : url =
{url = "sqlite:"; scheme = "sqlite";
 authority =
  Some {userinfo = Some "user:secret"; host = "machine"; port = Some 42};
 path = "/a:c/b/c"; query = [("a", "1"); ("b", "2")]}

# parse_url "file:/aa/vv/bb";;
- : url =
{url = "file:"; scheme = "file"; authority = None; path = "/aa/vv/bb"; query = []}