A simple module to parse a uri (well, a small subset of the RFC 3986). The Ocamlnet library contains a better implementation, but it also comes with a lot of dependencies…
Grab it, use it, change it, give it away but remember me :) …
(* ref : http://labs.apache.org/webarch/uri/rfc/rfc3986.html#collected-abnf *)
(* ref : http://stackoverflow.com/questions/161738/what-is-the-best-regular-expression-to-check-if-a-string-is-a-valid-url *)
type authority = {
userinfo : string option;
host : string;
port : int option
}
type url = {
url : string;
scheme : string;
authority : authority option;
path : string;
query : (string * string) list;
};;
let try_with f =
try Some(Lazy.force f) with Not_found -> None
;;
let parse_authority s =
let re_s =
"(?:(?P<userinfo>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?" ^
"(?P<host>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=]|%[0-9A-Fa-f]{2})+)" ^
"(?::(?P<port>[0-9]*))?"
in
let rex = Pcre.regexp re_s in
let res = Pcre.exec ~rex s in
{
userinfo = try_with (lazy(Pcre.get_substring res 1));
host = Pcre.get_substring res 2;
port = try_with (lazy(int_of_string (Pcre.get_substring res 3)))
}
;;
let parse_query s =
let re_s =
"(?:\\?(?P<query>(?:[A-Za-z0-9\\-._~!$&\\'()*+,;=:@\\\\/?]|%[0-9A-Fa-f]{2})*))?"
in
let rex = Pcre.regexp re_s in
let res = Pcre.exec ~rex s in
List.map (fun s ->
let a = Pcre.asplit ~rex:(Pcre.regexp "=") s in
(a.(0),a.(1))
)
(Pcre.split ~rex:(Pcre.regexp ";") (Pcre.get_substring res 1))
;;
(* we parse uri of type schema://user:pass@machine:port/path/to/file?a=1;b=2 *)
let parse_remote_uri s =
let re_s = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" in
let rex = Pcre.regexp re_s in
let res = Pcre.exec ~rex s in
try
{
url = Pcre.get_substring res 1;
scheme = Pcre.get_substring res 2;
authority = try_with (lazy(parse_authority (Pcre.get_substring res 4)));
path = Pcre.get_substring res 5;
query = try parse_query (Pcre.get_substring res 6) with Not_found -> []
}
with Not_found -> failwith (Printf.sprintf "Malformed Url : %s" s)
;;
(* we parse uri of the type schema:/path/to/file *)
let parse_local_uri s =
let re_s = "^(([^:/?#]+):)?(([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" in
let rex = Pcre.regexp re_s in
let res = Pcre.exec ~rex s in
try
{
url = Pcre.get_substring res 1;
scheme = Pcre.get_substring res 2;
authority = None;
path = (Pcre.get_substring res 4)^(Pcre.get_substring res 5);
query = []
}
with Not_found -> failwith (Printf.sprintf "Malformed Url : %s" s)
;;
let parse_uri ?(local=true) s =
if local then parse_local_uri s
else parse_remote_uri s
;;
# parse_url "sqlite://user:secret@machine:42/a:c/b/c?a=1;b=2";;
- : url =
{url = "sqlite:"; scheme = "sqlite";
authority =
Some {userinfo = Some "user:secret"; host = "machine"; port = Some 42};
path = "/a:c/b/c"; query = [("a", "1"); ("b", "2")]}
# parse_url "file:/aa/vv/bb";;
- : url =
{url = "file:"; scheme = "file"; authority = None; path = "/aa/vv/bb"; query = []}