transparently open compressed files in ocaml

The Pervasive.in_channel in the ocaml std library is not extensible. If you want to mix different in_channel, for example, one from Pervasive and on from the Gzip library, then you are in trouble. The good fellows of extlib solved this problem providing an extensible stream data type in the IO library [1] . This is a small example using that module.

let gzip_open_in file =
    let ch = Gzip.open_in file in
    IO.create_in
    ~read:(fun () -> Gzip.input_char ch)
    ~input:(Gzip.input ch)
    ~close:(fun () -> Gzip.close_in ch)

let std_open_in file =
    let ch = open_in file in
    IO.create_in
    ~read:(fun () -> input_char ch)
    ~input:(input ch)
    ~close:(fun () -> close_in ch)

let main () =
    let file = Sys.argv.(1) in
    let ch =
        if Filename.check_suffix file ".gz" then gzip_open_in file 
        else std_open_in file
    in
    try
        while true do
            print_endline (IO.read_line ch)
        done
    with End_of_file ->
        IO.close_in ch
;;

main ();;

untested compile commands (I use ocamlbuild + custom myocamlbuild.ml):

ocamlfind ocamldep -package extlib -package zip -modules test.ml > test.ml.depends
ocamlfind ocamlc -c -package extlib -package zip -o test.cmo test.ml
ocamlfind ocamlc -linkpkg -package extlib -package zip test.cmo -o test.byte

[1] http://ocaml-lib.sourceforge.net/doc/IO.html


avalaible bdd libraries

UPDATE

In the end I found ocaml bindings for the Buddy BDD library. yuppi \o/


BDDs or Binary Decision Diagrams are a method of representing boolean expressions. I searched the net for available BDD libraries (I’ve considered different BDD variants in my research). In particular I focused on OCaml implementations. My conclusion is that as today there is no viable native implementation of an efficient bdd library. It seems common knowledge (take this cum granis salis , I haven’t done any work in this direction) that the fastest bdd library is buddy, but there are not OCaml bindings to it.

Next step would be to run few tests and to evaluate the available OCaml implementations.

These are my findings. I’m sure the list is not exhaustive, also because many bdd implementations are usually anonymously embedded as part of larger projects. In particular the model checking community and the planning community do extensive use of BDDs :

First of all what is a bdd : Wikipedia .

Ocaml libraries (bindings and native) :

  • Jean-Christophe Filliâtre (ocaml implementation) Paper Code

  • bindings to the CUDD BDD library Code

  • Olivier Michel (ocaml implementation) Code

  • Xavier Leroy (part of an experimental sat solver) Code

  • John Harrison Code

  • Ocaml implementation (who is the author ?) Wiki

C/C++ Libraries

Other Languages

Relevant Mailing list Messages

The ocaml ml has several references to BDDs. These are 3 interesting threads that I’ve used as a starting point for my research.

  • In this Thread there is mention of a possible binding for Jørn Lind-Nielsen’s BDD library BuDDy. I’m wondering if this binding was ever released.

  • In this Thread Alain Frish points out that none of existing ocaml libraries implements automatic reordering of variables… And I don’t know if the state of affairs is changed at this regard.

  • In this Thread David Mentre announces a preliminary work on binding for the cudd library, but the link is broken… and there is a mention to a caml-light implementation of a robdd library that I was also not able to retrieve.


how to add a custom lexer to camlp4

Date Tags ocaml

Adding a custom parser in the old camlp4 (now camlp5) was relatively easy. The new camlp4 is quite different. The problem was discussed in two recent threads in the ocaml mailing list here and here.

The main point is to provide a new Lexer module with a compatible signature with the Camlp4 lexer.

There are 3 camlp4 modules that should be defined, namely Loc, Token and Error. The signature to redefine a camlp4 lexer is

open Camlp4.Sig

type token =
  | KWD of string
  | CHAR of char
  | EOI

exception Error of int * int * string

module Loc   : Loc with type t = int * int
module Token : Token with module Loc = Loc and type t = token
module Error : Error

val mk : unit -> (Loc.t -> char Stream.t -> (Token.t * Loc.t) Stream.t)

I still don’t understand enough about the camlp4 internal to comment about it. I’ve reused the cduce lexer as starting point and added a small lexer for regular expressions.

The complete code (lexer + parser) is Here


Recursive subtyping revealed

The other day I read the article: “Recursive Subtyping revealed”. A Functional Pearl by Gapeyev, Levin and Pierce. This is a bit of code I wrote to convince myself of the algorithm described in the paper.

You can get the article here

type at =
    |Top |Nat |Bool |Even | Odd
    |Times of ( at * at )
    |Arrow of ( at * at )
    |Mu of ( string * at )
    |Var of string

module S = SortedList.Make(
    struct
        type t = (at * at)
        let compare = compare
        let hash = Hashtbl.hash
        let equal (t1,t2) (s1,s2)  =
          ((compare t1 s1) = 0) && ((compare t2 s2) = 0)
    end
    )

module B = SortedList.Make(
    struct
        type t = string
        let compare = compare
        let hash = Hashtbl.hash
        let equal t s  = (compare t s) = 0
    end
    )

(* canonical substitution *)
let rec sub x t = function
    |Var s when s = x -> t
    |Times(t1,t2) -> Times(sub x t t1,sub x t t2)
    |Arrow(t1,t2) -> Arrow(sub x t t1,sub x t t2)
    |_ as t -> t

(* free variables *)
let rec fv = function
    |Var t -> B.add t B.empty
    |Times(t1,t2) -> B.cup (fv t1) (fv t2)
    |Arrow(t1,t2) -> B.cup (fv t1) (fv t2)
    |_ -> B.empty

exception Undef of (at * at)

let rec subtype (a,s,t) =
    if s = t then S.add (s,t) a
    else if List.mem (s,t) a then a
    else
        let a0 = S.add (s,t) a in
        match (s,t) with
        |_,Top -> a0
        |Mu(x,s1),_ ->
                subtype(a0,sub x s s1,t)
        |_,Mu(x,t1) ->
                subtype(a0,s,sub x t t1)
        |Times(s1,s2),Times(t1,t2) ->
                let a1 = subtype(a0,s1,t1) in
                subtype(a1,s2,t2)
        |Arrow(s1,s2),Arrow(t1,t2) ->
                let a1 = subtype(a0,t1,s1) in
                subtype(a1,s2,t2)
        |_ -> raise (Undef (s,t))

let (<:) t s = subtype([],t,s);;

(* ------------- parser -------------------*)
open Camlp4.PreCast
module TypeGram = MakeGram(Lexer)

let expression = TypeGram.Entry.mk "expression"

EXTEND TypeGram
  GLOBAL: expression;

  expression: [
      "arrow" [ e1 = SELF; "->"; e2 = SELF -> Arrow(e1, e2) ]
    | "prod"  [ e1 = SELF; "x";  e2 = SELF -> Times(e1, e2) ]
    | "var" [
          "Nat" -> Nat
        | "Even" -> Even
        | "Odd" -> Odd
        | "Bool" -> Bool
        | "T" -> Top
        | "mu"; `UIDENT x ; "." ; e = SELF -> Mu(x,e)
        | `UIDENT x -> Var x
        | "("; e = SELF; ")" -> e
    ]
  ];

END

let p s = TypeGram.parse_string expression (Loc.mk "<string>") s
(* ------------- parser -------------------*)

(* Examples *)
let a = p "mu X . ( Nat -> (Nat x X))" ;;
let b = p "Nat -> (Nat x (mu X . (Nat -> (Nat x X))))" ;;

let d = p "mu X . ( Nat -> (Even x X))" ;;
let e = p "mu X . ( Even -> (Nat x X))" ;;

(* suppose Even is subtype of Nat *)
let (<::) t s = subtype([Even,Nat],t,s);;
d <:: e;;

ocamlbuild + camlp4 + ocamlfind

Date Tags ocaml

Recently I tried to answer to a problem posed on the OCaml mailing list. Basically the problem is how to compile using ocamlbuild using ocamlfind and camlp4. The Camlp4 wiki has already all the ingredients. Here I mix them up in a short example.

_tags : is the ocamlbuild dep file

bar.ml : is a ml file that uses the syntax extension pa_float to

compile.

foo.ml : is a ml file that depends on bar.ml and the str module

(fetched via ocamlfind)

pa_float.ml : is the source code of the camlp4 syntax extension.

The code

_tags

"pa_float.ml": use_camlp4, pp(camlp4of)
"bar.ml": camlp4o, use_float

bar.ml

let x = Float.( 3/2 - sqrt (1/3) )
let f x =
  Float.(
    let pi = acos(-1) in
    x/(2*pi) - x**(2/3)
  )

foo.ml

open Str
let x = Bar.x

myocamlbuild.ml

open Ocamlbuild_plugin;;
open Command;;

let packages = "str";;

let ocamlfind x = S[A"ocamlfind"; x; A"-package"; A packages];;

dispatch begin function
| Before_options ->
    Options.ocamlc := ocamlfind& A"ocamlc";
    Options.ocamlopt := ocamlfind& A"ocamlopt";

| After_rules ->
    flag ["ocaml"; "pp"; "use_float"] (A"pa_float.cmo");
    flag ["ocaml"; "link"] (A"-linkpkg");
    dep  ["ocaml"; "ocamldep"; "use_float"] ["pa_float.cmo"];
| _ -> ()
end;;

pa_float.ml

module Id = struct
  let name = "pa_float"
  let version = "1.0"
end

open Camlp4

module Make (Syntax : Sig.Camlp4Syntax) = struct
  open Sig
  include Syntax

  class float_subst _loc = object
    inherit Ast.map as super
    method _Loc_t _ = _loc
    method expr =
      function
      | <:expr< ( + ) >> -> <:expr< ( +. ) >>
      | <:expr< ( - ) >> -> <:expr< ( -. ) >>
      | <:expr< ( * ) >> -> <:expr< ( *. ) >>
      | <:expr< ( / ) >> -> <:expr< ( /. ) >>
      | <:expr< $int:i$ >> ->
        let f = float(int_of_string i) in <:expr< $`flo:f$ >>
      | e -> super#expr e
  end;;

  EXTEND Gram
    GLOBAL: expr;

    expr: LEVEL "simple"
    [ [ "Float"; "."; "("; e = SELF; ")" -> (new float_subst _loc)#expr e ]
    ]
    ;
  END
end

let module M = Register.OCamlSyntaxExtension Id Make in ()

To Compile

$ocamlbuild foo.byte -classic-display
/usr/bin/ocamlopt -I /usr/lib/ocaml/3.10.0/ocamlbuild unix.cmxa /usr/lib/ocaml/3.10.0/ocamlbuild/ocamlbuildlib.cmxa myocamlbuild.ml
+/usr/lib/ocaml/3.10.0/ocamlbuild/ocamlbuild.cmx -o myocamlbuild
/usr/bin/ocamldep -modules foo.ml > foo.ml.depends
/usr/bin/ocamldep -pp camlp4of -modules pa_float.ml > pa_float.ml.depends
ocamlfind ocamlc -package str -c -I +camlp4 -pp camlp4of -o pa_float.cmo pa_float.ml
/usr/bin/ocamldep -pp 'camlp4o pa_float.cmo' -modules bar.ml > bar.ml.depends
ocamlfind ocamlc -package str -c -pp 'camlp4o pa_float.cmo' -o bar.cmo bar.ml
ocamlfind ocamlc -package str -c -o foo.cmo foo.ml
ocamlfind ocamlc -package str -linkpkg bar.cmo foo.cmo -o foo.byte