transparently open compressed files ocaml (now with bz2 support)

Date Tags ocaml

A long time ago I wrote about how to handle compressed files in ocaml using extlib : http://mancoosi.org/~abate/transparently-open-compressed-files-ocaml

Today I got back to it and added bz2 support. The code is trivial. The only small problem to notice is that since the bz2 interface does not support a char input function, I’ve to simulate it using Bz2.read. A bit of a hack. I want to look at the bz2 bindings to fix this small shortfall. This is the code :

open ExtLib

let gzip_open_file file =
  let ch = Gzip.open_in file in
  let input_char ch = try Gzip.input_char ch with End_of_file -> raise IO.No_more_input in
  let read ch = try Gzip.input ch with End_of_file -> raise IO.No_more_input in
  IO.create_in
  ~read:(fun () -> input_char ch)
  ~input:(read ch)
  ~close:(fun () -> Gzip.close_in ch)
;;

let bzip_open_file file =
  let ch = Bz2.open_in (open_in file) in
  let input_char ch =
   (** XXX ugly ! *)
    try let s = " " in ignore (Bz2.read ch s 0 1) ; s.[0]
    with End_of_file -> raise IO.No_more_input
  in
  let read ch s pos len =
    try Bz2.read ch s pos len
    with End_of_file -> raise IO.No_more_input
  in
  IO.create_in
  ~read:(fun () -> input_char ch)
  ~input:(read ch)
  ~close:(fun () -> Bz2.close_in ch)
;;

let std_open_file file = IO.input_channel (open_in file)
let open_ch ch = IO.input_channel ch
let close_ch ch = IO.close_in ch

let open_file file =
  if Filename.check_suffix file ".gz" || Filename.check_suffix file ".cz" then
    gzip_open_file file
  else
  if Filename.check_suffix file ".bz2" then
    bzip_open_file file
  else
    std_open_file file
;;

let main () =
  let ch = open_file (Sys.argv.(1)) in
  try while true do print_string (IO.nread ch 10240) done
  with IO.No_more_input -> ()
;;

main ();;

ocaml bindings for Buddy BDD

Date Tags bdd / ocaml

I’ve taken some time off to finish the ocaml bindings to Buddy BDD.

This code is a fork of the bindings downloaded from http://nicosia.is.s.u-tokyo.ac.jp/members/hagiya/xml/readme.htm#buddy .The original authors are Akihiko Tozawa and Masami Hagiya and they granted me - via personal communication - the right to release the software as open source.

I’ve added documentation to the .mli (mostly from the buddy website), few functions that were missed in the original work, the build system and an example.

I’ve released the code under GPLv3 . The code is available on github. This snapshot compiles cleanly on debian unstable with the package libbdd-dev installed. There is a micro example in the source. I plan to use the bindings with some real world data in the next few weeks. Debian package is work in progress.

Enjoy. As always feedback is welcome.


ocaml bindings for picosat

The code is on github . Get in touch if you find this useful.

ocaml bingings for picosat

The package provides an example, solver.ml, that reads from standard input a formula and print its satisfiability and a satisfying assignment (if any). A formula is a sequence of lines that either define a variable (‘v’ ), define a clause (‘c’ followed by a sequence of names, with a leading ‘-‘ if they are negated in the clause) and comments (starting with ‘#’). A couple of example formulas are available in the ‘examples/’ directory.

for documentation refer to picosat.h and picosat.mli

Install and use

To compile ocaml-picosat you need the picosat solver (compiled as a dynamic library) installed on the system.

The library assumes : /usr/include/picosat/picosat.h and /usr/lib/libpicosat.so.1.0.0

Picosat is a stateless module. I’ve also add an higher level api to add clauses and to generate a model and an unset core. It mimics a bit the same concept of the minisat bindings, but without the OO interface. We can set the number of variables, the trace (on by default and the picosat library must be compiled with -DTRACE) and the initial seed.

type var = int
type lit
type value = True | False | Unknown
type solution = SAT | UNSAT | UNKNOWN

val init : ?seed : int -> ?nvars : int -> ?trace : bool -> unit -> unit
external reset: unit -> unit = "caml_picosat_reset"

external model: unit -> var list = "caml_model"
external unsatcore : unit -> var list = "caml_unsatcore"

val add_clause : lit list -> unit

val solve : ?limit : int -> unit -> solution 
val solve_with_assumptions : ?limit : int  -> lit list -> solution 

val new_var : unit -> var
val value_of : var -> value

val pos_lit : var -> lit
val neg_lit : var -> lit

val string_of_value : value -> string

This is a snippet from the test file :

let solve file =
  Picosat.init ();
  let vars = process_file file in
  let revs =
    let acc = Hashtbl.create (Hashtbl.length vars) in
    Hashtbl.iter (fun name v -> Hashtbl.add acc v name) vars ;
    acc
  in
  match Picosat.solve () with
  | Picosat.UNKNOWN -> printf "Limit exausted\n"
  | Picosat.UNSAT ->
      begin
        printf "unsat\nunsat core : %s\n"
        (String.concat "," (List.map (fun i -> (Hashtbl.find revs i)) (Picosat.unsatcore ())))
      end
  | Picosat.SAT   ->
      begin
        printf "sat\nmodel : \n";
        List.iter (fun i ->
          printf "  %s=%s\n"
          (Hashtbl.find revs i)
          (Picosat.string_of_value (Picosat.value_of i))
        ) (Picosat.model ())
      end

minisat2 ocaml bindings

Release early, release often !

I’ve been working with Falvio Lerda to update the ocaml binding to minisat2.

The biggest change from Fabio’s original version is the object oriented interface (mimicking the library c++ interface) and the addition of a couple of bindings.

You can find the git repo here.

I’ve also been working on the debian package and I’ve committed a draft package on git.debian.org.

A simple example is includes in the source where this is the main function.

(* Reads a given file and solves the instance. *)
let solve file =
  let solver = new solver in
  let vars = process_file solver file in
  match solver#solve with
  | Minisat.UNSAT -> printf "unsat\n"
  | Minisat.SAT   ->
      printf "sat\n";
      Hashtbl.iter
        (fun name v ->
          printf "  %s=%s\n"
            name
            (Minisat.string_of_value (solver#value_of v))
        )
        vars

Now I plan to finish the binding to minisat2 and then to attach [http://minisat.se/MiniSat+.html minisat+]. It should not be too difficult, if anyone is interested to lend me a hand, drop me a line.


lazy me, BronKerbosh algorithm revisited.

Date Tags ocaml

A while ago I wrote about the Bron - kerbosch algorithm giving a brain dead implementation based on simple lists. Of course since the amount of maximal cliques can be exponential in the size of graph, my previous implementation is useless on big graphs (say more then 80 nodes and 150 edges). It just takes forever.

Now I give a more efficient version of the algorithm that uses a simple lazy data structure to hold solution. Each element of the data structure is a set of nodes. The first part is the definition of a lazy list and a couple of ocamlgraph data structures. Pretty standard :

module PkgV = struct
    type t = int
    let compare = Pervasives.compare
    let hash i = i
    let equal = (=)
end

module UG = Graph.Imperative.Graph.Concrete(PkgV)
module N = Graph.Oper.Neighbourhood(UG)
module O = Graph.Oper.Make(Graph.Builder.I(UG))
module S = N.Vertex_Set
module RG = Graph.Rand.I(UG)

type 'a llist = 'a cell Lazy.t
and 'a cell = LList of 'a * 'a llist | Empty

exception LListEmpty

let empty = lazy(Empty)
let push e l = lazy(LList(e,l))

let hd s =
    match Lazy.force s with
    | LList (hd, _) -> hd
    | Empty -> raise LListEmpty

let rec append s1 s2 =
    lazy begin
        match Lazy.force s1 with
        | LList (hd, tl) -> LList (hd, append tl s2)
        | Empty -> Lazy.force s2
    end

let rec iter f s =
    begin
        match Lazy.force s with
        | LList (hd, tl) -> f hd ; iter f tl
        | Empty -> ()
    end

let rec map f s =
    lazy begin
        match Lazy.force s with
        | LList (hd, tl) -> LList (f hd, map f tl)
        | Empty -> Empty
    end

let rec flatten ss =
    lazy begin
        match Lazy.force ss with
        | Empty -> Empty
        | LList (hd, tl) ->
            match Lazy.force hd with
            | LList (hd2, tl2) -> LList (hd2, flatten (lazy (LList (tl2, tl))))
            | Empty -> Lazy.force (flatten tl)
    end

This second part is the algorithm itself. The fold function creates a lazy list of sets of vertex to be check by the algorithm. The body of the function each time choose a new pivot and invoke the bronKerbosch2 function on each of them in turn. When we finally manage to build a clique we return it (push r empty) embedded in a list. The flatten (map f m) takes care of unwinding the results in a flat lazy list.

let rec bronKerbosch2 gr r p x =
  let n v = N.set_from_vertex gr v in
  let rec fold acc (p,x,s) =
    if S.is_empty s then acc
    else
      let v = S.choose s in
      let rest = S.remove v s in
      let r' = S.union r (S.singleton v) in
      let p' = S.inter p (n v) in
      let x' = S.inter x (n v) in
      let acc' = (push (r',p',x') acc) in
      fold acc' (S.remove v p, S.add v x, rest)
  in
  if (S.is_empty p) && (S.is_empty x) then (push r empty)
  else
    let s = S.union p x in
    let u = S.choose s in
    let l = fold empty (p,x,S.diff p (n u)) in
    flatten (map (fun (r',p',x') -> bronKerbosch2 gr r' p' x') l)
;;

let max_independent_sets gr =
  let cgr = O.complement gr in
  let r = S.empty in
  let p = UG.fold_vertex S.add cgr S.empty in
  let x = S.empty in
  bronKerbosch2 cgr r p x
;;

To test it we just create a random graph and we iterates thought the results. Noticed that if the algorithm was not lazy, for big graphs, the iteration will get stuck waiting for the computation to be over.

let main () =
  let v = int_of_string Sys.argv.(1) in
  let e = int_of_string Sys.argv.(2) in
  let gr = RG.graph ~loops:true ~v:v ~e:e () in
  let mis = max_independent_sets gr in
  let i = ref 0 in
  iter (fun s ->
    Printf.printf "%d -> %s\n" !i (String.concat " , " (List.map string_of_int (S.elements s)));
    incr i
  ) mis
;;
main ();;

UPDATE

Following the lead from the comment below, I modified my lazy algorithm to use a smarter pivot selection strategy. On a random graph of 50 vertex and 80 edges I get almost 50% speed up ! This is great !

$./mis.native 50 80
--------------------------
Timer Mis classic. Total time: 15.035211. 
Timer Mis pivot selection. Total time: 8.708919. 
--------------------------

This is the code :

let choose gr cands s =
  let n v = N.set_from_vertex gr v in
  fst (
    S.fold (fun u (pivot, pivot_score) ->
      let score = S.cardinal (S.inter (n u) cands) in
      if score > pivot_score then (u, score)
      else (pivot, pivot_score)
    ) s (-1, -1)
  )

let rec bronKerbosch_pivot gr clique cands nots =
  let n v = N.set_from_vertex gr v in
  let rec fold acc (cands,nots,s) =
    if S.is_empty s then acc
    else
      let pivot = choose gr cands s in
      let rest = S.remove pivot s in
      let clique' = S.add pivot clique in
      let cands' = S.inter cands (n pivot) in
      let nots' = S.inter nots (n pivot) in
      let acc' = (push (clique',cands',nots') acc) in
      fold acc' (S.remove pivot cands, S.add pivot nots, rest)
  in
  if (S.is_empty cands) && (S.is_empty nots) then (push clique empty)
  else
    let s = S.union cands nots in
    let pivot = choose gr cands s in
    let rest = S.diff cands (n pivot) in
    let l = fold empty (S.remove pivot cands,S.add pivot nots,rest) in
    flatten (map (fun (clique',cands',nots') -> bronKerbosch_pivot gr clique' cands' nots') l)

let max_independent_sets_pivot gr =
  let cgr = O.complement gr in
  let clique = S.empty in
  let cands = UG.fold_vertex S.add cgr S.empty in
  let nots = S.empty in
  bronKerbosch_pivot cgr clique cands nots