A long time ago I wrote about how to handle compressed files in OCaml using ExtLib: http://mancoosi.org/~abate/transparently-open-compressed-files-ocaml
Today I got back to it and added bz2 support. The code is trivial. The only small problem to notice is that, since the bz2 interface does not provide a single-character input function, I have to simulate it using Bz2.read. It is a bit of a hack, and I want to look at the bz2 bindings to fix this small shortfall. This is the code:
open ExtLib
(** [gzip_open_file file] opens the gzip-compressed [file] with
    [Gzip.open_in] and exposes it as an [IO] input.

    End of data is reported as [IO.No_more_input] by both callbacks:
    the [End_of_file] raised by [Gzip.input_char] is translated, and so
    is a zero byte count returned by [Gzip.input] for a non-empty
    request.  Closing the returned input calls [Gzip.close_in].

    @raise IO.No_more_input from the read callbacks at end of data. *)
let gzip_open_file file =
  let ch = Gzip.open_in file in
  let input_char () =
    try Gzip.input_char ch with End_of_file -> raise IO.No_more_input
  in
  let input buf pos len =
    (* The handler has to wrap the fully applied call: wrapping only the
       partial application [Gzip.input ch] would protect nothing, since
       building the closure never raises. *)
    let n =
      try Gzip.input ch buf pos len
      with End_of_file -> raise IO.No_more_input
    in
    (* camlzip documents a result of 0 as "end of file"; IO consumers
       expect [No_more_input] in that case.  A request for 0 bytes may
       legitimately yield 0. *)
    if n = 0 && len > 0 then raise IO.No_more_input else n
  in
  IO.create_in
    ~read:input_char
    ~input
    ~close:(fun () -> Gzip.close_in ch)
;;
(** [bzip_open_file file] opens the bzip2-compressed [file] and exposes
    it as an [IO] input.

    [Bz2] has no single-character read, so the [~read] callback is
    emulated with a one-byte [Bz2.read].  End of data is reported as
    [IO.No_more_input], whether [Bz2.read] raises [End_of_file] or
    returns fewer bytes than needed to make progress.

    Closing the returned input closes the [Bz2] channel and then the
    underlying file channel.

    @raise IO.No_more_input from the read callbacks at end of data. *)
let bzip_open_file file =
  let raw = open_in file in
  (* Do not leak the file channel if the bz2 stream cannot be set up. *)
  let ch =
    try Bz2.open_in raw
    with e -> (close_in_noerr raw; raise e)
  in
  let input_char () =
    (* Fresh one-byte buffer per call instead of mutating a shared
       string literal. *)
    let buf = String.make 1 ' ' in
    let n =
      try Bz2.read ch buf 0 1
      with End_of_file -> raise IO.No_more_input
    in
    (* A short read means no byte was produced: returning [buf.[0]]
       would hand back the placeholder character as if it were data. *)
    if n = 1 then buf.[0] else raise IO.No_more_input
  in
  let input buf pos len =
    let n =
      try Bz2.read ch buf pos len
      with End_of_file -> raise IO.No_more_input
    in
    (* Zero bytes for a non-empty request is treated as end of data. *)
    if n = 0 && len > 0 then raise IO.No_more_input else n
  in
  let close () =
    (* NOTE(review): Bz2.close_in is assumed not to close the OCaml
       channel it was built from, so close it here as well;
       [close_in_noerr] keeps this harmless if it already did. *)
    (try Bz2.close_in ch
     with e -> (close_in_noerr raw; raise e));
    close_in_noerr raw
  in
  IO.create_in
    ~read:input_char
    ~input
    ~close
;;
(** [std_open_file file] opens [file] with [open_in] and wraps the
    resulting channel as an [IO] input. *)
let std_open_file file =
  let raw = open_in file in
  IO.input_channel raw
(** [open_ch ch] wraps the already opened input channel [ch] as an [IO]
    input. *)
let open_ch ch =
  IO.input_channel ch
(** [close_ch ch] closes the [IO] input [ch] via [IO.close_in]. *)
let close_ch ch =
  IO.close_in ch
(** [open_file file] picks an opener from the suffix of [file]:
    [".gz"] or [".cz"] use [gzip_open_file], [".bz2"] uses
    [bzip_open_file], and anything else uses [std_open_file]. *)
let open_file file =
  let has_suffix suffix = Filename.check_suffix file suffix in
  let opener =
    if has_suffix ".gz" || has_suffix ".cz" then gzip_open_file
    else if has_suffix ".bz2" then bzip_open_file
    else std_open_file
  in
  opener file
;;
(** [main ()] copies the (possibly compressed) file named by the first
    command-line argument to standard output, reading it in chunks of
    up to 10240 bytes.

    If no argument is given, a usage line is printed on standard error
    and the program exits with status 2.  The input is closed once the
    copy finishes, including when it stops on an exception other than
    [IO.No_more_input]. *)
let main () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("usage: " ^ Sys.argv.(0) ^ " FILE");
    exit 2
  end;
  let ch = open_file Sys.argv.(1) in
  (* [IO.No_more_input] is the normal loop exit; any other exception
     is re-raised after releasing the input. *)
  (try
     while true do
       print_string (IO.nread ch 10240)
     done
   with
   | IO.No_more_input -> ()
   | e -> (IO.close_in ch; raise e));
  IO.close_in ch
;;

let () = main ()