-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.ml
143 lines (117 loc) · 4.14 KB
/
utils.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
open! Core
open Little_logger
(* Record ID that identifies the key reference sequence. *)
let key_reference_id = "______pasv_key_reference______"

(* [is_key_reference record] is [true] exactly when the ID of [record] equals
   [key_reference_id]. *)
let is_key_reference record =
  let id = Bio_io.Fasta.Record.id record in
  String.equal id key_reference_id
(* Prefix shared by every ID produced by [make_reference_id]. *)
let reference_id_prefix = "______pasv_reference______seq_"

(* [make_reference_id i] is [reference_id_prefix] followed by the decimal
   representation of [i]. *)
let make_reference_id i = Printf.sprintf "%s%d" reference_id_prefix i
(* Compiled pattern for a complete reference ID: the reference prefix followed
   by one or more decimal digits, anchored at both ends. *)
let reference_id_prefix_re =
  Re2.create_exn "^______pasv_reference______seq_[0-9]+$"

(* [is_reference record] is [true] when the ID of [record] matches
   [reference_id_prefix_re]. *)
let is_reference record =
  let id = Bio_io.Fasta.Record.id record in
  Re2.matches reference_id_prefix_re id
(* [is_file name] collapses the three-way answer of [Sys_unix.is_file] into a
   boolean: only [`Yes] counts as [true]; [`No] and [`Unknown] are [false]. *)
let is_file name =
  let to_bool = function `Yes -> true | `No | `Unknown -> false in
  to_bool (Sys_unix.is_file name)
(* [is_directory name] collapses the three-way answer of
   [Sys_unix.is_directory] into a boolean: only [`Yes] counts as [true];
   [`No] and [`Unknown] are [false]. *)
let is_directory name =
  let to_bool = function `Yes -> true | `No | `Unknown -> false in
  to_bool (Sys_unix.is_directory name)
(* See
https://github.com/ocaml/dune/commit/154272b779fe8943a9ce1b4afabb30150ab94ba6 *)
(* let ( ^/ ) = Filename.concat *)
(* [readdir path] lists the entries of the directory [path], each one joined
   onto [path] with [Filename.concat] (i.e. [path/entry]). The resulting list
   is in the reverse of the order returned by [Sys_unix.readdir]. *)
let readdir path =
  let prepend_dir entry = Filename.concat path entry in
  Sys_unix.readdir path |> Array.to_list |> List.rev_map ~f:prepend_dir
(* [rm_rf name] removes [name] from the file system.

   - If [lstat] reports a directory, every entry listed by [readdir] is removed
     recursively and then the directory itself is removed with [rmdir].
   - Any other kind of file is removed with [unlink].
   - If [lstat] fails with [ENOENT] nothing is done.

   Only the [ENOENT] raised by [lstat] is handled here; other errors from
   [lstat], and any error raised by the removal calls, are not caught. *)
let rec rm_rf name =
  match Core_unix.lstat name with
  | exception Core_unix.Unix_error (ENOENT, _, _) ->
      ()
  | {st_kind= S_DIR; _} ->
      name |> readdir |> List.iter ~f:rm_rf ;
      Core_unix.rmdir name
  | {st_kind= S_REG | S_CHR | S_BLK | S_LNK | S_FIFO | S_SOCK; _} ->
      Core_unix.unlink name
(* [clean_up keep_intermediate_files filenames] does nothing when
   [keep_intermediate_files] is [true]. Otherwise it walks [filenames] in order
   and calls [rm_rf] on each name for which [is_file] holds; names that are not
   files are left alone. *)
let clean_up keep_intermediate_files filenames =
  let remove_if_file path = if is_file path then rm_rf path in
  if keep_intermediate_files then ()
  else List.iter filenames ~f:remove_if_file
(* [make_signatures_filename ~infile ~outdir] is the path of the signatures
   file inside [outdir]. The file name is the basename of [infile] (as given by
   [Fname.of_string]) with the suffix [.pasv_signatures.tsv]. The [infile] is
   either the queries file or an alignment file. *)
let make_signatures_filename ~infile ~outdir =
  let basename = (Fname.of_string infile).Fname.basename in
  Filename.concat outdir [%string "%{basename}.pasv_signatures.tsv"]
(* [make_outdir outdir force] makes sure the output directory is usable.

   - [outdir] is an existing directory and [force] is [true]: nothing is
     created and [Ok ()] is returned.
   - [outdir] is an existing directory and [force] is [false]: an [Error] is
     returned.
   - [outdir] is not an existing directory: it is created with
     [Core_unix.mkdir_p] and permissions [0o755], whatever [force] is.

   Any exception raised while creating the directory is captured and returned
   as an [Error] instead of escaping, so callers only have to inspect the
   result. *)
let make_outdir outdir force =
  match (is_directory outdir, force) with
  | true, true ->
      (* The outdir exists, but --force is given, so just keep going. *)
      Or_error.return ()
  | true, false ->
      (* The outdir exists and --force is NOT given, so that is an error. *)
      Or_error.errorf "--outdir '%s' already exists but --force was not given"
        outdir
  | false, _ ->
      (* The dir does not exist: make it regardless of the force option, and
         turn a failure to do so into an [Error]. *)
      Or_error.try_with (fun () -> Core_unix.mkdir_p outdir ~perm:0o755)
(* [make_outdir_or_exit outdir force] runs [make_outdir outdir force]. On
   [Ok ()] it returns unit; on [Error] it logs the human-readable error with
   [Logger.fatal] and exits the program with status 1. *)
let make_outdir_or_exit outdir force =
  let report_and_exit err =
    Logger.fatal (fun () -> Error.to_string_hum err) ;
    exit 1
  in
  match make_outdir outdir force with
  | Error err ->
      report_and_exit err
  | Ok () ->
      ()
(* [looks_like_fasta_file name] opens [name] and reads its first character. It
   is [true] when that character is ['>'], and [false] when it is anything
   else or when the file is empty. *)
let looks_like_fasta_file name =
  match In_channel.with_file name ~f:In_channel.input_char with
  | Some '>' ->
      true
  | Some _ | None ->
      false
(* [assert_looks_like_fasta_file_or_exit name] returns unit when
   [looks_like_fasta_file name] holds. Otherwise it logs a message with
   [Logger.fatal] and exits the program with status 1. *)
let assert_looks_like_fasta_file_or_exit name =
  match looks_like_fasta_file name with
  | true ->
      ()
  | false ->
      Logger.fatal (fun () ->
          [%string
            "file '%{name}' doesn't look like an fasta file. Check the file \
             format!"] ) ;
      exit 1
(* [looks_like_hmm_file name] opens [name] and reads its first 8 characters.
   It is [true] when they are exactly ["HMMER3/f"], and [false] when they
   differ or when fewer than 8 characters could be read. *)
let looks_like_hmm_file name =
  let header_len = 8 in
  let buf = Buffer.create header_len in
  let read_header chan = In_channel.input_buffer chan buf ~len:header_len in
  match In_channel.with_file name ~f:read_header with
  | None ->
      false
  | Some () ->
      String.equal (Buffer.contents buf) "HMMER3/f"
(* [assert_looks_like_hmm_file_or_exit name] returns unit when
   [looks_like_hmm_file name] holds. Otherwise it logs a message with
   [Logger.fatal] and exits the program with status 1. *)
let assert_looks_like_hmm_file_or_exit name =
  match looks_like_hmm_file name with
  | true ->
      ()
  | false ->
      Logger.fatal (fun () ->
          [%string
            "file '%{name}' doesn't look like an hmm file. Check the file \
             format!"] ) ;
      exit 1
(* Default value for the extra alignment parameters used with Clustal Omega.
   NOTE(review): presumably appended to the clustalo command line; confirm at
   the call sites. *)
let default_clustalo_other_aln_params = "--threads=1"
(* Default value for the extra alignment parameters used with MAFFT.
   NOTE(review): presumably appended to the mafft command line; confirm at the
   call sites. *)
let default_mafft_other_aln_params = "--thread 1 --auto"
(* [try1 f a] applies [f] to [a]. The result is returned as [Ok]; if [f a]
   raises, the exception is wrapped in an [Error] tagged with the message
   [Caught exception]. *)
let try1 f a =
  try Or_error.return (f a)
  with exn -> Or_error.error "Caught exception" exn Exn.sexp_of_t
(* [get_from_end ary i] is the element of [ary] at zero-based position [i]
   counted from the end, so [i = 0] is the last element. When the resulting
   position falls outside [ary] an [Error] is returned whose message reports
   the computed (from-the-front) index and the array length. *)
let get_from_end ary i =
  let len = Array.length ary in
  let index = len - (i + 1) in
  if 0 <= index && index < len then Or_error.return ary.(index)
  else Or_error.errorf "Bad index (%d); ary length is (%d)." index len
(* [all_true l] is [true] when every element of [l] is [true]; the empty list
   gives [true]. Stops at the first [false] instead of folding over the whole
   list. *)
let all_true l = List.for_all l ~f:Fn.id
(* [any_true l] is [true] when at least one element of [l] is [true]; the
   empty list gives [false]. Stops at the first [true] instead of folding over
   the whole list. *)
let any_true l = List.exists l ~f:Fn.id