-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbulk_grep_is_detected.pl
106 lines (82 loc) · 2.27 KB
/
bulk_grep_is_detected.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env perl
# Searches all input files for queries listed in query list file. Outputs query and each
# file it was found in, tab-separated, one line per query-file pair where query is detected.
# If query is not detected in any file, prints query followed by "not detected".
# Usage:
# perl bulk_grep_is_detected.pl [file listing queries, one per line] [file to grep]
# [another file to grep] [etc.]
# Prints to console. To print to file, use
# perl bulk_grep_is_detected.pl [file listing queries, one per line] [file to grep]
# [another file to grep] [etc.] > [output file path]
use strict;
use warnings;
# Command-line arguments: the first is the query list file, and every remaining
# argument is a file to search.
my ($query_list_file, @files_to_grep) = @ARGV;

# Output formatting.
my $NEWLINE = "\n";
my $DELIMITER = "\t";
my $PRINT_FULL_FILEPATH = 0; # 1: print each file's full path; 0: print its filename only
# Verifies that the query list file and at least one file to grep were provided,
# and that every input file exists and is not empty. On the first failure, reports
# the problem on STDERR and exits with a non-zero status (via die).
if(!defined $query_list_file or !length $query_list_file
	or !-e $query_list_file or -z $query_list_file)
{
	# $query_list_file is undefined when the script is run without arguments;
	# substitute a placeholder so that building the message does not itself warn
	my $shown = defined $query_list_file ? $query_list_file : "(none)";
	die "Error: query list file not provided, does not exist, or empty:\n\t"
		.$shown."\nExiting.\n";
}
# without this check an empty file list would skip the loop below entirely and
# every query would later be reported as "not detected"
if(!@files_to_grep)
{
	die "Error: file to grep not provided.\nExiting.\n";
}
foreach my $file_to_grep(@files_to_grep)
{
	# length (rather than truthiness) so that a file literally named "0" is accepted
	if(!length $file_to_grep or !-e $file_to_grep or -z $file_to_grep)
	{
		die "Error: file to grep not provided, does not exist, or empty:\n\t"
			.$file_to_grep."\nExiting.\n";
	}
}
# prints the header line naming the two output columns
print join($DELIMITER, "query", "file"), $NEWLINE;
# Reads the query list and searches every input file for each query.
# For each non-blank query, prints one line "query<DELIMITER>file" per file in which
# grep finds the query, or a single line "query<DELIMITER>not detected" if grep
# finds it in none of the files. Each query is passed to grep as its pattern.

# three-argument open with a lexical handle; "or" rather than "||" so that the die
# applies to the result of open and not to the filename string
open(my $query_list, "<", $query_list_file)
	or die "Could not open $query_list_file to read; terminating =(\n";
while(my $query = <$query_list>) # for each line in the file
{
	chomp $query;
	if($query =~ /\S/)
	{
		my $detected_in_at_least_one_file = 0;
		foreach my $file_to_grep(@files_to_grep)
		{
			# list-form system runs grep directly, without a shell, so quotes, "$",
			# backticks, and spaces in the query or file name are passed through
			# literally instead of being interpreted as shell syntax
			# -q: report through exit status only (0 means at least one line matched)
			# --: a query starting with "-" is not mistaken for a grep option
			if(system("grep", "-q", "--", $query, $file_to_grep) == 0)
			{
				print $query.$DELIMITER;
				if($PRINT_FULL_FILEPATH)
				{
					print $file_to_grep;
				}
				else
				{
					print filename($file_to_grep);
				}
				print $NEWLINE;
				$detected_in_at_least_one_file = 1;
			}
		}

		# not detected in any file
		if(!$detected_in_at_least_one_file)
		{
			print $query.$DELIMITER."not detected".$NEWLINE;
		}
	}
}
close $query_list;
# Returns the final component of a file path, i.e. the filename without the
# directories leading to it.
# example input: /Users/lakras/my_file.txt
# example output: my_file.txt
# A path containing no "/" (e.g. a file in the current directory) is already a bare
# filename and is returned unchanged. A path ending in "/" has no final component
# and yields "".
sub filename
{
	my $filepath = $_[0];

	# no directory part to strip
	if($filepath !~ /\//)
	{
		return $filepath;
	}

	if($filepath =~ /^.*\/([^\/]+)$/)
	{
		return $1;
	}
	return "";
}
# August 17, 2021
# August 31, 2021