-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_column_comparing_two_columns.pl
174 lines (148 loc) · 4.47 KB
/
add_column_comparing_two_columns.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env perl
# Adds a column indicating whether or not there is a difference between two columns.
# Usage:
# perl add_column_comparing_two_columns.pl [tab-separated table]
# "[title of first column to compare]" "[title of second column to compare]"
# [1 to print the actual values when different]
# "[optional new column title]" "[optional new column value if values are identical]"
# "[optional new column value if values are different]"
# "[optional new column value if one value missing]"
# "[optional new column value if both values missing]"
# Prints to console. To print to file, use
# perl add_column_comparing_two_columns.pl [tab-separated table]
# "[title of first column to compare]" "[title of second column to compare]"
# [1 to print the actual values when different]
# "[optional new column title]" "[optional new column value if values are identical]"
# "[optional new column value if values are different]"
# "[optional new column value if one value missing]"
# "[optional new column value if both values missing]" > [output table path]
use strict;
use warnings;
# Positional command-line arguments, unpacked in the order given in the usage
# notes. Any argument left off the command line is undefined here.
my (
    $table,                      # path to the tab-separated input table
    $column_title_1,             # title of first column to compare
    $column_title_2,             # title of second column to compare
    $print_values_if_different,  # true to append the two values when they differ
    $output_column_title,        # optional
    $output_value_same,          # optional
    $output_value_different,     # optional
    $output_value_one_missing,   # optional
    $output_value_both_missing,  # optional
) = @ARGV;

# row terminator and field separator used for both reading and printing
my ($NEWLINE, $DELIMITER) = ("\n", "\t");

# values written to the new column when the matching optional argument
# is not supplied
my ($DEFAULT_OUTPUT_VALUE_SAME, $DEFAULT_OUTPUT_VALUE_DIFFERENT) = ("same", "different");
my ($DEFAULT_OUTPUT_VALUE_ONE_MISSING, $DEFAULT_OUTPUT_VALUE_BOTH_MISSING) = ("", "");
# Verifies that an input table path was given and that it names an existing,
# non-empty file; otherwise reports the problem on STDERR and dies.
# is_empty() is used rather than plain truthiness so that a path such as "0"
# is not mistaken for a missing argument.
if(is_empty($table) or !-e $table or -z $table)
{
    # a missing argument leaves $table undefined; substitute an empty string
    # so that building the message does not raise an "uninitialized value"
    # warning on top of the real error
    my $table_for_message = defined $table ? $table : "";
    print STDERR "Error: table not provided, does not exist, or empty:\n\t"
        .$table_for_message."\nExiting.\n";
    die;
}
# Fills in defaults for optional arguments that were left off the command
# line or passed as "". is_empty() is used instead of plain truthiness so
# that an explicit "0" is kept as a real value rather than being silently
# replaced by the default.
if(is_empty($output_column_title))
{
    # default title names the two columns being compared
    $output_column_title = "compare ".$column_title_1." ".$column_title_2;
}
if(is_empty($output_value_same))
{
    $output_value_same = $DEFAULT_OUTPUT_VALUE_SAME;
}
if(is_empty($output_value_different))
{
    $output_value_different = $DEFAULT_OUTPUT_VALUE_DIFFERENT;
}
if(is_empty($output_value_one_missing))
{
    $output_value_one_missing = $DEFAULT_OUTPUT_VALUE_ONE_MISSING;
}
if(is_empty($output_value_both_missing))
{
    $output_value_both_missing = $DEFAULT_OUTPUT_VALUE_BOTH_MISSING;
}
# Reads the table row by row and prints every non-blank row to STDOUT with one
# extra tab-separated column appended.
#   - First non-blank row (column titles): locates the two columns to compare
#     by exact title match and appends the new column's title. Dies if either
#     column is not found.
#   - Every later row: appends the "both missing", "one missing", "same" or
#     "different" output value. When $print_values_if_different is true, the
#     "different" value is followed by ": value1; value2".
# A cell counts as missing when it is absent from the row or is a zero-length
# string; a cell holding "0" is a real value.
my $first_line = 1;
my $column_1 = -1;
my $column_2 = -1;

# Three-argument open with the low-precedence "or": with "||" the die would
# attach to the (always true) filename string and a failed open would go
# unreported.
open(my $table_handle, "<", $table)
    or die "Could not open $table to read; terminating =(\n";
while(my $line = <$table_handle>) # for each row in the file
{
    chomp $line;
    next unless $line =~ /\S/; # skips rows with no non-whitespace content

    # limit of -1 keeps trailing empty fields so column positions stay aligned
    my @items_in_line = split($DELIMITER, $line, -1);
    if($first_line) # column titles
    {
        # identifies columns of interest; two independent tests (rather than
        # if/elsif) so that the same title may be given for both columns
        for my $column (0 .. $#items_in_line)
        {
            my $column_title = $items_in_line[$column];
            if($column_title eq $column_title_1)
            {
                $column_1 = $column;
            }
            if($column_title eq $column_title_2)
            {
                $column_2 = $column;
            }
        }

        # verifies that we have found all columns of interest
        if($column_1 == -1 or $column_2 == -1)
        {
            print STDERR "Error: input columns ".$column_title_1." and ".$column_title_2
                ." not both found. Exiting.\n";
            die;
        }
        $first_line = 0; # next line is not column titles

        # prints column titles line as is with new column
        print $line.$DELIMITER;
        print $output_column_title.$NEWLINE;
    }
    else # column values (not column titles)
    {
        # retrieves values of columns of interest; either may be undefined
        # when the row has fewer fields than the column titles row
        my $column_1_value = $items_in_line[$column_1];
        my $column_2_value = $items_in_line[$column_2];

        # is_empty() rather than plain truthiness, so that a cell containing
        # "0" is compared as a value instead of being reported as missing
        my $column_1_missing = is_empty($column_1_value);
        my $column_2_missing = is_empty($column_2_value);

        my $comparison_value = "";
        if($column_1_missing and $column_2_missing)
        {
            $comparison_value = $output_value_both_missing;
        }
        elsif($column_1_missing or $column_2_missing)
        {
            $comparison_value = $output_value_one_missing;
        }
        elsif($column_1_value eq $column_2_value)
        {
            $comparison_value = $output_value_same;
        }
        else
        {
            $comparison_value = $output_value_different;
            if($print_values_if_different)
            {
                $comparison_value .= ": ".$column_1_value."; ".$column_2_value;
            }
        }

        # prints line as is with new column
        print $line.$DELIMITER;
        print $comparison_value.$NEWLINE;
    }
}
close $table_handle;
# Reports whether a value counts as empty.
# Returns 1 when the argument is undefined or a zero-length string;
# returns 0 for any other value (including "0").
sub is_empty
{
    my ($candidate) = @_;

    return 1 unless defined $candidate;   # nothing was supplied
    return 1 if length($candidate) == 0;  # supplied, but no characters
    return 0;
}
# October 11, 2022