-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompile_values_and_titles_in_selected_columns.pl
159 lines (131 loc) · 4.19 KB
/
compile_values_and_titles_in_selected_columns.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env perl
# Generates a new column with the values in selected columns and their column titles,
# where values are present.
# Usage:
# perl compile_values_and_titles_in_selected_columns.pl [tab-separated table]
# "[column title]" "[another column title]" [etc.]
# Prints to console. To print to file, use
# perl compile_values_and_titles_in_selected_columns.pl [tab-separated table]
# "[column title]" "[another column title]" [etc.] > [output table path]
use strict;
use warnings;
my $table = $ARGV[0];
my @titles_of_columns_to_concatenate = @ARGV[1..$#ARGV];
my $NEWLINE = "\n";
my $DELIMITER = "\t";
my $NO_DATA = "";
# verifies that input file exists and is not empty
if(!$table or !-e $table or -z $table)
{
print STDERR "Error: table not provided, does not exist, or empty:\n\t"
.$table."\nExiting.\n";
die;
}
# verifies that titles of column to concatenate are provided and make sense
if(!scalar @titles_of_columns_to_concatenate)
{
print STDERR "Error: title of columns to concatenate not provided. Exiting.\n";
die;
}
# reads in and processes input table
my %column_to_concatenate = (); # key: column (0-indexed) -> value: 1 if column will be concatenated
my %column_title_to_column = ();
foreach my $column_title(@titles_of_columns_to_concatenate)
{
$column_title_to_column{$column_title} = -1;
}
my $first_line = 1;
my @column_to_column_title = (); # key: column (0-indexed) -> value: column title
open TABLE, "<$table" || die "Could not open $table to read; terminating =(\n";
while(<TABLE>) # for each row in the file
{
chomp;
my $line = $_;
if($line =~ /\S/) # if row not empty
{
my @items_in_line = split($DELIMITER, $line, -1);
if($first_line) # column titles
{
@column_to_column_title = @items_in_line;
# identifies columns to merge
my $column = 0;
foreach my $column_title(@items_in_line)
{
if(defined $column_title_to_column{$column_title})
{
if($column_title_to_column{$column_title} == -1)
{
$column_title_to_column{$column_title} = $column;
$column_to_concatenate{$column} = 1;
}
else
{
print STDERR "Warning: column ".$column_title." encountered more "
."than once in table ".$table."\n";
}
}
$column++;
}
# verifies that all columns have been found
foreach my $column_title(keys %column_title_to_column)
{
if($column_title_to_column{$column_title} == -1)
{
print STDERR "Error: expected column title ".$column_title
." not found in table ".$table
."\nExiting.\n";
die;
}
$column++;
}
# generates concatenated column title
my $concatenated_column_title = "compiled_".join("_", @titles_of_columns_to_concatenate);
# prints existing column titles
print $line.$DELIMITER;
# prints concatenated column title
print $concatenated_column_title.$NEWLINE;
$first_line = 0; # next line is not column titles
}
else # column values (not titles)
{
# retrieves column values and titles to concatenate
my %column_value_to_titles = (); # key: column value -> value: comma-separated list of selected column titles containing this value
my @values_in_order = ();
foreach my $column_title(@titles_of_columns_to_concatenate)
{
my $column = $column_title_to_column{$column_title};
my $value = $items_in_line[$column];
if(defined $value and length $value)
{
# saves column value in order it appeared, if we haven't seen it before
if(!$column_value_to_titles{$value})
{
push(@values_in_order, $value)
}
# saves column title
if($column_value_to_titles{$value})
{
$column_value_to_titles{$value} .= ", ";
}
$column_value_to_titles{$value} .= $column_title;
}
}
# puts together concatenated values and column titles to print
my $concatenated_values = "";
foreach my $value(@values_in_order)
{
if($concatenated_values)
{
$concatenated_values .= "; ";
}
$concatenated_values .= $value." (".$column_value_to_titles{$value}.")";
}
# prints existing column titles
print $line.$DELIMITER;
# prints concatenated column title
print $concatenated_values.$NEWLINE;
}
}
}
close TABLE;
# August 26, 2021