-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfill_in_empty_column_values_from_other_column.pl
144 lines (124 loc) · 3.47 KB
/
fill_in_empty_column_values_from_other_column.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env perl
# Fills in empty values in column of interest with values from other column.
# Usage:
# perl fill_in_empty_column_values_from_other_column.pl [table] "[title of column to fill in]"
# "[title of column with potential replacement values]"
# Prints to console. To print to file, use
# perl fill_in_empty_column_values_from_other_column.pl [table] "[title of column to fill in]"
# "[title of column with potential replacement values]" > [output table path]
use strict;
use warnings;
# Command-line arguments:
#   $ARGV[0]  path to the input table (split on $DELIMITER; the first non-blank
#             row is treated as the column titles)
#   $ARGV[1]  title of the column whose empty values are to be filled in
#   $ARGV[2]  title of the column that supplies the replacement values
my $table = $ARGV[0];
my $title_of_column_to_fill_in = $ARGV[1];
my $title_of_column_with_replacement_values = $ARGV[2];

my $NEWLINE = "\n";
my $DELIMITER = "\t";

# verifies that input file exists and is not empty
# (tests definedness and length instead of truth so that a file literally named
# "0" is accepted, and never concatenates an undefined path into the message)
if(!defined $table or !length $table or !-e $table or -z $table)
{
    print STDERR "Error: table not provided, does not exist, or empty:\n\t"
        .(defined $table ? $table : "")."\nExiting.\n";
    die;
}

# verifies that both column titles were provided; an explicitly passed empty
# string is still allowed, only a missing argument is rejected. Without this
# check a missing title is compared as undef against every column title, which
# emits "uninitialized value" warnings and a misleading "could not find" error
if(!defined $title_of_column_to_fill_in
    or !defined $title_of_column_with_replacement_values)
{
    print STDERR "Error: title of column to fill in and title of column with "
        ."replacement values must both be provided.\nExiting.\n";
    die;
}
# reads in and processes input table
#
# The first non-blank row is treated as the header: it is searched for the two
# requested column titles and then printed unchanged. Every later non-blank row
# is printed with its cell in the column to fill in replaced by the cell from
# the replacement column whenever the former is empty. Blank rows (no
# non-whitespace characters) are dropped from the output.
my $first_line = 1;
my $column_to_fill_in = -1;               # 0-based index, -1 until found
my $column_with_replacement_values = -1;  # 0-based index, -1 until found

# Three-argument open with a lexical handle. The low-precedence "or" applies to
# the result of open(); with "||" the die was attached to the (always true)
# file name string and could never fire, so an unreadable file was silently
# treated as empty.
open(my $table_fh, "<", $table)
    or die "Could not open $table to read; terminating =(\n";
while(my $line = <$table_fh>) # for each row in the file
{
    chomp $line;
    next unless $line =~ /\S/; # skips empty rows

    # limit of -1 keeps trailing empty fields so the row width is preserved
    my @items_in_line = split(/\Q$DELIMITER\E/, $line, -1);

    if($first_line) # column titles
    {
        # identifies parameter columns
        for my $column (0 .. $#items_in_line)
        {
            my $column_title = $items_in_line[$column];
            if($column_title eq $title_of_column_to_fill_in)
            {
                if($column_to_fill_in != -1)
                {
                    print STDERR "Error: title of column to fill in "
                        .$title_of_column_to_fill_in." appears more than once in table:"
                        ."\n\t".$table."\nExiting.\n";
                    die;
                }
                $column_to_fill_in = $column;
            }
            if($column_title eq $title_of_column_with_replacement_values)
            {
                if($column_with_replacement_values != -1)
                {
                    print STDERR "Error: title of column with replacement values "
                        .$title_of_column_with_replacement_values." appears more than once in table:"
                        ."\n\t".$table."\nExiting.\n";
                    die;
                }
                $column_with_replacement_values = $column;
            }
        }

        # verifies that we have found both columns
        if($column_to_fill_in == -1)
        {
            print STDERR "Error: could not find title of column to fill in "
                .$title_of_column_to_fill_in." in table:\n\t".$table."\nExiting.\n";
            die;
        }
        if($column_with_replacement_values == -1)
        {
            print STDERR "Error: could not find title of column with replacement values "
                .$title_of_column_with_replacement_values." in table:\n\t".$table."\nExiting.\n";
            die;
        }

        # prints header line as is
        print $line.$NEWLINE;
        $first_line = 0; # next line is not column titles
        next;
    }

    # column values (not column titles)
    # a row too short to contain the replacement column contributes an empty value
    my $replacement_value = $items_in_line[$column_with_replacement_values];
    $replacement_value = "" unless defined $replacement_value;

    # fills in the empty value, but only when the row actually has that column:
    # rows that are too short are printed as they are, never padded
    if($column_to_fill_in <= $#items_in_line
        and !length $items_in_line[$column_to_fill_in])
    {
        $items_in_line[$column_to_fill_in] = $replacement_value;
    }

    # prints all values
    print join($DELIMITER, @items_in_line).$NEWLINE;
}
close $table_fh;
# August 24, 2021