-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdumpApiCommonWdkBatchesForSolr
More file actions
executable file
·86 lines (62 loc) · 3.03 KB
/
dumpApiCommonWdkBatchesForSolr
File metadata and controls
executable file
·86 lines (62 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl
use lib "$ENV{GUS_HOME}/lib/perl";
use Getopt::Long;
use strict;
use DBI;
use SiteSearchData::Model::Utils;
# Main script body: parse the command line, connect to the database, then ask
# the WDK service for one report per fixed record type and one per organism.

# Prefix handed to every runWdkReport call (presumably used to name the batch
# directories created under targetDir -- confirm in SiteSearchData::Model::Utils).
my $BATCH_DIR_PREFIX = "solr-json-batch";

my ($wdkServiceUrl, $targetDir, $numberOfOrganisms, $verbose, $projectId);

# GetOptions returns false on an unknown option or a malformed value (e.g. a
# non-integer --numberOfOrganisms); show the usage instead of carrying on with
# a half-parsed command line.
GetOptions("targetDir=s"         => \$targetDir,
           "wdkServiceUrl=s"     => \$wdkServiceUrl,
           "projectId=s"         => \$projectId,
           "numberOfOrganisms=i" => \$numberOfOrganisms,
           "verbose!"            => \$verbose)
    or usage();

# Autoflush STDOUT so progress messages appear as they are printed.
$| = 1;

usage() unless ($targetDir && $wdkServiceUrl);
die "Error: targetDir '$targetDir' is not a directory\n" unless -d $targetDir;

# --projectId stays optional for backward compatibility, but without it the
# organism query filters on an empty project id, so say so rather than
# silently dumping no organisms.
unless (defined $projectId && length $projectId) {
    warn "Warning: --projectId not provided; the organism query will filter on an empty project id\n";
}

my $gusProps   = SiteSearchData::Model::Utils::getPropsFromFile("$ENV{GUS_HOME}/config/gus.config");
my $modelProps = SiteSearchData::Model::Utils::getPropsFromFile("$ENV{GUS_HOME}/config/SiteSearchData/model.prop");
my $dbh        = SiteSearchData::Model::Utils::getDbh($gusProps);

# SQL used by getOrganisms().  It is built only after the handle exists so the
# project id can be escaped with the driver's own quoting rules instead of
# being pasted raw between single quotes.  For an ordinary project id the
# resulting statement text is the same as before.
my $ORG_ABBREV_SQL =
    "select distinct internal_abbrev from apidbtuning.OrganismAttributes where project_id = "
    . $dbh->quote(defined $projectId ? $projectId : '')
    . " order by internal_abbrev";

# Fixed, non-organism record types; each call is tagged with the model's PROJECT_ID.
SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "pathway", $modelProps->{PROJECT_ID});
#SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "popset-isolate", $modelProps->{PROJECT_ID});
SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "compound", $modelProps->{PROJECT_ID});
SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "dataset-presenter", $modelProps->{PROJECT_ID});

# One report per organism.  The return value of runWdkReport is added to the
# running count (presumably 0 for a skipped organism and 1 for a dumped one --
# confirm in SiteSearchData::Model::Utils).
my $organismCount = 0;
foreach my $organismAbbrev (getOrganisms($dbh)) {
    # '>=' rather than '==' so the limit still triggers if the counter ever
    # advances by more than one in a single step.
    if ($numberOfOrganisms && $organismCount >= $numberOfOrganisms) {
        print STDOUT "Reached $numberOfOrganisms organisms. Quitting\n";
        last;
    }
    $organismCount += SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "organism", $organismAbbrev, 'organismAbbrev', $organismAbbrev);
}

print STDOUT "Done.\n";
exit 0;
# Run the organism-abbreviation query (the file-level $ORG_ABBREV_SQL) on the
# given database handle and return the first column of every row as a list,
# in the order the database returns them.  Dies if the statement cannot be
# prepared or executed.
sub getOrganisms {
    my ($dbh) = @_;

    my $query = $dbh->prepare($ORG_ABBREV_SQL)
        or die "Couldn't prepare the SQL statement: " . $dbh->errstr;
    $query->execute
        or die "Failed to execute statement: " . $query->errstr;

    my @abbrevs;
    # The list assignment is false only once fetchrow_array returns an empty
    # list, i.e. when the rows are exhausted.
    while (my ($abbrev) = $query->fetchrow_array()) {
        push @abbrevs, $abbrev;
    }

    return @abbrevs;
}
# Print the command-line help on STDERR and terminate the script (via die).
# The text now documents --projectId, which the option parser accepts and the
# organism query filters on, and notes that the popset-isolate batch is
# currently switched off in the script body.
sub usage {
    die
"Dump ApiCommon batches to be loaded into solr from data produced by the workflow.
usage: dumpApiCommonWdkBatchesForSolr --wdkServiceUrl SERVICE_URL --targetDir TARGET_DIR [--projectId PROJECT_ID] [--numberOfOrganisms NUMBER] [--verbose]
Dump wdk records as Solr compatible Json
Dumps data in hard-coded set of batches:
- organisms (one batch per organism)
- pathways
- compounds
- datasets
- popset isolates (currently disabled)
Connects to the appDb found in $ENV{GUS_HOME}/config/gus.config, and queries it to find the list of organisms to dump.
Only organisms whose project_id in apidbtuning.OrganismAttributes equals PROJECT_ID are dumped.
Connects to a running SiteSearchData wdk service to generate the reports.
If numberOfOrganisms provided, dump at most that many in this run (doesn't count skipped organisms).
Calls the createWdkRecordsSolrBatch command to do the work.
";
}