Skip to content

Commit 054aa4e

Browse files
willr3stalep
andcommitted
add load-legacy-tests, load-legacy-runs, and verifyimport commands
* add sqlall to jq migration for legacy extractors * add PostgreSQL jsonpath filter expression conversion to JQ * only queue top level nodes for upload * change Work to a set of activeNodes Co-authored-by: Ståle Pedersen <stalep@gmail.com>
1 parent 6893c38 commit 054aa4e

21 files changed

Lines changed: 2729 additions & 85 deletions

README.loadlegacy.md

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Load Legacy
2+
3+
This adds two commands `load-legacy-tests` and `load-legacy-runs` that are used to convert the existing Horreum
4+
model into the h5m model. Run this with a local backup of a Horreum instance.
5+
6+
*DO NOT* run these commands on a production database.
7+
8+
## Setup
9+
10+
Make sure `quarkus.datasource.db-kind=postgresql` is set in `src/main/resources/application.properties`, because the legacy model requires PostgreSQL jsonpath.
11+
12+
```
13+
quarkus.datasource.db-kind=postgresql
14+
```
15+
16+
Then build h5m
17+
```shell
18+
mvn clean package -Pnative
19+
```
20+
21+
## Command Overview
22+
23+
### load-legacy-tests
24+
25+
This command works by identifying the schemas used across all non-deleted runs for the test and creating a set of nodes that represent all the Transformers, Extractors, Labels, and Variables.
26+
The new nodes de-duplicate reused Extractors and eliminate Variables that do not make changes to existing Labels.
27+
There are no shared Nodes between Folders.
28+
29+
This command works by _creating_ new tables in the legacy schema. The tables represent all the `$schema` paths in the runs and datasets tables.
30+
The new tables are created the first time the command is run against a legacy schema and can take several minutes.
31+
The tables are re-used for subsequent calls to load.
32+
33+
### load-legacy-runs
34+
35+
This will load all non-deleted runs (optionally only those for a specific `testId`) into the Folder that has the same name as the legacy test.
36+
37+
> Note: The `Folder` needs to already exist so use load-legacy-tests before load-legacy-runs.
38+
39+
## Database Setup and Usage
40+
41+
Start with a postgresql instance running a copy of the Horreum database listening on port `6000`.
42+
```bash
43+
podman run --name hdb \
44+
-v <backup_path>:/var/lib/postgresql/data:rw,Z \
45+
-e POSTGRES_DB=horreum \
46+
-e PGDATABASE=horreum \
47+
-e POSTGRES_USER=<username> \
48+
-e PGUSER=<username> \
49+
-e PGPASSWORD=<password> \
50+
-e POSTGRES_PASSWORD=<password> \
51+
-p 6000:<containerPort> \
52+
mirror.gcr.io/library/postgres:16
53+
```
54+
Start a postgres instance to run the h5m database
55+
```bash
56+
podman run --name h5m \
57+
-e POSTGRES_DB=quarkus \
58+
-e POSTGRES_USER=quarkus \
59+
-e POSTGRES_PASSWORD=quarkus \
60+
-p 5432:5432 \
61+
mirror.gcr.io/library/postgres:17
62+
```
63+
Specify the database url in a `.env` file
64+
```shell
65+
quarkus.datasource.jdbc.url=jdbc:postgresql://0.0.0.0:5432/quarkus
66+
```
67+
68+
Load all the legacy tests with
69+
```bash
70+
h5m load-legacy-tests username=<username> password=<password> url=jdbc:postgresql://0.0.0.0:6000/horreum
71+
```
72+
73+
It can take several minutes for the first invocation against the Horreum database as h5m will be scanning all runs and datasets to create reference tables.
74+
75+
Loading all runs at once will overwhelm h5m when using the default configuration. It is best to load a single test at a time:
76+
```bash
77+
h5m load-legacy-runs testId=391 username=<username> password=<password> url=jdbc:postgresql://0.0.0.0:6000/horreum
78+
```
79+
Loading in this manner will allow the workQueue to empty (before h5m exits) rather than the loader thread flooding the unbounded queue.
80+
81+
## Unit Testing
82+
83+
There are two unit tests in `H5mTest` that are disabled because they rely on a running Horreum database. They are for debugging purposes and are not
84+
intended to be enabled.

src/main/java/io/hyperfoil/tools/h5m/api/NodeType.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
@Schema(description = "The type of transformation a node performs")
66
public enum NodeType {
7+
EDIVISIVE("ed"),
78
FINGERPRINT("fp"),
89
FIXED_THRESHOLD("ft"),
910
JQ("jq"),

src/main/java/io/hyperfoil/tools/h5m/cli/H5m.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
@QuarkusMain
3131
@TopCommand
32-
@CommandLine.Command(name="", mixinStandardHelpOptions = true,separator = " ", subcommands={CommandLine.HelpCommand.class, AutoComplete.GenerateCompletion.class, ListCmd.class, AddCmd.class, RemoveCmd.class, AdminCmd.class, ExportFolder.class, ImportFolder.class})
32+
@CommandLine.Command(name="", mixinStandardHelpOptions = true,separator = " ", subcommands={CommandLine.HelpCommand.class, AutoComplete.GenerateCompletion.class, ListCmd.class, AddCmd.class, RemoveCmd.class, AdminCmd.class, ExportFolder.class, ImportFolder.class,LoadLegacyTests.class, LoadLegacyRuns.class, VerifyLegacy.class})
3333
public class H5m implements QuarkusApplication {
3434

3535
//@Inject
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package io.hyperfoil.tools.h5m.cli;
2+
3+
import com.fasterxml.jackson.databind.JsonNode;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import io.agroal.api.AgroalDataSource;
6+
import io.agroal.api.configuration.supplier.AgroalPropertiesReader;
7+
import io.hyperfoil.tools.h5m.api.Folder;
8+
import io.hyperfoil.tools.h5m.entity.FolderEntity;
9+
import io.hyperfoil.tools.h5m.svc.FolderService;
10+
import jakarta.inject.Inject;
11+
import picocli.CommandLine;
12+
13+
import java.sql.Connection;
14+
import java.sql.PreparedStatement;
15+
import java.sql.ResultSet;
16+
import java.sql.Statement;
17+
import java.util.HashMap;
18+
import java.util.Map;
19+
import java.util.concurrent.Callable;
20+
21+
@CommandLine.Command(name="load-legacy-runs")
22+
public class LoadLegacyRuns implements Callable<Integer> {
23+
24+
@Inject
25+
FolderService folderService;
26+
27+
@CommandLine.Option(names = {"username"}, description = "legacy db username", defaultValue = "quarkus") String username;
28+
@CommandLine.Option(names = {"password"}, description = "legacy db password", defaultValue = "quarkus") String password;
29+
@CommandLine.Option(names = {"url"}, description = "legacy connection url",defaultValue = "jdbc:postgresql://0.0.0.0:") String url;
30+
@CommandLine.Option(names = {"testId"}, description = "specify which test to load. Loads all if unspecified" ) Long testId;
31+
@CommandLine.Option(names = {"limit"}, description = "max runs to load", defaultValue = "-1") int limit;
32+
33+
@Override
34+
public Integer call() throws Exception {
35+
Map<String, String> props = new HashMap<>();
36+
props.put(AgroalPropertiesReader.MAX_SIZE, "1");
37+
props.put(AgroalPropertiesReader.MIN_SIZE, "1");
38+
props.put(AgroalPropertiesReader.INITIAL_SIZE, "1");
39+
props.put(AgroalPropertiesReader.MAX_LIFETIME_S, "57");
40+
props.put(AgroalPropertiesReader.ACQUISITION_TIMEOUT_S, "54");
41+
props.put(AgroalPropertiesReader.PRINCIPAL,username); //username
42+
props.put(AgroalPropertiesReader.CREDENTIAL,password);//password
43+
props.put(AgroalPropertiesReader.PROVIDER_CLASS_NAME , "org.postgresql.Driver");
44+
props.put(AgroalPropertiesReader.JDBC_URL, url );
45+
AgroalDataSource ds = AgroalDataSource.from(new AgroalPropertiesReader()
46+
.readProperties(props)
47+
.get());
48+
49+
Map<Long,String> tests = new HashMap<>();
50+
ObjectMapper mapper = new ObjectMapper();
51+
try(Connection connection = ds.getConnection()){
52+
if(testId!=null && testId > -1){
53+
try(PreparedStatement statement = connection.prepareStatement("select name from test where id = ?")){
54+
statement.setLong(1, testId);
55+
try (ResultSet rs = statement.executeQuery()){
56+
while(rs.next()){
57+
tests.put(testId, rs.getString("name"));
58+
}
59+
}
60+
}
61+
}else {
62+
try (Statement statement = connection.createStatement()) {
63+
try (ResultSet rs = statement.executeQuery("select id,name from test")) {
64+
while (rs.next()) {
65+
tests.put(rs.getLong(1), rs.getString(2));
66+
}
67+
}
68+
}
69+
}
70+
System.out.println("loaded "+tests.size()+" legacy tests");
71+
for(Long testId : tests.keySet()){
72+
String name = tests.get(testId);
73+
Folder folder = folderService.byName(name);
74+
if(folder == null){
75+
System.out.println("Failed to find Folder for test "+name+" id="+testId);
76+
continue;
77+
}
78+
try (PreparedStatement ps = connection.prepareStatement("select count(id) from run where testid = ? and trashed = false")) {
79+
ps.setLong(1, testId);
80+
try (ResultSet rs = ps.executeQuery()){
81+
while(rs.next()){
82+
System.out.println("loading "+rs.getLong(1)+" uploads to "+name);
83+
}
84+
}
85+
}
86+
String runQuery = limit > 0
87+
? "select id,data from run where testid = ? and trashed = false order by id desc limit ?"
88+
: "select id,data from run where testid = ? and trashed = false order by id desc";
89+
connection.setAutoCommit(false);
90+
try (PreparedStatement ps = connection.prepareStatement(runQuery)) {
91+
ps.setFetchSize(5);
92+
ps.setLong(1, testId);
93+
if (limit > 0) ps.setInt(2, limit);
94+
int count = 0;
95+
try (ResultSet rs = ps.executeQuery()) {
96+
while(rs.next()){
97+
Long id = rs.getLong(1);
98+
System.out.println(name+" "+id);
99+
JsonNode data = mapper.readTree(rs.getCharacterStream("data"));
100+
folderService.upload(folder.name(),null,data);
101+
count++;
102+
}
103+
}
104+
System.out.println("loaded " + count + " runs");
105+
} finally {
106+
connection.setAutoCommit(true);
107+
}
108+
}
109+
} finally {
110+
ds.close();
111+
}
112+
return 0;
113+
}
114+
}

0 commit comments

Comments
 (0)