Pipeline scaling: In progress #1051


Draft: wants to merge 48 commits into base master
48 commits
a25ecb8  upload_download.sh (jakubadamek, Mar 20, 2024)
e54784e  upload_download.sh (jakubadamek, Mar 20, 2024)
bb89ed7  chmod utils (jakubadamek, Apr 4, 2024)
97ebb66  Runs fine with 79 patients (jakubadamek, Apr 4, 2024)
a751fa9  patient counts (jakubadamek, Apr 4, 2024)
cccfb35  venv (jakubadamek, Apr 4, 2024)
a5d39e6  Setting up Alloy DB (jakubadamek, Apr 8, 2024)
0188ba7  JDBC mode (jakubadamek, Apr 11, 2024)
e1dc44c  HAPI side fixes (jakubadamek, Apr 11, 2024)
84c67cc  Write temporary JDBC config for JDBC mode (jakubadamek, Apr 12, 2024)
125ce82  Add belgium zone (jakubadamek, Apr 13, 2024)
1fbe479  JDBC mode with CloudSQL Postgres (jakubadamek, Apr 15, 2024)
503eef7  config.sh (jakubadamek, Apr 16, 2024)
f38b213  Row count of Parquet tables (jakubadamek, Apr 16, 2024)
e69f304  Logging of output writing (jakubadamek, Apr 19, 2024)
db9fca4  unique output dir (jakubadamek, Apr 21, 2024)
9afa23c  Only pip install if necessary (jakubadamek, May 7, 2024)
7feecc7  Num workers (jakubadamek, May 7, 2024)
1e1a985  Use local dir if not dataflow (jakubadamek, May 7, 2024)
afabd4e  New HAPI VM. Update DB username. (jakubadamek, May 8, 2024)
292c674  Params for Flink parallelism (jakubadamek, May 9, 2024)
72567be  Param for Flink parallelism (jakubadamek, May 10, 2024)
0cb1837  Merge branch 'google:master' into master (jakubadamek, Jun 5, 2024)
071a0b8  Human time format (jakubadamek, Jun 5, 2024)
71eebf7  Move 'sleep 1' and kill more processes (jakubadamek, Jun 5, 2024)
ec260b8  Run multiple benchmarks (jakubadamek, Jun 5, 2024)
415ef1b  Log even if the process fails (jakubadamek, Jun 5, 2024)
fe275a5  Finalize the "run multiple" workflow (jakubadamek, Jun 5, 2024)
cc5b321  Unify the two config.sh files (jakubadamek, Jun 5, 2024)
bd43397  Check row count into log (jakubadamek, Jun 5, 2024)
a545e0f  Set job name to avoid Dataflow JDBC mode error (jakubadamek, Jun 6, 2024)
5af28f2  Move FHIR server to config; change job name (jakubadamek, Jun 7, 2024)
0cf2cf6  Move VM_INSTANCE to config.sh (jakubadamek, Jun 20, 2024)
8cf78fc  Add param for batchSize (jakubadamek, Jun 23, 2024)
65d003d  Implement multiple FHIR_SERVER_URL (jakubadamek, Jun 23, 2024)
48e55a0  Move runner to the inner most loop (jakubadamek, Jun 23, 2024)
6882a6c  Fix multiple typo (jakubadamek, Jun 23, 2024)
1e1fcd0  Add multiple batch sizes (jakubadamek, Jun 24, 2024)
212820d  ENABLE_SETUP_GOOGLE3 (jakubadamek, Jun 25, 2024)
3cf7880  Turn off fullsearch index (jakubadamek, Jul 1, 2024)
1065799  Fix typos (jakubadamek, Jul 1, 2024)
94087c5  Measure row counts (jakubadamek, Jul 1, 2024)
4db1ff4  Round duration to nearest second (jakubadamek, Jul 1, 2024)
510e4e5  Create separate nohup files based on date (jakubadamek, Jul 2, 2024)
5b3f133  Move logs to /tmp and the final results to ~ (jakubadamek, Jul 5, 2024)
98a01c3  Re-throw FHIR client exceptions #1112 (jakubadamek, Jul 6, 2024)
fba3c9a  Make Flink and Dataflow runners multiple (jakubadamek, Jul 6, 2024)
c07c905  mkdir tmp (jakubadamek, Jul 6, 2024)
1 change: 1 addition & 0 deletions performance-tests/scaling/.gitignore
@@ -0,0 +1 @@
/config.sh
31 changes: 31 additions & 0 deletions performance-tests/scaling/config.sh.tmpl
@@ -0,0 +1,31 @@
# Available patient counts: 79, 768, 7885, 79370, 791562
export MULTIPLE_PATIENTS="79 768 7885"
export MULTIPLE_JDBC_MODE="true false"
# DirectRunner, FlinkRunner or DataflowRunner
export MULTIPLE_FHIR_ETL_RUNNER="FlinkRunner DataflowRunner"
export MULTIPLE_FHIR_SERVER_URL="http://localhost:8080/fhir"
# The number of resources to fetch in each API call.
export MULTIPLE_BATCH_SIZE="100 300 1000 3000 10000"
# FlinkRunner parallelization
export MULTIPLE_FLINK_PARALLEL=150
# DataflowRunner workers
export MULTIPLE_DATAFLOW_WORKERS=65

#export PATIENTS=7885
#export JDBC_MODE=false
#export FHIR_ETL_RUNNER=DataflowRunner

# alloy or postgres
export DB_TYPE=postgres
export RUNNING_ON_HAPI_VM=false
export FHIR_UPLOADER_CORES=8
export ENABLE_UPLOAD=false
export ENABLE_DOWNLOAD=true
export ENABLE_SETUP_GOOGLE3=true

# pipeline-scaling-1, pipeline-scaling-2, pipeline-scaling-belgium
export POSTGRES_DB_INSTANCE="pipeline-scaling-2"
export PROJECT_ID="fhir-analytics-test"

# Comment this out when using run_multiple_benchmarks.sh (it sets FHIR_SERVER_URL per run).
export FHIR_SERVER_URL="http://localhost:8080/fhir"
export VM_INSTANCE="Name of your FHIR VM instance"
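For a single run, the singular variables (see the commented-out block above) replace the MULTIPLE_* lists. A hypothetical filled-in config.sh, with illustrative values drawn from the options in this template:

```bash
# Hypothetical single-run config.sh (values are illustrative, not recommendations).
export PATIENTS=7885
export JDBC_MODE=false
export FHIR_ETL_RUNNER=DataflowRunner
export FHIR_SERVER_URL="http://localhost:8080/fhir"
export BATCH_SIZE=1000
export FLINK_PARALLEL=150
export DATAFLOW_WORKERS=65

export DB_TYPE=postgres
export RUNNING_ON_HAPI_VM=false
export FHIR_UPLOADER_CORES=8
export ENABLE_UPLOAD=false
export ENABLE_DOWNLOAD=true
export ENABLE_SETUP_GOOGLE3=true
export POSTGRES_DB_INSTANCE="pipeline-scaling-2"
export PROJECT_ID="fhir-analytics-test"
export VM_INSTANCE="my-hapi-vm"   # hypothetical instance name
```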
18 changes: 18 additions & 0 deletions performance-tests/scaling/first_time_setup_google3.sh
@@ -0,0 +1,18 @@
set -e # Fail on errors.
set -x # Show each command.

source "./variables.sh"

GITS_DIR=~/gits
cd $GITS_DIR
[[ -d "fhir-data-pipes" ]] || git clone https://github.com/google/fhir-data-pipes.git
cd fhir-data-pipes

chmod -R 755 ./utils
sudo apt-get -y install maven
sudo apt-get -y install npm
mvn clean install -P dataflow-runner,cloudsql-postgres

sudo apt-get -y install postgresql-client
wget https://storage.googleapis.com/alloydb-auth-proxy/v1.7.1/alloydb-auth-proxy.linux.amd64 -O ~/Downloads/alloydb-auth-proxy
chmod +x ~/Downloads/alloydb-auth-proxy
7 changes: 7 additions & 0 deletions performance-tests/scaling/hapi_port_forward.sh
@@ -0,0 +1,7 @@
source ./variables.sh
set -o nounset
killall /usr/bin/ssh || true

gcloud compute ssh $VM_INSTANCE --zone $VM_ZONE --project $PROJECT_ID -- -o ProxyCommand='corp-ssh-helper %h %p' \
-NL 8080:localhost:8080 \
-NL 5432:localhost:5432 &
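One way to confirm the tunnel is up (a sketch; assumes the HAPI server is already running on the VM and that the VM exposes a local Postgres on 5432, as the tunnel flags suggest):

```bash
# Give the SSH tunnel a moment to establish, then probe both forwarded ports.
sleep 5
curl -sf http://localhost:8080/fhir/metadata > /dev/null && echo "FHIR port 8080 OK"
pg_isready -h localhost -p 5432 && echo "Postgres port 5432 OK"
```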
25 changes: 25 additions & 0 deletions performance-tests/scaling/k6/fhir_requests.js
@@ -0,0 +1,25 @@
// docker run --net=host --rm -i grafana/k6 run - <fhir_requests.js

import http from 'k6/http';
import { check, sleep } from 'k6';

export const options = {
  vus: 1000,
  duration: '60s',
};

export default function () {
  const prefix = "http://localhost:8080/fhir";
  const res = http.get(`${prefix}/Patient?_count=100`);
  check(res, { 'status was 200': (r) => r.status == 200 });
  const patientIds = res.json().entry.map(entry => entry.resource.id);
  // Pick a random patient from the search results and fetch its related resources.
  const randomIndex = Math.floor(Math.random() * patientIds.length);
  const selectedId = patientIds[randomIndex];
  http.get(prefix + "/Patient/" + selectedId);
  http.get(prefix + "/Encounter?patient=" + selectedId);
  http.get(prefix + "/Observation?patient=" + selectedId);
  http.get(prefix + "/MedicationRequest?patient=" + selectedId + "&status=active");

  sleep(1);
}
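The options block pins the load at 1000 virtual users for 60 seconds. k6 also accepts these as standard CLI flags, which makes it easy to sweep load levels the same way the shell scripts sweep batch sizes, without editing the script:

```bash
# Run the same script at a lighter load, reusing the Docker invocation
# from the header comment; --vus and --duration override the options block.
docker run --net=host --rm -i grafana/k6 run --vus 100 --duration 30s - <fhir_requests.js
```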
61 changes: 61 additions & 0 deletions performance-tests/scaling/run_multiple_benchmarks.sh
@@ -0,0 +1,61 @@
source config.sh

if [[ -n "$PATIENTS" ]]; then
echo "ERROR: Comment out PATIENTS in config.sh if running multiple"
exit 1
fi
if [[ -n "$JDBC_MODE" ]]; then
echo "ERROR: Comment out JDBC_MODE in config.sh if running multiple"
exit 1
fi
if [[ -n "$FHIR_ETL_RUNNER" ]]; then
echo "ERROR: Comment out FHIR_ETL_RUNNER in config.sh if running multiple"
exit 1
fi
if [[ -n "$FHIR_SERVER_URL" ]]; then
echo "ERROR: Comment out FHIR_SERVER_URL in config.sh if running multiple"
exit 1
fi
if [[ -n "$BATCH_SIZE" ]]; then
echo "ERROR: Comment out BATCH_SIZE in config.sh if running multiple"
exit 1
fi
if [[ -n "$FLINK_PARALLEL" ]]; then
echo "ERROR: Comment out FLINK_PARALLEL in config.sh if running multiple"
exit 1
fi
if [[ -n "$DATAFLOW_WORKERS" ]]; then
echo "ERROR: Comment out DATAFLOW_WORKERS in config.sh if running multiple"
exit 1
fi

set -e # Fail on errors.
set -x # Show each command.
set -o nounset

for dataflow in $MULTIPLE_DATAFLOW_WORKERS; do
  for flink in $MULTIPLE_FLINK_PARALLEL; do
    for batch in $MULTIPLE_BATCH_SIZE; do
      for p in $MULTIPLE_PATIENTS; do
        for j in $MULTIPLE_JDBC_MODE; do
          for server in $MULTIPLE_FHIR_SERVER_URL; do
            for f in $MULTIPLE_FHIR_ETL_RUNNER; do
              export PATIENTS=$p
              export JDBC_MODE=$j
              export FHIR_ETL_RUNNER=$f
              export FHIR_SERVER_URL=$server
              export BATCH_SIZE=$batch
              export FLINK_PARALLEL=$flink
              export DATAFLOW_WORKERS=$dataflow
              if [ "$ENABLE_SETUP_GOOGLE3" = true ]; then
                ./setup_google3.sh
                sleep 15
              fi
              ./upload_download.sh
            done
          done
        done
      done
    done
  done
done
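Given the "Create separate nohup files based on date" convention in the commits above, a hypothetical way to launch the full matrix in the background with a dated log (file name is illustrative):

```bash
# Sketch: run the benchmark matrix detached and keep a per-date log in /tmp.
nohup ./run_multiple_benchmarks.sh \
  > "/tmp/nohup-benchmarks-$(date +%Y-%m-%d).out" 2>&1 &
tail -F "/tmp/nohup-benchmarks-$(date +%Y-%m-%d).out"
```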
58 changes: 58 additions & 0 deletions performance-tests/scaling/setup_google3.sh
@@ -0,0 +1,58 @@
source ./variables.sh

set -e # Fail on errors.
set -x # Show each command.
set -o nounset

# Kill the current HAPI server so the database can be deleted and recreated.
"${RUN_ON_HAPI_STANZA[@]}" "sudo killall /usr/bin/java || true"
"${RUN_ON_HAPI_STANZA[@]}" "sudo killall python || true"
"${RUN_ON_HAPI_STANZA[@]}" "sudo killall python3 || true"
"${RUN_ON_HAPI_STANZA[@]}" "sudo killall sh || true"

case "$DB_TYPE" in
"alloy")
ALLOY_INSTANCE="projects/fhir-analytics-test/locations/us-central1/clusters/pipeline-scaling-alloydb-1/instances/pipeline-scaling-alloydb-largest"
sudo killall alloydb-auth-proxy || true
nohup ~/Downloads/alloydb-auth-proxy $ALLOY_INSTANCE &
sleep 1
if [[ "$ENABLE_UPLOAD" = true ]]; then
for cmd in "DROP DATABASE IF EXISTS" "CREATE DATABASE"; do
PGPASSWORD="$DB_PASSWORD" psql -h 127.0.0.1 -p 5432 -U "$DB_USERNAME" -c "$cmd $DB_PATIENTS"
done
else
# Check DB connection.
PGPASSWORD="$DB_PASSWORD" psql -h 127.0.0.1 -p 5432 -U "$DB_USERNAME" -c "SELECT 1"
fi
DB_CONNECTION="jdbc:postgresql:///${DB_PATIENTS}?127.0.0.1:5432"
;;
"postgres")
if [[ "$ENABLE_UPLOAD" = true ]]; then
gcloud sql databases delete "$DB_PATIENTS" --instance="$POSTGRES_DB_INSTANCE" --quiet || true
gcloud sql databases create "$DB_PATIENTS" --instance="$POSTGRES_DB_INSTANCE"
fi
DB_CONNECTION="jdbc:postgresql:///${DB_PATIENTS}?cloudSqlInstance=${PROJECT_ID}:${SQL_ZONE}:${POSTGRES_DB_INSTANCE}&socketFactory=com.google.cloud.sql.postgres.SocketFactory"
;;
*)
echo "Invalid DB_TYPE $DB_TYPE"
;;
esac

# shellcheck disable=SC2088
APPLICATION_YAML="~/gits/hapi-fhir-jpaserver-starter/src/main/resources/application.yaml"

# Update the DB connection config.
"${RUN_ON_HAPI_STANZA[@]}" "sed -i '/.*url: jdbc:postgresql:.*/c\\ url: ${DB_CONNECTION}' $APPLICATION_YAML"
"${RUN_ON_HAPI_STANZA[@]}" "sed -i '/ username: .*/c\\ username: ${DB_USERNAME}' $APPLICATION_YAML"
# Turn off search index because we don't use it and it might conflict between load-balanced HAPI servers.
# Reference: https://hapifhir.io/hapi-fhir/docs/server_jpa/elastic.html
"${RUN_ON_HAPI_STANZA[@]}" "sed -i '/ hibernate.search.enabled: true/c\\ hibernate.search.enabled: false' $APPLICATION_YAML"
# Start the HAPI server.
# shellcheck disable=SC2088
nohup "${RUN_ON_HAPI_STANZA[@]}" "~/gits/fhir-data-pipes/performance-tests/scaling/start_hapi_server.sh" >> "$TMP_DIR/nohup-hapi-$(date +%Y-%m-%d).out" 2>&1 &

if [ "$RUNNING_ON_HAPI_VM" = false ]; then
(sleep 15; "$DIR_WITH_THIS_SCRIPT/hapi_port_forward.sh") &
fi

# To follow the HAPI log: tail -F "$TMP_DIR/nohup-hapi-$(date +%Y-%m-%d).out"
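Several commits above ("Measure row counts", "Check row count into log") suggest verifying what actually landed in the database. One way to spot-check through the forwarded Postgres port, assuming HAPI's standard JPA schema (the `hfj_resource` table with its `res_type` column; this sketch is not part of the PR's scripts):

```bash
# Sketch: count uploaded resources per type over the local proxy/tunnel.
PGPASSWORD="$DB_PASSWORD" psql -h 127.0.0.1 -p 5432 -U "$DB_USERNAME" -d "$DB_PATIENTS" \
  -c "SELECT res_type, count(*) FROM hfj_resource GROUP BY res_type ORDER BY 2 DESC"
```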
6 changes: 6 additions & 0 deletions performance-tests/scaling/ssh_to_hapi.sh
@@ -0,0 +1,6 @@
source ./variables.sh

set -e # Fail on errors.
set -x # Show each command.

gcloud compute ssh $VM_INSTANCE --zone $VM_ZONE --project $PROJECT_ID -- -o ProxyCommand='corp-ssh-helper %h %p'
6 changes: 6 additions & 0 deletions performance-tests/scaling/start_hapi_server.sh
@@ -0,0 +1,6 @@
set -e # Fail on errors.
set -x # Show each command.

cd ~/gits/hapi-fhir-jpaserver-starter
export PATH=$PATH:~/Downloads/apache-maven-3.9.6/bin
mvn spring-boot:run -Pboot,cloudsql-postgres
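A cold `mvn spring-boot:run` can take minutes, while setup_google3.sh sleeps only 15 seconds before port-forwarding. One option is to poll the FHIR metadata endpoint until the server answers (a sketch, not part of this PR):

```bash
# Sketch: block until HAPI responds, checking every 10s with a 10-minute cap.
for i in $(seq 1 60); do
  curl -sf http://localhost:8080/fhir/metadata > /dev/null && break
  sleep 10
done
```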
@@ -0,0 +1 @@
{"Modules":[{"Key":"","Source":"","Dir":"."}]}
3 changes: 3 additions & 0 deletions performance-tests/scaling/terraform/.terraform/plugin_path
@@ -0,0 +1,3 @@
[
"/google/data/ro/teams/terraform/terraform_mpm/terraform_mpm.mpm/versions/1-d91dad6b_7ec29629_1bd5e9f0_59275854_e6928195"
]
@@ -0,0 +1,6 @@
{
"registry.terraform.io/hashicorp/google": {
"hash": "h1:VDpoLyNgMRdzdXjflbsmdjnYkDn8Q0Z5pWupwAgD+ZI=",
"version": "3.46.0"
}
}
Empty file.
98 changes: 98 additions & 0 deletions performance-tests/scaling/terraform/posgres.tf
@@ -0,0 +1,98 @@
module "pg" {
source = "terraform-google-modules/sql-db/google//modules/postgresql"
version = "~> 20.0"

name = var.pg_ha_name
random_instance_name = true
project_id = var.project_id
database_version = "POSTGRES_9_6"
region = "us-central1"

// Master configurations
tier = "db-custom-1-3840"
zone = "us-central1-c"
availability_type = "REGIONAL"
maintenance_window_day = 7
maintenance_window_hour = 12
maintenance_window_update_track = "stable"

deletion_protection = false

database_flags = [{ name = "autovacuum", value = "off" }]

user_labels = {
foo = "bar"
}

ip_configuration = {
ipv4_enabled = true
require_ssl = true
private_network = null
allocated_ip_range = null
authorized_networks = [
{
name = "${var.project_id}-cidr"
value = var.pg_ha_external_ip_range
},
]
}

backup_configuration = {
enabled = true
start_time = "20:55"
location = null
point_in_time_recovery_enabled = false
transaction_log_retention_days = null
retained_backups = 365
retention_unit = "COUNT"
}

// Read replica configurations
read_replica_name_suffix = "-test-ha"
read_replicas = [
{
name = "0"
zone = "us-central1-a"
availability_type = "REGIONAL"
tier = "db-custom-1-3840"
ip_configuration = local.read_replica_ip_configuration
database_flags = [{ name = "autovacuum", value = "off" }]
disk_autoresize = null
disk_autoresize_limit = null
disk_size = null
disk_type = "PD_HDD"
user_labels = { bar = "baz" }
encryption_key_name = null
},
]

db_name = var.pg_ha_name
db_charset = "UTF8"
db_collation = "en_US.UTF8"

additional_databases = [
{
name = "${var.pg_ha_name}-additional"
charset = "UTF8"
collation = "en_US.UTF8"
},
]

user_name = "tftest"
user_password = "foobar"

additional_users = [
{
name = "tftest2"
password = "abcdefg"
host = "localhost"
random_password = false
},
{
name = "tftest3"
password = "abcdefg"
host = "localhost"
random_password = false
},
]
}
4 changes: 4 additions & 0 deletions performance-tests/scaling/terraform/variables.tf
@@ -0,0 +1,4 @@
variable "project_id" {
description = "ID of the GCP project"
type = string
}
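Note that posgres.tf references `var.pg_ha_name`, `var.pg_ha_external_ip_range`, and `local.read_replica_ip_configuration`, none of which are declared in this diff, so `terraform plan` would fail as-is. A sketch of the missing declarations (shapes and defaults are assumptions inferred from the module usage above):

```hcl
variable "pg_ha_name" {
  description = "Name of the HA Postgres instance (referenced by posgres.tf)"
  type        = string
}

variable "pg_ha_external_ip_range" {
  description = "CIDR range allowed to reach the instance, e.g. \"192.10.10.10/32\""
  type        = string
}

locals {
  # Assumed shape: reuse the primary's ip_configuration for the read replicas.
  read_replica_ip_configuration = {
    ipv4_enabled       = true
    require_ssl        = true
    private_network    = null
    allocated_ip_range = null
    authorized_networks = [
      {
        name  = "${var.project_id}-cidr"
        value = var.pg_ha_external_ip_range
      },
    ]
  }
}
```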