Skip to content

Commit 7b9057a

Browse files
committed
feat: adaptation de l'etl pour une utilisation avec Agents en intervention et Scaleway
1 parent e7f090c commit 7b9057a

7 files changed

Lines changed: 81 additions & 33 deletions

File tree

.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ METABASE_USERNAME=rdv_metabase
44
RDV_SOLIDARITES_DB_URL=postgresql://user:pwd@localhost:10000/rdv_solidarites
55
RDV_SERVICE_PUBLIC_DB_URL=postgresql://user:pwd@localhost:10000/rdv_service_public
66
RDV_INSERTION_DB_URL=postgresql://user:pwd@localhost:10000/rdv_insertion
7+
8+
# CONFIG_PATH=
9+
# ORIGIN_DB_URL=

Dockerfile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
FROM ruby:3.3.3-slim
2+
3+
# Add PostgreSQL repository for version 16
4+
RUN apt-get update && apt-get install -y gnupg2 lsb-release wget \
5+
&& wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
6+
&& echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
7+
# Install PostgreSQL 16 client and other dependencies
8+
&& apt-get update && apt-get install -y \
9+
build-essential \
10+
curl \
11+
git \
12+
libpq-dev \
13+
postgresql-client-16 \
14+
&& rm -rf /var/lib/apt/lists/*
15+
16+
WORKDIR /app
17+
18+
# Copy the Gemfile and Gemfile.lock
19+
COPY Gemfile Gemfile.lock ./
20+
21+
# Copy the rest of the application
22+
COPY . .
23+
24+
# Install gems
25+
RUN bundle install --jobs 4
26+
27+
# Set environment variables
28+
ENV LANG=C.UTF-8
29+
30+
# Command to run
31+
CMD ["echo", "use -it to run the container"]

Gemfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,3 @@ gem "pg", "~> 1.5"
99
gem "dotenv", "~> 3.1"
1010

1111
gem "anonymizer", git: "https://github.com/betagouv/rdv-service-public.git", branch: "production", glob: "lib/anonymizer/anonymizer.gemspec"
12-
# gem "anonymizer", path: "../rdv-service-public/lib/anonymizer"

README.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,26 @@ Le user Postgres de la base remplie par l’ETL utilisé par Metabase n’a pas
3737

3838
## Usage en staging et production
3939

40+
### Hébergeur Scalingo
41+
4042
```shell
4143
scalingo --region osc-secnum-fr1 --app rdv-service-public-etl-staging run --detached "bundle exec ruby main.rb --app rdvi"
4244
```
4345

4446
Des CRON jobs réguliers seront bientôt configurés pour lancer ça.
4547

48+
### Hébergeur Scaleway
49+
50+
TODO
51+
4652
## Usage en local
4753

4854
> [!WARNING]
4955
> Assurez vous de supprimer les fichiers de dumps, les bases de données restaurées, et les mots de passe des users Postgres utilisés en local après vos tests
5056
5157
### Variables d’environnement
5258

53-
Copiez les variables d’environnement dans un fichier `.env` :
59+
Copiez le fichier `.env.example` vers `.env`, puis renseignez-y les variables d’environnement :
5460

5561
`cp .env.example .env`
5662

@@ -99,3 +105,21 @@ Dans un autre terminal, lancer l’ETL :
99105
```shell
100106
bundle exec ruby main.rb --app rdvs
101107
```
108+
109+
## Usage en local avec Docker
110+
111+
Commencer par renseigner les variables d'environnement nécessaires dans le fichier `.env`, par exemple :
112+
113+
```dotenv
114+
ETL_DB_URL=postgresql://esd:[password]@192.168.120.44:5432/aei-etl
115+
METABASE_USERNAME=esd
116+
ORIGIN_DB_URL=postgresql://esd:[password]@192.168.120.44:5432/api-aei
117+
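# CONFIG_PATH doit être le chemin d'un fichier local existant (main.rb vérifie File.exist?). Téléchargez-le au préalable depuis :
# https://gitlab.com/incubateur-territoires/startups/agents-intervention/agents-en-intervention/-/raw/feat/metabase/config.etl.yml
CONFIG_PATH=config.etl.yml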
118+
```
119+
120+
Puis :
121+
122+
```shell
123+
docker build -t etl .
124+
docker run --env-file .env -it etl bundle exec ruby main.rb --app [app]
125+
```

lib/etl.rb

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
class Etl
99
include Utils
1010

11-
VALID_APPS = %w[rdvi rdvs rdvsp].freeze
11+
attr_reader :app, :etl_db_url, :origin_db_url, :config_path, :metabase_username
1212

13-
attr_reader :app, :etl_db_url, :rdv_db_url, :config_path, :metabase_username
14-
15-
def initialize(app:, etl_db_url:, rdv_db_url:, config_path:, metabase_username:)
13+
def initialize(app:, etl_db_url:, origin_db_url:, config_path:, metabase_username:)
1614
@app = app
17-
raise 'invalid app' if VALID_APPS.exclude?(app)
1815

1916
@etl_db_url = etl_db_url
20-
@rdv_db_url = rdv_db_url
17+
@origin_db_url = origin_db_url
2118
@config_path = config_path
2219
@metabase_username = metabase_username
2320
end
@@ -30,8 +27,8 @@ def run
3027
@config = Anonymizer::Config.new(YAML.safe_load(File.read(config_path)))
3128

3229
# make sure origin db connection works
33-
log_around "connect to RDV database #{rdv_db_url}" do
34-
ActiveRecord::Base.establish_connection rdv_db_url
30+
log_around "connect to origin database #{origin_db_url}" do
31+
ActiveRecord::Base.establish_connection origin_db_url
3532
ActiveRecord::Base.connection # triggers connection
3633
end
3734

@@ -54,7 +51,7 @@ def run
5451
pg_dump --clean --no-privileges --format tar \
5552
#{@config.truncated_table_names.map { "--exclude-table #{_1}" }.join(' ')} \
5653
-f #{dump_filename} \
57-
#{rdv_db_url}
54+
#{origin_db_url}
5855
SH
5956
)
6057
end
@@ -84,8 +81,13 @@ def run
8481
# workaround for a problematic column that we could also exclude
8582
# ERROR: cannot insert a non-DEFAULT value into column "text_search_terms" (PG::GeneratedAlways)
8683
# DÉTAIL : Column "text_search_terms" and "text_search_terms_with_notification_email" are generated columns.
87-
run_sql_command %(ALTER TABLE users DROP COLUMN IF EXISTS text_search_terms CASCADE)
88-
run_sql_command %(ALTER TABLE users DROP COLUMN IF EXISTS text_search_terms_with_notification_email CASCADE)
84+
# Vérifier que la table users existe avant de faire l'alter table
85+
if ActiveRecord::Base.connection.table_exists?("users")
86+
run_sql_command %(ALTER TABLE users DROP COLUMN IF EXISTS text_search_terms CASCADE)
87+
run_sql_command %(ALTER TABLE users DROP COLUMN IF EXISTS text_search_terms_with_notification_email CASCADE)
88+
else
89+
logger.info "La table users n'existe pas, aucune modification n'est appliquée."
90+
end
8991

9092
# STEP : move from public to target schema
9193
target_schema = app

main.rb

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,40 +11,29 @@
1111

1212
app = nil
1313
OptionParser.new do |opts|
14-
opts.on('-a', '--app APP', Etl::VALID_APPS) { app = _1 }
14+
opts.on('-a', '--app APP') { app = _1 }
1515
end.parse!
1616

17-
config_path = if ENV["CONFIG_PATH"] && File.exist?(ENV["CONFIG_PATH"])
18-
ENV["CONFIG_PATH"]
19-
else
20-
config_url = {
21-
"rdvi" => "https://raw.githubusercontent.com/betagouv/rdv-insertion/main/config/anonymizer.yml",
22-
"rdvs" => "https://raw.githubusercontent.com/betagouv/rdv-service-public/production/config/anonymizer.yml",
23-
"rdvsp" => "https://raw.githubusercontent.com/betagouv/rdv-service-public/production/config/anonymizer.yml"
24-
}[app]
25-
run_command "curl -o config.yml \"#{config_url}\""
26-
"config.yml"
17+
unless ENV["CONFIG_PATH"] && File.exist?(ENV["CONFIG_PATH"])
18+
raise "La variable d'environnement CONFIG_PATH n'est pas définie ou pointe vers un fichier inexistant"
2719
end
2820

29-
rdv_db_url_env_var = {
30-
"rdvi" => "RDV_INSERTION_DB_URL",
31-
"rdvs" => "RDV_SOLIDARITES_DB_URL",
32-
"rdvsp" => "RDV_SERVICE_PUBLIC_DB_URL"
33-
}[app]
21+
config_path = ENV["CONFIG_PATH"]
3422

23+
origin_db_url_env_var = "ORIGIN_DB_URL"
3524
etl_db_url_env_var = "ETL_DB_URL"
3625
metabase_username_env_var = "METABASE_USERNAME"
3726

3827
[
39-
rdv_db_url_env_var,
28+
origin_db_url_env_var,
4029
etl_db_url_env_var,
4130
metabase_username_env_var
4231
].each do |env_var|
4332
raise "Missing environment variable #{env_var}" if ENV[env_var].blank?
4433
end
4534

46-
rdv_db_url = ENV[rdv_db_url_env_var]
35+
origin_db_url = ENV[origin_db_url_env_var]
4736
etl_db_url = ENV[etl_db_url_env_var]
4837
metabase_username = ENV[metabase_username_env_var]
4938

50-
Etl.new(app:, etl_db_url:, rdv_db_url:, config_path:, metabase_username:).run
39+
Etl.new(app:, etl_db_url:, origin_db_url:, config_path:, metabase_username:).run

tests/test_etl.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_something
1919
Etl.new(
2020
app: "rdvs",
2121
etl_db_url: "postgresql://localhost/rdv_sp_etl_test_target",
22-
rdv_db_url: "postgresql://localhost/rdv_sp_etl_test_source",
22+
origin_db_url: "postgresql://localhost/rdv_sp_etl_test_source",
2323
config_path: File.expand_path("config.yml", File.dirname(__FILE__)),
2424
metabase_username: "rdv_sp_etl_metabase_user"
2525
).run

0 commit comments

Comments
 (0)