2 changes: 1 addition & 1 deletion Gemfile
@@ -4,7 +4,7 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}.git" }
ruby "~> #{File.read(File.join(__dir__, '.ruby-version')).strip}"

gem 'rails', '~> 8.0.0'

gem 'activerecord-redshift-adapter'
gem 'ahoy_matey', '~> 3.0'
gem 'aws-sdk-kms', '~> 1.4'
gem 'aws-sdk-cloudwatchlogs', require: false
4 changes: 4 additions & 0 deletions Gemfile.lock
@@ -123,6 +123,9 @@ GEM
activemodel (= 8.0.2.1)
activesupport (= 8.0.2.1)
timeout (>= 0.4.0)
activerecord-redshift-adapter (8.0.0)
activerecord (>= 7.0, < 9.0)
pg (~> 1.0)
activestorage (8.0.2.1)
actionpack (= 8.0.2.1)
activejob (= 8.0.2.1)
@@ -834,6 +837,7 @@ PLATFORMS
x86_64-linux-musl

DEPENDENCIES
activerecord-redshift-adapter
ahoy_matey (~> 3.0)
aws-sdk-cloudwatchlogs
aws-sdk-kms (~> 1.4)
4 changes: 2 additions & 2 deletions app/jobs/reports/base_report.rb
@@ -6,12 +6,12 @@ module Reports
class BaseReport < ApplicationJob
queue_as :long_running

def self.transaction_with_timeout(rails_env = Rails.env)
def self.transaction_with_timeout(rails_env = Rails.env, shard: :read_replica)
# rspec-rails's use_transactional_tests does not seem to act as expected when switching
# connections mid-test, so we just skip for now :[
return yield if rails_env.test?

ActiveRecord::Base.connected_to(role: :reading, shard: :read_replica) do
ActiveRecord::Base.connected_to(role: :reading, shard:) do
ActiveRecord::Base.transaction do
quoted_timeout = ActiveRecord::Base.connection.quote(IdentityConfig.store.report_timeout)
ActiveRecord::Base.connection.execute("SET LOCAL statement_timeout = #{quoted_timeout}")
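The default shard remains :read_replica, so existing report jobs behave exactly as before; only callers that pass shard: explicitly are routed elsewhere. A minimal caller-side sketch (ExampleReport is hypothetical; the :data_warehouse shard name assumes the shards_config change to app/models/application_record.rb further down, and the real usage appears in the new report file that follows):

    module Reports
      class ExampleReport < BaseReport
        def report_body
          # No argument: unchanged behavior, reads from the read replica.
          transaction_with_timeout do
            ActiveRecord::Base.connection.execute('SELECT 1')
          end

          # Explicit shard: the same statement-timeout-wrapped transaction,
          # but run against the Redshift-backed data_warehouse connection.
          transaction_with_timeout(shard: :data_warehouse) do
            ActiveRecord::Base.connection.execute('SELECT 1')
          end
        end
      end
    end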
81 changes: 81 additions & 0 deletions app/jobs/reports/data_warehouse/daily_auths_report.rb
@@ -0,0 +1,81 @@
# frozen_string_literal: true

module Reports
module DataWarehouse
class DailyAuthsReport < BaseReport
REPORT_NAME = 'dw-daily-auths-report'

attr_reader :report_date

def perform(report_date)
return unless IdentityConfig.store.data_warehouse_v3_enabled

@report_date = report_date

_latest, path = generate_s3_paths(REPORT_NAME, 'json', now: report_date)
body = report_body.to_json

[
bucket_name, # default reporting bucket
IdentityConfig.store.s3_public_reports_enabled && public_bucket_name,
].select(&:present?)
.each do |bucket_name|
upload_file_to_s3_bucket(
path: path,
body: body,
content_type: 'application/json',
bucket: bucket_name,
)
end
end

def start
report_date.beginning_of_day
end

def finish
report_date.end_of_day
end

def report_body
params = {
start: start,
finish: finish,
}.transform_values { |v| ActiveRecord::Base.connection.quote(v) }

sql = format(<<-SQL, params)
SELECT
COUNT(*)
, sp_return_logs.ial
, sp_return_logs.issuer
, service_providers.iaa
, MAX(service_providers.friendly_name) AS friendly_name
, MAX(agencies.name) AS agency
FROM
sp_return_logs
LEFT JOIN
service_providers ON service_providers.issuer = sp_return_logs.issuer
LEFT JOIN
agencies ON service_providers.agency_id = agencies.id
WHERE
sp_return_logs.returned_at::date BETWEEN %{start} AND %{finish}
AND sp_return_logs.billable = true
GROUP BY
sp_return_logs.ial
, sp_return_logs.issuer
, service_providers.iaa
SQL

results = transaction_with_timeout(shard: :data_warehouse) do
ActiveRecord::Base.connection.execute(sql)
end

{
start: start,
finish: finish,
results: results.as_json,
}
end
end
end
end
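Like the other report jobs, this is an ActiveJob that runs on the :long_running queue. A hedged sketch of invoking it (the date mirrors the spec below; the job is a no-op unless data_warehouse_v3_enabled is true):

    # Enqueue for asynchronous processing on the :long_running queue ...
    Reports::DataWarehouse::DailyAuthsReport.perform_later(Date.new(2021, 3, 1))

    # ... or run inline, e.g. from a console.
    Reports::DataWarehouse::DailyAuthsReport.new.perform(Date.new(2021, 3, 1))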
12 changes: 11 additions & 1 deletion app/models/application_record.rb
@@ -6,12 +6,22 @@ class ApplicationRecord < ActiveRecord::Base
# Forces ActiveRecord to select individual columns instead of SELECT *
self.ignored_columns = [:__fake_column__]

connects_to shards: {
shards_config = {
default: { writing: :primary, reading: :primary },
read_replica: {
# writing to the read_replica won't work, but AR needs to have something here
writing: :read_replica,
reading: :read_replica,
},
}

if IdentityConfig.store.data_warehouse_enabled && IdentityConfig.store.data_warehouse_v3_enabled
shards_config[:data_warehouse] = {
# writing to the data_warehouse won't work, but AR needs to have something here
writing: :data_warehouse,
reading: :data_warehouse,
}
end

connects_to shards: shards_config
end
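With the shard registered via connects_to, ActiveRecord's horizontal-sharding API can switch to it by name, which is what BaseReport.transaction_with_timeout does above. A minimal sketch of the routing, assuming both feature flags are enabled so the :data_warehouse shard exists:

    # Rails resolves the shard name through the connects_to mapping above, so
    # queries in this block hit the Redshift connection from config/database.yml.
    ActiveRecord::Base.connected_to(role: :reading, shard: :data_warehouse) do
      ActiveRecord::Base.connection.execute('SELECT 1')
    end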
6 changes: 6 additions & 0 deletions config/application.yml.default
@@ -80,6 +80,7 @@ country_phone_number_overrides: '{}'
dashboard_api_token: ''
dashboard_url: https://dashboard.demo.login.gov
data_warehouse_enabled: false
data_warehouse_v3_enabled: false
database_advisory_locks_enabled: false
database_host: ''
database_name: ''
@@ -401,6 +402,11 @@ redis_pool_size: 10
redis_throttle_pool_size: 5
redis_throttle_url: redis://localhost:6379/1
redis_url: redis://localhost:6379/0
redshift_database_name: 'analytics'
redshift_endpoint_host: 'test'
redshift_host: 'test'
redshift_secret_arn: 'arn:aws'
redshift_credentials: '{}'
reg_confirmed_email_max_attempts: 20
reg_confirmed_email_window_in_minutes: 60
reg_unconfirmed_email_max_attempts: 20
15 changes: 15 additions & 0 deletions config/database.yml
@@ -53,6 +53,8 @@ test:
<<: *test
database: <%= "identity_idp_worker_jobs_test#{ENV['TEST_ENV_NUMBER']}" %>
migrations_paths: db/worker_jobs_migrate
data_warehouse:
<<: *test

<%
worker_pool = if Identity::Hostdata.instance_role == 'worker'
@@ -112,3 +114,16 @@ production:
sslmode: <%= IdentityConfig.store.database_worker_jobs_sslmode %>
sslrootcert: '/usr/local/share/aws/rds-combined-ca-bundle.pem'
migrations_paths: db/worker_jobs_migrate
<% if IdentityConfig.store.data_warehouse_enabled && IdentityConfig.store.data_warehouse_v3_enabled %>
data_warehouse:
adapter: redshift
host: <%= IdentityConfig.store.redshift_endpoint_host %>
port: 5439
database: "<%= IdentityConfig.store.redshift_database_name %>"
username: "<%= IdentityConfig.store.redshift_credentials.fetch('username', '') %>"
password: "<%= IdentityConfig.store.redshift_credentials.fetch('password', '') %>"
encoding: utf8
database_tasks: false
replica: true
schema_search_path: idp,logs
<% end %>
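The username and password are pulled out of the redshift_credentials setting, which is declared as type: :json in lib/identity_config.rb below, so the deployed value is expected to parse into a hash with string keys. A hypothetical illustration of the shape (the checked-in default is '{}'; real credentials are never committed):

    # application.yml in a deployed environment (hypothetical value):
    # redshift_credentials: '{"username": "analytics_ro", "password": "example"}'
    creds = IdentityConfig.store.redshift_credentials
    creds.fetch('username', '')  # => "analytics_ro"
    creds.fetch('password', '')  # => "example"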
6 changes: 6 additions & 0 deletions lib/identity_config.rb
@@ -98,6 +98,7 @@ def self.store
config.add(:dashboard_api_token, type: :string)
config.add(:dashboard_url, type: :string)
config.add(:data_warehouse_enabled, type: :boolean)
config.add(:data_warehouse_v3_enabled, type: :boolean)
config.add(:database_advisory_locks_enabled, type: :boolean)
config.add(:database_host, type: :string)
config.add(:database_name, type: :string)
@@ -429,6 +430,11 @@ def self.store
config.add(:redis_throttle_pool_size, type: :integer)
config.add(:redis_throttle_url, type: :string)
config.add(:redis_url, type: :string)
config.add(:redshift_database_name, type: :string)
config.add(:redshift_host, type: :string)
config.add(:redshift_endpoint_host, type: :string)
config.add(:redshift_secret_arn, type: :string)
config.add(:redshift_credentials, type: :json)
config.add(:reg_confirmed_email_max_attempts, type: :integer)
config.add(:reg_confirmed_email_window_in_minutes, type: :integer)
config.add(:reg_unconfirmed_email_max_attempts, type: :integer)
124 changes: 124 additions & 0 deletions spec/jobs/reports/data_warehouse/daily_auths_report_spec.rb
@@ -0,0 +1,124 @@
require 'rails_helper'

RSpec.describe Reports::DataWarehouse::DailyAuthsReport do
subject(:report) { Reports::DataWarehouse::DailyAuthsReport.new }

let(:report_date) { Date.new(2021, 3, 1) }
let(:s3_public_reports_enabled) { true }
let(:s3_report_bucket_prefix) { 'reports-bucket' }
let(:s3_report_public_bucket_prefix) { 'public-reports-bucket' }

before do
allow(Identity::Hostdata).to receive(:env).and_return('int')
allow(Identity::Hostdata).to receive(:aws_account_id).and_return('1234')
allow(Identity::Hostdata).to receive(:aws_region).and_return('us-west-1')
allow(IdentityConfig.store).to receive(:s3_public_reports_enabled)
.and_return(s3_public_reports_enabled)
allow(IdentityConfig.store).to receive(:s3_report_bucket_prefix)
.and_return(s3_report_bucket_prefix)
allow(IdentityConfig.store).to receive(:s3_report_public_bucket_prefix)
.and_return(s3_report_public_bucket_prefix)
allow(IdentityConfig.store).to receive(:data_warehouse_enabled)
.and_return(true)
allow(IdentityConfig.store).to receive(:data_warehouse_v3_enabled)
.and_return(true)

Aws.config[:s3] = {
stub_responses: {
put_object: {},
},
}
end

describe '#perform' do
it 'uploads a file to S3 based on the report date' do
['reports-bucket.1234-us-west-1',
'public-reports-bucket-int.1234-us-west-1'].each do |bucket|
expect(report).to receive(:upload_file_to_s3_bucket).with(
path: 'int/dw-daily-auths-report/2021/2021-03-01.dw-daily-auths-report.json',
body: kind_of(String),
content_type: 'application/json',
bucket: bucket,
).exactly(1).time.and_call_original
end

expect(report).to receive(:report_body).and_call_original.once

report.perform(report_date)
end

context 'when s3 public reports are disabled' do
let(:s3_public_reports_enabled) { false }

it 'only uploads to one bucket' do
expect(report).to receive(:upload_file_to_s3_bucket).with(
hash_including(
path: 'int/dw-daily-auths-report/2021/2021-03-01.dw-daily-auths-report.json',
bucket: 'reports-bucket.1234-us-west-1',
),
).exactly(1).time.and_call_original

report.perform(report_date)
end
end

context 'with data' do
let(:timestamp) { report_date + 12.hours }

let(:agency) { create(:agency, name: 'The Agency') }

before do
ActiveRecord::Base.establish_connection(:data_warehouse)
create(
:service_provider,
issuer: 'a',
iaa: 'iaa123',
friendly_name: 'The App',
agency: agency,
)
create(:sp_return_log, ial: 1, issuer: 'a', returned_at: timestamp, billable: true)
create(:sp_return_log, ial: 1, issuer: 'a', returned_at: timestamp, billable: true)
create(:sp_return_log, ial: 2, issuer: 'a', returned_at: timestamp, billable: true)

        # extra non-billable row that shouldn't be counted
create(:sp_return_log, ial: 2, issuer: 'a', returned_at: timestamp, billable: false)
end

after do
ActiveRecord::Base.establish_connection(:data_warehouse)
end

it 'aggregates by issuer' do
expect(report).to receive(:upload_file_to_s3_bucket)
.exactly(2).times do |path:, body:, content_type:, bucket:|
parsed = JSON.parse(body, symbolize_names: true)

expect(parsed[:start]).to eq(report_date.beginning_of_day.as_json)
expect(parsed[:finish]).to eq(report_date.end_of_day.as_json)
expect(parsed[:results]).to match_array(
[
{
count: 2,
ial: 1,
issuer: 'a',
iaa: 'iaa123',
friendly_name: 'The App',
agency: 'The Agency',
},
{
count: 1,
ial: 2,
issuer: 'a',
iaa: 'iaa123',
friendly_name: 'The App',
agency: 'The Agency',
},
],
)
end

report.perform(report_date)
end
end
end
end