Skip to content

Commit 422bc77

Browse files
ci: disaster recovery dry run action (#2015)
* ci: disaster recovery dry run action * add slack notification * slack action v1. Renamed secrets
1 parent 1366f44 commit 422bc77

File tree

1 file changed

+135
-0
lines changed

1 file changed

+135
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
---
# Disaster-recovery dry run: exercises the S3 point-in-time restore procedure
# against a dedicated test prefix in the staging bucket, then verifies that
# files created after the restore timestamp are rolled back while pre-existing
# files survive. Fails loudly to Slack (OHAI + AC channels) if any step breaks.
name: . ⚠️🔧 Test Recover S3 Repository back in time 🔧️⚠️

on:
  workflow_dispatch:
  schedule:
    # Scheduled to run at 7 a.m. on every day-of-week from Monday through Friday.
    - cron: "0 7 * * 1-5"

env:
  # All test objects live under this prefix so the dry run can never touch
  # real release artifacts in the bucket.
  MANDATORY_PREFIX: 'infrastructure_agent/test_disaster_recovery'
  TEST_FOLDER: 'test'
  IMAGE: 'ghcr.io/newrelic-forks/s3-pit-restore:latest'
  AWS_REGION: 'us-east-1'
  TEMP_AWS_PROFILE: 'temp_aws_profile'
  BUCKET_NAME: 'nr-downloads-ohai-staging'
  TESTING_FILE: 'test.txt'

jobs:
  recover-s3-repository:
    name: Execute S3 PIT restore for testing disaster recovery
    runs-on: ubuntu-24.04
    steps:
      # The restore tooling (Makefile + scripts) lives in the fork, not in
      # this repository.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: newrelic-forks/s3-pit-restore
          ref: master

      - name: Setup AWS credentials for Staging
        run: |
          ./setup_aws_credentials.sh
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.OHAI_AWS_ACCESS_KEY_ID_STAGING }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.OHAI_AWS_SECRET_ACCESS_KEY_STAGING }}
          AWS_ROLE_ARN: ${{ secrets.OHAI_AWS_ROLE_ARN_STAGING }}
          AWS_ROLE_SESSION_NAME: ${{ secrets.OHAI_AWS_ROLE_SESSION_NAME_STAGING }}
          AWS_SESSION_DURATION_SECONDS: 14400
          TEMP_AWS_PROFILE: ${{ env.TEMP_AWS_PROFILE }}

      - name: Add aws credentials and paths to env
        run: |
          echo AWS_PROFILE="${{ env.TEMP_AWS_PROFILE }}" >> $GITHUB_ENV
          echo AWS_REGION="${{ env.AWS_REGION }}" >> $GITHUB_ENV
          echo TEST_FOLDER_ABS_PATH="s3://${{ env.BUCKET_NAME }}/${{ env.MANDATORY_PREFIX }}/${{ env.TEST_FOLDER }}" >> $GITHUB_ENV

      # PERPETUAL_FILE.md is uploaded BEFORE the restore timestamp, so the
      # procedure must keep it; it is the positive control for the test.
      - name: set README and a file that will not be rolled back
        run: |
          echo "This folder is meant to test the disaster recovery dry-run." > DISASTER_TEST_README.md
          echo "Just to periodically ensure the procedure works" >> DISASTER_TEST_README.md
          aws s3 cp DISASTER_TEST_README.md s3://${{ env.BUCKET_NAME }}/${{ env.MANDATORY_PREFIX }}/README.md
          echo "This file should be present after running the procedure" > PERPETUAL_FILE.md
          aws s3 cp PERPETUAL_FILE.md s3://${{ env.BUCKET_NAME }}/${{ env.MANDATORY_PREFIX }}/${{ env.TEST_FOLDER }}/PERPETUAL_FILE.md

      # `set +e` because `aws s3 ls` exiting non-zero (folder absent) is the
      # EXPECTED outcome here; a zero exit means leftovers from a previous run.
      - name: ensure folders from previous execution do not exist
        run: |
          set +e
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}.original
          if [ $? -eq 0 ]; then
            echo "original folder ${{ env.TEST_FOLDER_ABS_PATH }}.original should not exist"
            exit 1
          fi
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}.restored
          if [ $? -eq 0 ]; then
            echo "restored folder ${{ env.TEST_FOLDER_ABS_PATH }}.restored should not exist"
            exit 1
          fi

      # The sleep guarantees the to-be-rolled-back file's timestamp is
      # clearly AFTER the captured restore point.
      - name: Get current datetime and sleep for a couple of minutes
        run: |
          now=$( date +"%m-%d-%Y %H:%M:%S %z" )
          echo "INIT_DATETIME=$now" >> $GITHUB_ENV
          sleep 120

      - name: create a file in the bucket to be rolled back (this file should be deleted by the procedure)
        run: |
          echo "this is a test" > ${{ env.TESTING_FILE }}
          aws s3 cp ${{ env.TESTING_FILE }} ${{ env.TEST_FOLDER_ABS_PATH }}/${{ env.TESTING_FILE }}
          # ensure the file is there
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}/${{ env.TESTING_FILE }}

      - name: Run S3 PIT restore in Staging S3 for the test folder
        run: |
          BUCKET="${{ env.BUCKET_NAME }}" \
          PREFIX="${{ env.MANDATORY_PREFIX }}/${{ env.TEST_FOLDER }}" \
          TIME="${{ env.INIT_DATETIME }}" \
          IMAGE="${{ env.IMAGE }}" \
          AWS_PROFILE="${{ env.TEMP_AWS_PROFILE }}" \
          make restore

      # Positive control: the pre-restore-point file must still be listed.
      - name: Ensure the perpetual file exists
        run: |
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}/PERPETUAL_FILE.md

      # Negative control: the post-restore-point file must be gone, so a
      # successful `ls` (exit 0) is a failure of the procedure.
      - name: Ensure the rollbacked file does not exist
        run: |
          set +e
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}/${{ env.TESTING_FILE }}
          if [ $? -eq 0 ]; then
            echo "The file ${{ env.TEST_FOLDER_ABS_PATH }}/${{ env.TESTING_FILE }} should have been deleted"
            exit 1
          fi

      # The restore tool is expected to stash the pre-restore state under
      # `<folder>.original`; verify the rolled-back file was preserved there.
      - name: Ensure the original with the original file exists
        run: |
          TZ="UTC" aws s3 ls ${{ env.TEST_FOLDER_ABS_PATH }}.original/${{ env.TESTING_FILE }}

      # Clean up so the "no leftovers" check passes on the next run.
      - name: Delete .original
        run: |
          aws s3 rm --recursive "${{ env.TEST_FOLDER_ABS_PATH }}.original"

      # NOTE: the two notification steps originally both declared `id: slack`,
      # which is invalid — step ids must be unique within a job. Renamed; the
      # ids are not referenced via `steps.<id>`, so this is a safe fix.
      - name: Send Slack notification to OHAI
        if: ${{ failure() }}
        id: slack-ohai
        uses: slackapi/slack-github-action@v1
        with:
          payload: |
            {
              "text": ":rotating_light: Dry-Run Recover S3 Repository failed :warning: :warning: :warning: @hero check <${{ env.GITHUB_JOB_URL }}> :rotating_light:"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.OHAI_SLACK_WEBHOOK }}
          GITHUB_JOB_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

      - name: Send Slack notification to AC
        if: ${{ failure() }}
        id: slack-ac
        uses: slackapi/slack-github-action@v1
        with:
          payload: |
            {
              "text": ":rotating_light: Dry-Run Recover S3 Repository failed :warning: :warning: :warning: @hero check <${{ env.GITHUB_JOB_URL }}> :rotating_light:"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.AC_SLACK_WEBHOOK }}
          GITHUB_JOB_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

0 commit comments

Comments
 (0)