Skip to content

Commit 0e63bc9

Browse files
stdavis and steveoh
committed
wip: backup process
Co-authored-by: steveoh <[email protected]>
1 parent 82a8beb commit 0e63bc9

File tree

9 files changed

+217
-17
lines changed

9 files changed

+217
-17
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
temp

.vscode/extensions.json

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
// See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations.
3+
// Extension identifier format: ${publisher}.${name}. Example: vscode.csharp
4+
// List of extensions which should be recommended for users of this workspace.
5+
"recommendations": [
6+
"editorconfig.editorconfig",
7+
"njpwerner.autodocstring",
8+
"ms-python.vscode-pylance",
9+
"ms-python.python",
10+
"donjayamanne.python-environment-manager",
11+
"charliermarsh.ruff",
12+
"tamasfe.even-better-toml"
13+
],
14+
// List of extensions recommended by VS Code that should not be recommended for users of this workspace.
15+
"unwantedRecommendations": []
16+
}

.vscode/settings.json

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"cSpell.words": ["orgid"],
3+
"editor.formatOnSave": true,
4+
"editor.rulers": [120],
5+
"coverage-gutters.showGutterCoverage": false,
6+
"coverage-gutters.showLineCoverage": true,
7+
"coverage-gutters.showRulerCoverage": false,
8+
"coverage-gutters.highlightdark": "rgb(61, 153, 112, .05)",
9+
"coverage-gutters.noHighlightDark": "rgb(255, 65, 54, .05)",
10+
"coverage-gutters.partialHighlightDark": "rgb(255, 133, 27, .05)",
11+
"python.languageServer": "Pylance",
12+
"python.testing.pytestEnabled": true,
13+
"editor.codeActionsOnSave": {
14+
"source.organizeImports": "explicit"
15+
},
16+
"[python]": {
17+
"editor.defaultFormatter": "charliermarsh.ruff"
18+
}
19+
}

README.md

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,22 @@
11
# project-moonwalk
2-
AGOL backups with style
2+
3+
Open source ArcGIS Online item backup and restore with style.
4+
5+
Add a `backup` tag to your item and moonwalk will start backing up your items.
6+
7+
This project is split into two parts: backup and restore. The backup service scans for items with the `backup` tag and uses the `createReplica` and `data` endpoints to extract the information necessary to restore the item. This data is placed in Google Cloud Storage. Backups are created daily and kept on a rolling 14-day schedule. Every Sunday, the backup is also stored in a special area for 90 days.
8+
9+
The restore service consists of a website that allows you to view your backups and trigger a restore.
10+
11+
## Installation
12+
13+
1. Run the terraform configuration to create the required cloud infrastructure
14+
15+
## Development
16+
17+
### Backups
18+
19+
1. conda create --name moonwalk-backup python=3.11
20+
1. conda activate moonwalk-backup
21+
1. cd packages/backup
22+
1. pip install -e ".[tests]"

packages/backup/.gitignore

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
secrets.json
2-
sheets-sa.json
3-
4-
*.py[cod]
5-
61
# Packages
72
.eggs/
83
*.egg
@@ -23,13 +18,19 @@ __pycache__
2318
# Installer logs
2419
pip-log.txt
2520

21+
*.py[cod]
2622
*.pyt.xml
2723
*.csv.xml
2824
arc.dir
2925
.pytest_cache
3026
__pycache__
3127

32-
.env/
28+
# MacOS
3329
.DS_Store
30+
31+
# Testing
3432
cov.xml
3533
.coverage
34+
35+
# Secrets
36+
secrets.json

packages/backup/setup.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,15 @@
3232
"Issue Tracker": "https://github.com/agrc/project-moonwalk/issues",
3333
},
3434
keywords=["gis"],
35-
install_requires=[
36-
"arcgis>=2.3,<2.4",
37-
],
35+
install_requires=["arcgis==2.*", "google-cloud-storage==2.*"],
3836
extras_require={
3937
"tests": [
40-
"pytest-cov>=3,<6",
41-
"pytest-instafail==0.5.*",
38+
"pytest-cov==5.*",
39+
"pytest-instafail==0.*",
4240
"pytest-mock==3.*",
4341
"pytest-ruff==0.*",
4442
"pytest-watch==4.*",
45-
"pytest>=6,<9",
46-
"black>=24.4.2,<24.5",
43+
"pytest==8.*",
4744
"ruff==0.*",
4845
]
4946
},
@@ -53,6 +50,7 @@
5350
entry_points={
5451
"console_scripts": [
5552
"backup = backup.main:backup",
53+
"backup_local = backup.main:local_backup",
5654
]
5755
},
5856
)

packages/backup/src/backup/__init__.py

Whitespace-only changes.

packages/backup/src/backup/main.py

+91-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
import json
2+
from datetime import datetime
23
from pathlib import Path
4+
from pprint import pprint
5+
from time import sleep
36

47
import arcgis
8+
from utilities import delete_folder, write_to_bucket
9+
10+
#: True on the weekday that also gets a long-retention copy of each backup.
#: NOTE(review): weekday() == 0 is Monday, but the README says the extra
#: long-term backup happens "every sunday" — confirm which day is intended.
NEEDS_WEEKLY_BACKUP = datetime.today().weekday() == 0
511

612

713
def _get_secrets():
8-
"""A helper method for loading secrets from either a GCF mount point or the local src/nfhl-skid/secrets/secrets.
14+
"""A helper method for loading secrets from either a GCF mount point or a local secrets folder.
915
json file
1016
1117
Raises:
@@ -30,8 +36,90 @@ def _get_secrets():
3036

3137

3238
def backup():
    """Back up tagged ArcGIS Online items to the local bucket mirror under ./temp.

    Pages through the org's items carrying the configured backup tag, writes each
    item's metadata (item.json) and data (data.json) with write_to_bucket, and,
    for feature services, starts asynchronous file-geodatabase exports that are
    downloaded (and the temporary export items deleted from the org) once ready.
    Prints a summary of everything that was backed up.
    """
    secrets = _get_secrets()
    #: sign in to the org described by the secrets file / GCF mount
    gis = arcgis.GIS(
        url=secrets["AGOL_ORG"],
        username=secrets["AGOL_USER"],
        password=secrets["AGOL_PASSWORD"],
    )

    page_size = 100  #: items requested per advanced_search page
    has_more = True
    start = 1
    summary = {}  #: item id -> {title, versions, type} for the final report
    export_jobs = []  #: pending async feature-service export jobs
    #: the item types moonwalk knows how to back up
    supported_types = [
        arcgis.gis.ItemTypeEnum.FEATURE_SERVICE.value,
        arcgis.gis.ItemTypeEnum.WEB_EXPERIENCE.value,
        arcgis.gis.ItemTypeEnum.WEB_MAP.value,
        arcgis.gis.ItemTypeEnum.WEB_SCENE.value,
        arcgis.gis.ItemTypeEnum.WEB_MAPPING_APPLICATION.value,
    ]

    while has_more:
        #: search the whole org for items carrying the backup tag
        response = gis.content.advanced_search(
            query=f"orgid:{gis.properties.id}",
            filter=f'tags:{secrets["TAG_NAME"]}',
            max_items=page_size,
            start=start,
        )

        #: couldn't query or filter multiple types at once, so filtering here
        for item in [filteredItem for filteredItem in response["results"] if filteredItem.type in supported_types]:
            print(f"Preparing {item.title} ({item.type}, {item.id})")
            item_json = dict(item)

            #: NOTE(review): "sample-bucket" looks like a wip placeholder — confirm the real bucket name.
            #: NOTE(review): the second call overwrites `versions` from the first; confirm both
            #: writes are expected to report the same version folders.
            versions = write_to_bucket("sample-bucket", item.id, "item.json", item_json, NEEDS_WEEKLY_BACKUP)
            versions = write_to_bucket("sample-bucket", item.id, "data.json", item.get_data(), NEEDS_WEEKLY_BACKUP)

            summary[item.id] = {
                "title": item.title,
                "versions": versions,
                "type": item.type,
            }

            if item.type == arcgis.gis.ItemTypeEnum.FEATURE_SERVICE.value:
                print("Requesting feature service export")

                #: wait=False: collect the job now, download after all pages are scanned
                job = item.export(
                    "moonwalk-export.zip",
                    arcgis.gis.ItemTypeEnum.FILE_GEODATABASE.value,
                    wait=False,
                    tags=[],
                )
                export_jobs.append(job)

        #: a non-positive nextStart signals the last page has been read
        has_more = response["nextStart"] > 0
        start = response["nextStart"]

    print("Downloading export jobs")

    #: poll until every export has been downloaded successfully
    while len(export_jobs) > 0:
        for job in export_jobs:
            item = arcgis.gis.Item(gis, job["exportItemId"])

            try:
                item.download(save_path=f'./temp/sample-bucket/{job["serviceItemId"]}', file_name="data.zip")
                job["downloaded"] = True
                #: the export item is temporary; remove it from the org once saved locally
                item.delete(permanent=True)
            except Exception as error:
                print(error)
                print(f"Failed to download {item.title} ({item.id}), {item.status()}")

        #: keep only the jobs that have not finished downloading yet
        export_jobs = [job for job in export_jobs if "downloaded" not in job]

        if len(export_jobs) > 0:
            print("waiting 5 seconds...", len(export_jobs))
            sleep(5)

    pprint(summary, indent=2)
116+
117+
118+
def local_backup():
    """Run a backup locally, starting from a clean ./temp working directory."""
    delete_folder(Path("./temp"))
    backup()


if __name__ == "__main__":
    local_backup()
+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import json
2+
from pathlib import Path
3+
from google.cloud import storage
4+
# from google.oauth2 import service_account
5+
6+
# CREDENTIAL_FILE = Path("./service-account.json")
7+
8+
# if not CREDENTIAL_FILE.exists():
9+
# raise FileNotFoundError("missing service account")
10+
11+
# credential_data = {}
12+
# with CREDENTIAL_FILE.open() as reader:
13+
# credential_data = json.load(reader)
14+
15+
# CREDENTIALS = service_account.Credentials.from_service_account_info(credential_data)
16+
17+
#: module-level client created at import time and shared by every upload;
#: uses application-default credentials (the service-account path above is commented out)
STORAGE_CLIENT = storage.Client()
BUCKET_NAME = "ut-dts-agrc-moonwalk-dev"
19+
20+
def write_to_gcs(bucket_name, folder, filename, data, needs_weekly_backup):
    """Upload JSON-serializable `data` to the short-term backup prefix in GCS.

    Mirrors write_to_bucket's layout: `<folder>/short/<filename>`, plus a
    `<folder>/long/<filename>` copy when a weekly backup is due. Upload
    failures are printed and swallowed (best effort), matching the original.

    Args:
        bucket_name: name of the GCS bucket to write into
        folder: item-specific prefix (callers pass an item id string)
        filename: blob file name, e.g. "item.json"
        data: JSON-serializable object (a dict in the visible callers)
        needs_weekly_backup: also write the long-retention copy when True
    """
    bucket = STORAGE_CLIENT.bucket(bucket_name)

    #: wrap in Path so string folders work with the `/` operator (item ids are strings)
    paths = [Path(folder) / "short" / filename]

    if needs_weekly_backup:
        paths.append(Path(folder) / "long" / filename)

    for path in paths:
        try:
            #: fix: `bucket.bucket.blob(...)` raised AttributeError — Bucket has no
            #: `.bucket`; blob names must be strings with forward slashes
            blob = bucket.blob(path.as_posix())
            #: fix: `data` is an in-memory object, not a file on disk, so
            #: upload_from_filename was wrong — serialize and upload the bytes
            blob.upload_from_string(json.dumps(data), content_type="application/json")
        except Exception as error:
            print(error)
34+
35+
def write_to_bucket(bucket, folder, filename, data, needs_weekly_backup):
    """Write `data` as JSON to the local bucket mirror under ./temp.

    Files land in `./temp/<bucket>/<folder>/short/<filename>`, plus a
    `long/<filename>` copy when a weekly backup is due. Parent folders are
    created as needed.

    Args:
        bucket: bucket name segment of the local mirror path
        folder: item-specific folder (callers pass an item id)
        filename: output file name, e.g. "item.json"
        data: JSON-serializable object
        needs_weekly_backup: also write the long-retention copy when True

    Returns:
        list[str]: the paths written, so callers (backup's summary) can record
        the backup versions instead of a silent None.
    """
    base_path = Path(f"./temp/{bucket}/{folder}")
    paths = [base_path / "short" / filename]

    if needs_weekly_backup:
        paths.append(base_path / "long" / filename)

    for path in paths:
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, "w") as f:
            json.dump(data, f)

    #: fix: backup() stores this result as summary[item.id]["versions"], which
    #: was always None before — report what was actually written
    return [str(path) for path in paths]
47+
48+
def delete_folder(pth):
    """Recursively delete the directory tree rooted at `pth`; a missing path is a no-op."""
    if not pth.exists():
        return

    #: depth-first: empty out every child before removing the directory itself
    for child in pth.iterdir():
        if child.is_dir():
            delete_folder(child)
            continue

        child.unlink()

    pth.rmdir()

0 commit comments

Comments
 (0)