Skip to content

Commit 5701e9b

Browse files
committed
reorder timdex functions and organize under extras
1 parent 3da47ee commit 5701e9b

File tree

3 files changed

+31
-31
lines changed

3 files changed

+31
-31
lines changed

abdiff/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
)
2020
from abdiff.core import init_job as core_init_job
2121
from abdiff.core.utils import read_job_json
22-
from abdiff.helpers.timdex_sources import get_ordered_extracted_files_all_sources
22+
from abdiff.extras.timdex_sources import get_ordered_extracted_files_all_sources
2323
from abdiff.webapp.app import app
2424

2525
logger = logging.getLogger(__name__)
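abdiff/extras/timdex_sources.py (presumably moved from abdiff/helpers/timdex_sources.py; file path inferred from the import change in abdiff/cli.py above)

Lines changed: 30 additions & 30 deletions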
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,16 @@
1313
CONFIG = Config()
1414

1515

16-
def get_extracted_files_for_source(
17-
source: str,
18-
bucket: str = CONFIG.TIMDEX_BUCKET,
19-
) -> list[str]:
20-
"""List S3 URIs for extract files in TIMDEX S3 bucket for a given source."""
21-
s3_client = boto3.client("s3")
22-
files = []
23-
24-
paginator = s3_client.get_paginator("list_objects_v2")
25-
page_iterator = paginator.paginate(Bucket=bucket, Prefix=source)
26-
27-
for page in page_iterator:
28-
if "Contents" in page:
29-
for obj in page["Contents"]:
30-
if not obj["Key"].endswith("/"): # skip folders
31-
s3_uri = f"s3://{bucket}/{obj['Key']}"
32-
files.append(s3_uri)
33-
34-
# filter where "extracted" in filename
35-
return [file for file in files if "extracted" in file]
16+
def get_ordered_extracted_files_all_sources(
17+
sources: list[str] | None = None,
18+
) -> dict[str, list[str]]:
19+
"""Get ordered extract files for all TIMDEX sources."""
20+
if not sources:
21+
sources = CONFIG.active_timdex_sources
22+
return {
23+
source: get_ordered_extracted_files_since_last_full_run(source=source)
24+
for source in sources
25+
}
3626

3727

3828
def get_ordered_extracted_files_since_last_full_run(source: str) -> list[str]:
@@ -81,13 +71,23 @@ def _extract_date(filename: str) -> datetime.datetime:
8171
return datetime.datetime.strptime(date_string, "%Y-%m-%d").astimezone(datetime.UTC)
8272

8373

84-
def get_ordered_extracted_files_all_sources(
85-
sources: list[str] | None = None,
86-
) -> dict[str, list[str]]:
87-
"""Get ordered extract files for all TIMDEX sources."""
88-
if not sources:
89-
sources = CONFIG.active_timdex_sources
90-
return {
91-
source: get_ordered_extracted_files_since_last_full_run(source=source)
92-
for source in sources
93-
}
74+
def get_extracted_files_for_source(
75+
source: str,
76+
bucket: str = CONFIG.TIMDEX_BUCKET,
77+
) -> list[str]:
78+
"""List S3 URIs for extract files in TIMDEX S3 bucket for a given source."""
79+
s3_client = boto3.client("s3")
80+
files = []
81+
82+
paginator = s3_client.get_paginator("list_objects_v2")
83+
page_iterator = paginator.paginate(Bucket=bucket, Prefix=source)
84+
85+
for page in page_iterator:
86+
if "Contents" in page:
87+
for obj in page["Contents"]:
88+
if not obj["Key"].endswith("/"): # skip folders
89+
s3_uri = f"s3://{bucket}/{obj['Key']}"
90+
files.append(s3_uri)
91+
92+
# filter where "extracted" in filename
93+
return [file for file in files if "extracted" in file]

0 commit comments

Comments
 (0)