Skip to content

Commit 5686b07

Browse files
committed
Merge branch 'develop' into schematic-212-telemetry-combining
2 parents 7259617 + 998e295 commit 5686b07

File tree

9 files changed

+262
-222
lines changed

9 files changed

+262
-222
lines changed

.github/workflows/scan_repo.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ jobs:
1212
trivy:
1313
name: Trivy
1414
runs-on: ubuntu-latest
15+
env:
16+
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2
17+
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1
1518
steps:
1619
- name: Checkout code
1720
uses: actions/checkout@v4

CONTRIBUTION.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Please note we have a [code of conduct](CODE_OF_CONDUCT.md), please follow it in
66

77
## How to report bugs or feature requests
88

9-
You can **create bug and feature requests** through [Sage Bionetwork's FAIR Data service desk](https://sagebionetworks.jira.com/servicedesk/customer/portal/5/group/8). Providing enough details to the developers to verify and troubleshoot your issue is paramount:
9+
You can **create bug and feature requests** through [Sage Bionetwork's DPE schematic support](https://sagebionetworks.jira.com/servicedesk/customer/portal/5/group/7/create/225). Providing enough details to the developers to verify and troubleshoot your issue is paramount:
1010
- **Provide a clear and descriptive title as well as a concise summary** of the issue to identify the problem.
1111
- **Describe the exact steps which reproduce the problem** in as many details as possible.
1212
- **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior.
@@ -25,7 +25,7 @@ For new features, bugs, enhancements:
2525

2626
#### 1. Branch Setup
2727
* Pull the latest code from the develop branch in the upstream repository.
28-
* Checkout a new branch formatted like so: `develop-<feature/fix-name>` from the develop branch
28+
* Checkout a new branch formatted like so: `<JIRA-ID>-<feature/fix-name>` from the develop branch
2929

3030
#### 2. Development Workflow
3131
* Develop on your new branch.
@@ -35,22 +35,22 @@ For new features, bugs, enhancements:
3535
* You can choose to create a draft PR if you prefer to develop this way
3636

3737
#### 3. Branch Management
38-
* Push code to `develop-<feature/fix-name>` in upstream repo:
38+
* Push code to `<JIRA-ID>-<feature/fix-name>` in upstream repo:
3939
```
40-
git push <upstream> develop-<feature/fix-name>
40+
git push <upstream> <JIRA-ID>-<feature/fix-name>
4141
```
42-
* Branch off `develop-<feature/fix-name>` if you need to work on multiple features associated with the same code base
42+
* Branch off `<JIRA-ID>-<feature/fix-name>` if you need to work on multiple features associated with the same code base
4343
* After feature work is complete and before creating a PR to the develop branch in upstream
4444
a. ensure that code runs locally
4545
b. test for logical correctness locally
4646
c. run `pre-commit` to style code if the hook is not installed
4747
d. wait for git workflow to complete (e.g. tests are run) on GitHub
4848

4949
#### 4. Pull Request and Review
50-
* Create a PR from `develop-<feature/fix-name>` into the develop branch of the upstream repo
50+
* Create a PR from `<JIRA-ID>-<feature/fix-name>` into the develop branch of the upstream repo
5151
* Request a code review on the PR
5252
* Once code is approved merge in the develop branch. The **"Squash and merge"** strategy should be used for a cleaner commit history on the `develop` branch. The description of the squash commit should include enough information to understand the context of the changes that were made.
53-
* Once the actions pass on the main branch, delete the `develop-<feature/fix-name>` branch
53+
* Once the actions pass on the main branch, delete the `<JIRA-ID>-<feature/fix-name>` branch
5454

5555
### Updating readthedocs documentation
5656
1. Navigate to the docs directory.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ RUN poetry install --no-interaction --no-ansi --no-root
2929

3030
COPY . ./
3131

32-
RUN poetry install --only-root
32+
RUN poetry install --only-root

schematic/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
DEPLOYMENT_ENVIRONMENT,
1515
SERVICE_INSTANCE_ID,
1616
SERVICE_NAME,
17+
SERVICE_VERSION,
1718
Resource,
1819
)
1920
from opentelemetry.sdk.trace import TracerProvider
@@ -25,6 +26,7 @@
2526

2627
from schematic.configuration.configuration import CONFIG
2728
from schematic.loader import LOADER
29+
from schematic.version import __version__
2830
from schematic_api.api.security_controller import info_from_bearer_auth
2931

3032
Synapse.allow_client_caching(False)
@@ -103,11 +105,7 @@ def set_up_tracing(session: requests.Session) -> None:
103105
attributes={
104106
SERVICE_INSTANCE_ID: service_instance_id,
105107
SERVICE_NAME: tracing_service_name,
106-
# TODO: Revisit this portion later on. As of 11/12/2024 when
107-
# deploying this to ECS or running within a docker container,
108-
# the package version errors out with the following error:
109-
# importlib.metadata.PackageNotFoundError: No package metadata was found for schematicpy
110-
# SERVICE_VERSION: package_version,
108+
SERVICE_VERSION: __version__,
111109
DEPLOYMENT_ENVIRONMENT: deployment_environment,
112110
}
113111
)
@@ -136,6 +134,7 @@ def set_up_logging(session: requests.Session) -> None:
136134
SERVICE_INSTANCE_ID: service_instance_id,
137135
SERVICE_NAME: logging_service_name,
138136
DEPLOYMENT_ENVIRONMENT: deployment_environment,
137+
SERVICE_VERSION: __version__,
139138
}
140139
)
141140

schematic/__main__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from schematic.visualization.commands import (
1414
viz as viz_cli,
1515
) # viz generation commands
16+
from schematic import __version__
1617

1718
logger = logging.getLogger()
1819
click_log.basic_config(logger)
@@ -24,6 +25,7 @@
2425
# invoke_without_command=True -> forces the application not to show aids before losing them with a --h
2526
@click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True)
2627
@click_log.simple_verbosity_option(logger)
28+
@click.version_option(version=__version__, prog_name="schematic")
2729
def main():
2830
"""
2931
Command line interface to the `schematic` backend services.

schematic/utils/general.py

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import os
77
import pstats
8-
import subprocess
8+
from pathlib import Path
99
import tempfile
1010
from cProfile import Profile
1111
from datetime import datetime, timedelta
@@ -129,40 +129,19 @@ def calculate_datetime(
129129
return date_time_result
130130

131131

132-
def check_synapse_cache_size(
133-
directory: str = "/root/.synapseCache",
134-
) -> float:
135-
"""use du --sh command to calculate size of .synapseCache.
132+
def check_synapse_cache_size(directory: str = "/root/.synapseCache") -> float:
133+
"""Calculate size of .synapseCache directory in bytes using pathlib.
136134
137135
Args:
138136
directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'
139137
140138
Returns:
141-
float: returns size of .synapsecache directory in bytes
139+
float: size of .synapsecache directory in bytes
142140
"""
143-
# Note: this command might fail on windows user.
144-
# But since this command is primarily for running on AWS, it is fine.
145-
command = ["du", "-sh", directory]
146-
output = subprocess.run(command, capture_output=True, check=False).stdout.decode(
147-
"utf-8"
141+
total_size = sum(
142+
f.stat().st_size for f in Path(directory).rglob("*") if f.is_file()
148143
)
149-
150-
# Parsing the output to extract the directory size
151-
size = output.split("\t")[0]
152-
if "K" in size:
153-
size_in_kb = float(size.rstrip("K"))
154-
byte_size = size_in_kb * 1000
155-
elif "M" in size:
156-
size_in_mb = float(size.rstrip("M"))
157-
byte_size = size_in_mb * 1000000
158-
elif "G" in size:
159-
size_in_gb = float(size.rstrip("G"))
160-
byte_size = size_in_gb * (1024**3)
161-
elif "B" in size:
162-
byte_size = float(size.rstrip("B"))
163-
else:
164-
logger.error("Cannot recognize the file size unit")
165-
return byte_size
144+
return total_size
166145

167146

168147
def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:

schematic/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""Sets the version of the package"""
22
import importlib.metadata
33

4-
__version__ = importlib.metadata.version("schematic")
4+
__version__ = importlib.metadata.version("schematicpy")

tests/test_utils.py

Lines changed: 1 addition & 180 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,11 @@
22
import json
33
import logging
44
import os
5-
import tempfile
6-
import time
7-
from datetime import datetime
8-
from pathlib import Path
9-
from typing import Generator, Union
105

116
import numpy as np
127
import pandas as pd
138
import pytest
14-
import synapseclient.core.cache as cache
15-
from _pytest.fixtures import FixtureRequest
169
from pandas.testing import assert_frame_equal
17-
from synapseclient.core.exceptions import SynapseHTTPError
1810

1911
from schematic.models.metadata import MetadataModel
2012
from schematic.models.validate_manifest import ValidateManifest
@@ -26,14 +18,8 @@
2618
convert_graph_to_jsonld,
2719
)
2820
from schematic.schemas.data_model_parser import DataModelParser
29-
from schematic.utils import cli_utils, df_utils, general, io_utils, validate_utils
21+
from schematic.utils import cli_utils, df_utils, io_utils, validate_utils
3022
from schematic.utils.df_utils import load_df
31-
from schematic.utils.general import (
32-
calculate_datetime,
33-
check_synapse_cache_size,
34-
clear_synapse_cache,
35-
entity_type_mapping,
36-
)
3723
from schematic.utils.schema_utils import (
3824
check_for_duplicate_components,
3925
check_if_display_name_is_valid_label,
@@ -168,13 +154,6 @@
168154

169155
DATA_MODEL_DICT = {"example.model.csv": "CSV", "example.model.jsonld": "JSONLD"}
170156

171-
test_disk_storage = [
172-
(2, 4000, 16000),
173-
(1000, 4000, 16000),
174-
(2000000, 1900000, 2000000),
175-
(1073741825, 1073741824, 1181116006.4),
176-
]
177-
178157

179158
def get_metadataModel(helpers, model_name: str):
180159
metadataModel = MetadataModel(
@@ -185,164 +164,6 @@ def get_metadataModel(helpers, model_name: str):
185164
return metadataModel
186165

187166

188-
# create temporary files with various size based on request
189-
@pytest.fixture()
190-
def create_temp_query_file(
191-
tmp_path: Path, request: FixtureRequest
192-
) -> Generator[tuple[Path, Path, Path], None, None]:
193-
"""create temporary files of various size based on request parameter.
194-
195-
Args:
196-
tmp_path (Path): temporary file path
197-
request (any): a request for a fixture from a test
198-
199-
Yields:
200-
Generator[Tuple[Path, Path, Path]]: return path of mock synapse cache directory, mock table query folder and csv
201-
"""
202-
# define location of mock synapse cache
203-
mock_synapse_cache_dir = tmp_path / ".synapseCache/"
204-
mock_synapse_cache_dir.mkdir()
205-
mock_sub_folder = mock_synapse_cache_dir / "123"
206-
mock_sub_folder.mkdir()
207-
mock_table_query_folder = mock_sub_folder / "456"
208-
mock_table_query_folder.mkdir()
209-
210-
# create mock table query csv
211-
mock_synapse_table_query_csv = (
212-
mock_table_query_folder / "mock_synapse_table_query.csv"
213-
)
214-
with open(mock_synapse_table_query_csv, "wb") as f:
215-
f.write(b"\0" * request.param)
216-
yield mock_synapse_cache_dir, mock_table_query_folder, mock_synapse_table_query_csv
217-
218-
219-
class TestGeneral:
220-
@pytest.mark.parametrize("create_temp_query_file", [3, 1000], indirect=True)
221-
def test_clear_synapse_cache(self, create_temp_query_file) -> None:
222-
# define location of mock synapse cache
223-
(
224-
mock_synapse_cache_dir,
225-
mock_table_query_folder,
226-
mock_synapse_table_query_csv,
227-
) = create_temp_query_file
228-
# create a mock cache map
229-
mock_cache_map = mock_table_query_folder / ".cacheMap"
230-
mock_cache_map.write_text(
231-
f"{mock_synapse_table_query_csv}: '2022-06-13T19:24:27.000Z'"
232-
)
233-
234-
assert os.path.exists(mock_synapse_table_query_csv)
235-
236-
# since synapse python client would compare last modified date and before date
237-
# we have to create a little time gap here
238-
time.sleep(1)
239-
240-
# clear cache
241-
my_cache = cache.Cache(cache_root_dir=mock_synapse_cache_dir)
242-
clear_synapse_cache(my_cache, minutes=0.0001)
243-
# make sure that cache files are now gone
244-
assert os.path.exists(mock_synapse_table_query_csv) == False
245-
assert os.path.exists(mock_cache_map) == False
246-
247-
def test_calculate_datetime_before_minutes(self):
248-
input_date = datetime.strptime("07/20/23 17:36:34", "%m/%d/%y %H:%M:%S")
249-
minutes_before = calculate_datetime(
250-
input_date=input_date, minutes=10, before_or_after="before"
251-
)
252-
expected_result_date_before = datetime.strptime(
253-
"07/20/23 17:26:34", "%m/%d/%y %H:%M:%S"
254-
)
255-
assert minutes_before == expected_result_date_before
256-
257-
def test_calculate_datetime_after_minutes(self):
258-
input_date = datetime.strptime("07/20/23 17:36:34", "%m/%d/%y %H:%M:%S")
259-
minutes_after = calculate_datetime(
260-
input_date=input_date, minutes=10, before_or_after="after"
261-
)
262-
expected_result_date_after = datetime.strptime(
263-
"07/20/23 17:46:34", "%m/%d/%y %H:%M:%S"
264-
)
265-
assert minutes_after == expected_result_date_after
266-
267-
def test_calculate_datetime_raise_error(self):
268-
with pytest.raises(ValueError):
269-
input_date = datetime.strptime("07/20/23 17:36:34", "%m/%d/%y %H:%M:%S")
270-
minutes = calculate_datetime(
271-
input_date=input_date, minutes=10, before_or_after="error"
272-
)
273-
274-
# this test might fail for windows machine
275-
@pytest.mark.not_windows
276-
@pytest.mark.parametrize(
277-
"create_temp_query_file,local_disk_size,gh_disk_size",
278-
test_disk_storage,
279-
indirect=["create_temp_query_file"],
280-
)
281-
def test_check_synapse_cache_size(
282-
self,
283-
create_temp_query_file,
284-
local_disk_size: int,
285-
gh_disk_size: Union[int, float],
286-
) -> None:
287-
mock_synapse_cache_dir, _, _ = create_temp_query_file
288-
disk_size = check_synapse_cache_size(mock_synapse_cache_dir)
289-
290-
# For some reasons, when running in github action, the size of file changes.
291-
if IN_GITHUB_ACTIONS:
292-
assert disk_size == gh_disk_size
293-
else:
294-
assert disk_size == local_disk_size
295-
296-
def test_find_duplicates(self):
297-
mock_list = ["foo", "bar", "foo"]
298-
mock_dups = {"foo"}
299-
300-
test_dups = general.find_duplicates(mock_list)
301-
assert test_dups == mock_dups
302-
303-
def test_dict2list_with_dict(self):
304-
mock_dict = {"foo": "bar"}
305-
mock_list = [{"foo": "bar"}]
306-
307-
test_list = general.dict2list(mock_dict)
308-
assert test_list == mock_list
309-
310-
def test_dict2list_with_list(self):
311-
# mock_dict = {'foo': 'bar'}
312-
mock_list = [{"foo": "bar"}]
313-
314-
test_list = general.dict2list(mock_list)
315-
assert test_list == mock_list
316-
317-
@pytest.mark.parametrize(
318-
"entity_id,expected_type",
319-
[
320-
("syn27600053", "folder"),
321-
("syn29862078", "file"),
322-
("syn23643253", "asset view"),
323-
("syn30988314", "folder"),
324-
("syn51182432", "org.sagebionetworks.repo.model.table.TableEntity"),
325-
],
326-
)
327-
def test_entity_type_mapping(self, synapse_store, entity_id, expected_type):
328-
syn = synapse_store.syn
329-
330-
entity_type = entity_type_mapping(syn, entity_id)
331-
assert entity_type == expected_type
332-
333-
def test_entity_type_mapping_invalid_entity_id(self, synapse_store):
334-
syn = synapse_store.syn
335-
336-
# test with an invalid entity id
337-
with pytest.raises(SynapseHTTPError) as exception_info:
338-
entity_type_mapping(syn, "syn123456")
339-
340-
def test_download_manifest_to_temp_folder(self):
341-
with tempfile.TemporaryDirectory() as tmpdir:
342-
path_dir = general.create_temp_folder(tmpdir)
343-
assert os.path.exists(path_dir)
344-
345-
346167
class TestCliUtils:
347168
def test_query_dict(self):
348169
mock_dict = {"k1": {"k2": {"k3": "foobar"}}}

0 commit comments

Comments
 (0)