-
Notifications
You must be signed in to change notification settings - Fork 6
454 migrate resources from old cms to aiod platform #469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 14 commits
3201b3a
3ff6149
387ad73
cba3df3
5bb9e4f
db7d758
1c51694
6bc6b64
51a1700
da4c6dc
fd2fd9a
b0b93bb
ef24a07
8ea3275
b3f04d5
723b981
6b2705c
1422345
094669f
3742286
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Image that runs the AI4Europe CMS connector scripts on a cron schedule.
FROM aiod_metadata_catalogue

# Connector wrapper scripts, the container entry script and the cron schedule.
COPY organisations.sh /opt/connectors/script/organisations.sh
COPY events.sh /opt/connectors/script/events.sh
COPY news.sh /opt/connectors/script/news.sh
COPY entry.sh /opt/connectors/script/entry.sh
COPY cron /etc/cron.d/aiod

USER root
# Refresh the package index in the same layer as the install so the build does not
# depend on index files that may or may not be present in the base image, and remove
# the index afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cron \
    && rm -rf /var/lib/apt/lists/*

# Make the scripts executable and install the schedule as root's crontab.
RUN chmod +x \
        /opt/connectors/script/organisations.sh \
        /opt/connectors/script/events.sh \
        /opt/connectors/script/news.sh \
        /opt/connectors/script/entry.sh \
    && crontab /etc/cron.d/aiod

WORKDIR /app
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# At minute 40 of every hour, run each AI4Europe CMS connector script and append its
# output to a cron.log inside that connector's working directory.
# The directory names must be the ones the scripts use as WORK_DIR
# (organisation, event, news); a redirect into a missing directory prevents the job
# from starting.
40 * * * * bash /opt/connectors/script/organisations.sh >> /opt/connectors/data/ai4europe_cms/organisation/cron.log 2>&1
40 * * * * bash /opt/connectors/script/events.sh >> /opt/connectors/data/ai4europe_cms/event/cron.log 2>&1
40 * * * * bash /opt/connectors/script/news.sh >> /opt/connectors/data/ai4europe_cms/news/cron.log 2>&1
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash
# Container entry script: create the per-resource data directories used by the
# AI4Europe CMS connectors, then start cron.

DATA_ROOT=/opt/connectors/data/ai4europe_cms

mkdir -p \
    "${DATA_ROOT}/organisation" \
    "${DATA_ROOT}/event" \
    "${DATA_ROOT}/news"

# Start cron (-f: stay in the foreground, so this script keeps running with it).
/usr/sbin/cron -f -l 4
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Run one synchronization pass of the AI4Europe CMS event connector.
# Status messages go to stdout; the connector's own output is appended to
# ${WORK_DIR}/connector.log. Exits non-zero when another run holds the lock or when
# the connector fails.

WORK_DIR=/opt/connectors/data/ai4europe_cms/event
CONNECTOR=connectors.ai4europe_cms.ai4europe_cms_event_connector.AI4EuropeCmsEventConnector

# Report that a run is already in progress and stop.
another_instance()
{
    echo $(date -u) "This script is already running in a different thread."
    exit 1
}

# The script file itself serves as the lock file: fd 9 stays open for the lifetime of
# this shell, and the non-blocking exclusive lock fails at once if another run holds it.
exec 9< "$0"
flock -n -x 9 || another_instance

echo $(date -u) "Starting synchronization..."
PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \
    -c "$CONNECTOR" \
    -w "$WORK_DIR" \
    --save-every 100 >> "${WORK_DIR}/connector.log" 2>&1
status=$?

# Do not claim success when the connector exited with an error.
if [ "$status" -ne 0 ]; then
    echo $(date -u) "Synchronization failed with exit code ${status}."
    exit "$status"
fi
echo $(date -u) "Synchronization Done."
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Run one synchronization pass of the AI4Europe CMS news connector.
# Status messages go to stdout; the connector's own output is appended to
# ${WORK_DIR}/connector.log. Exits non-zero when another run holds the lock or when
# the connector fails.

WORK_DIR=/opt/connectors/data/ai4europe_cms/news
CONNECTOR=connectors.ai4europe_cms.ai4europe_cms_news_connector.AI4EuropeCmsNewsConnector

# Report that a run is already in progress and stop.
another_instance()
{
    echo $(date -u) "This script is already running in a different thread."
    exit 1
}

# The script file itself serves as the lock file: fd 9 stays open for the lifetime of
# this shell, and the non-blocking exclusive lock fails at once if another run holds it.
exec 9< "$0"
flock -n -x 9 || another_instance

echo $(date -u) "Starting synchronization..."
PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \
    -c "$CONNECTOR" \
    -w "$WORK_DIR" \
    --save-every 100 >> "${WORK_DIR}/connector.log" 2>&1
status=$?

# Do not claim success when the connector exited with an error.
if [ "$status" -ne 0 ]; then
    echo $(date -u) "Synchronization failed with exit code ${status}."
    exit "$status"
fi
echo $(date -u) "Synchronization Done."
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Run one synchronization pass of the AI4Europe CMS organisation connector.
# Status messages go to stdout; the connector's own output is appended to
# ${WORK_DIR}/connector.log. Exits non-zero when another run holds the lock or when
# the connector fails.

WORK_DIR=/opt/connectors/data/ai4europe_cms/organisation
CONNECTOR=connectors.ai4europe_cms.ai4europe_cms_organisation_connector.AI4EuropeCmsOrganisationConnector

# Report that a run is already in progress and stop.
another_instance()
{
    echo $(date -u) "This script is already running in a different thread."
    exit 1
}

# The script file itself serves as the lock file: fd 9 stays open for the lifetime of
# this shell, and the non-blocking exclusive lock fails at once if another run holds it.
exec 9< "$0"
flock -n -x 9 || another_instance

echo $(date -u) "Starting synchronization..."
PYTHONPATH=/app /usr/local/bin/python3 /app/connectors/synchronization.py \
    -c "$CONNECTOR" \
    -w "$WORK_DIR" \
    --save-every 100 >> "${WORK_DIR}/connector.log" 2>&1
status=$?

# Do not claim success when the connector exited with an error.
if [ "$status" -ne 0 ]; then
    echo $(date -u) "Synchronization failed with exit code ${status}."
    exit "$status"
fi
echo $(date -u) "Synchronization Done."
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,95 @@ | ||||||
import requests | ||||||
|
||||||
from requests.exceptions import HTTPError | ||||||
from typing import Iterator | ||||||
|
||||||
from connectors.abstract.resource_connector import ResourceConnector | ||||||
from connectors.record_error import RecordError | ||||||
from database.model.platform.platform_names import PlatformName | ||||||
from database.model.resource_read_and_create import resource_create | ||||||
from database.model.event.event import Event | ||||||
from connectors.resource_with_relations import ResourceWithRelations | ||||||
from database.model.ai_resource.text import Text | ||||||
|
||||||
|
||||||
class AI4EuropeCmsEventConnector(ResourceConnector[Event]):
    """Connector that imports events from the AI4Europe CMS API.

    Each call to :meth:`run` downloads the complete event listing with a single GET
    request and converts every entry into an ``Event`` create-model.
    """

    @property
    def resource_class(self) -> type[Event]:
        """The resource class produced by this connector (``Event``)."""
        return Event

    @property
    def platform_name(self) -> PlatformName:
        """The platform this connector reads from (``PlatformName.ai4europe_cms``)."""
        return PlatformName.ai4europe_cms

    def run(self, state: dict, **kwargs) -> Iterator[ResourceWithRelations[Event] | RecordError]:
        """Fetch all events from the AI4Europe CMS and yield them one by one.

        Args:
            state: Synchronization state passed in by the caller. This implementation
                neither reads nor updates it.
            **kwargs: Not used.

        Yields:
            A ``ResourceWithRelations[Event]`` for every entry of the response. If the
            request cannot be completed, the server answers with an error status, or
            the body cannot be decoded as JSON, a single ``RecordError`` with
            ``identifier=None`` is yielded instead and the generator stops.
        """
        # TODO(review): every run re-downloads the full event listing. Reviewers asked
        # for an incremental synchronization keyed on the identifier that the
        # AI4Europe API exposes for each item.
        url_data = "https://community-dev-api.aiod.eu/api/events/"

        # NOTE(review): hard-coded credential; looks like a placeholder — confirm and
        # move it to configuration before deploying.
        headers = {"AuthorizationToken": "1234567890"}

        try:
            response = requests.get(url_data, headers=headers, timeout=600)
        except requests.RequestException as e:
            # Connection problems and timeouts are reported like any other fetch error.
            yield RecordError(identifier=None, error=e)
            return

        if not response.ok:
            status_code = response.status_code
            try:
                msg = response.json()["error"]["message"]
            except (ValueError, KeyError, TypeError):
                # The error body is not JSON or lacks the expected structure; fall
                # back to the HTTP reason phrase so the failure is still reported.
                msg = response.reason
            err_msg = f"Error while fetching {url_data} from AI4Europe CMS: ({status_code}) {msg}"
            err = HTTPError(err_msg)
            yield RecordError(identifier=None, error=err)
            return

        try:
            events = response.json()
        except Exception as e:
            yield RecordError(identifier=None, error=e)
            return

        # The create-model does not depend on the individual event; build it once.
        pydantic_class = resource_create(Event)

        for event in events:
            desc = event.get("description") or {}

            # Registration links longer than 256 characters are dropped.
            registration_link = event.get("registration_link")
            if registration_link is not None and len(registration_link) > 256:
                registration_link = None

            yield ResourceWithRelations[Event](
                resource=pydantic_class(
                    platform_resource_identifier=event.get("platform_resource_identifier"),
                    # NOTE(review): reviewers questioned taking the platform from the
                    # payload; presumably it should be this connector's own platform
                    # name — confirm before changing.
                    platform=event.get("platform"),
                    name=event.get("name"),
                    date_published=event.get("date_published"),
                    start_date=event.get("start_date"),
                    end_date=event.get("end_date"),
                    registration_link=registration_link,
                    mode=event.get("mode"),
                    # List fields: a missing or null value becomes an empty list.
                    scientific_domain=list(event.get("scientific_domain") or []),
                    industrial_sector=list(event.get("industrial_sector") or []),
                    relevant_link=list(event.get("relevant_link") or []),
                    alternate_name=list(event.get("alternate_name") or []),
                    application_area=list(event.get("application_area") or []),
                    keyword=list(event.get("keyword") or []),
                    same_as=event.get("same_as"),
                    description=Text(
                        plain=desc.get("plain") or "",
                        html=desc.get("html") or "",
                    ),
                ),
                resource_ORM_class=Event,
            )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import requests | ||
|
||
from requests.exceptions import HTTPError | ||
from typing import Iterator | ||
|
||
from connectors.abstract.resource_connector import ResourceConnector | ||
from connectors.record_error import RecordError | ||
from database.model.platform.platform_names import PlatformName | ||
from database.model.resource_read_and_create import resource_create | ||
from database.model.news.news import News | ||
from connectors.resource_with_relations import ResourceWithRelations | ||
|
||
|
||
class AI4EuropeCmsNewsConnector(ResourceConnector[News]):
    """Connector that imports news items from the AI4Europe CMS API.

    Each call to :meth:`run` downloads the complete news listing with a single GET
    request and converts every entry into a ``News`` create-model.
    """

    @property
    def resource_class(self) -> type[News]:
        """The resource class produced by this connector (``News``)."""
        return News

    @property
    def platform_name(self) -> PlatformName:
        """The platform this connector reads from (``PlatformName.ai4europe_cms``)."""
        return PlatformName.ai4europe_cms

    def run(self, state: dict, **kwargs) -> Iterator[ResourceWithRelations[News] | RecordError]:
        """Fetch all news items from the AI4Europe CMS and yield them one by one.

        Args:
            state: Synchronization state passed in by the caller. This implementation
                neither reads nor updates it.
            **kwargs: Not used.

        Yields:
            A ``ResourceWithRelations[News]`` for every entry of the response. If the
            request cannot be completed, the server answers with an error status, or
            the body cannot be decoded as JSON, a single ``RecordError`` with
            ``identifier=None`` is yielded instead and the generator stops.
        """
        url_data = "https://community-dev-api.aiod.eu/api/news/"

        # NOTE(review): hard-coded credential; looks like a placeholder — confirm and
        # move it to configuration before deploying.
        headers = {"AuthorizationToken": "1234567890"}

        try:
            response = requests.get(url_data, headers=headers, timeout=600)
        except requests.RequestException as e:
            # Connection problems and timeouts are reported like any other fetch error.
            yield RecordError(identifier=None, error=e)
            return

        if not response.ok:
            status_code = response.status_code
            try:
                msg = response.json()["error"]["message"]
            except (ValueError, KeyError, TypeError):
                # The error body is not JSON or lacks the expected structure; fall
                # back to the HTTP reason phrase so the failure is still reported.
                msg = response.reason
            err_msg = f"Error while fetching {url_data} from AI4Europe CMS: ({status_code}) {msg}"
            err = HTTPError(err_msg)
            yield RecordError(identifier=None, error=err)
            return

        try:
            news = response.json()
        except Exception as e:
            yield RecordError(identifier=None, error=e)
            return

        # The create-model does not depend on the individual item; build it once.
        pydantic_class = resource_create(News)

        for n in news:
            yield ResourceWithRelations[News](
                resource=pydantic_class(
                    platform_resource_identifier=n.get("platform_resource_identifier"),
                    platform=n.get("platform"),
                    name=n.get("name"),
                    date_published=n.get("date_published"),
                    headline=n.get("headline"),
                    alternative_headline=n.get("alternative_headline"),
                    # List fields: a missing or null value becomes an empty list.
                    category=list(n.get("category") or []),
                    source=n.get("source"),
                    scientific_domain=list(n.get("scientific_domain") or []),
                    industrial_sector=list(n.get("industrial_sector") or []),
                    relevant_link=list(n.get("relevant_link") or []),
                    alternate_name=list(n.get("alternate_name") or []),
                    application_area=list(n.get("application_area") or []),
                    keyword=list(n.get("keyword") or []),
                    same_as=n.get("same_as"),
                ),
                resource_ORM_class=News,
            )
Uh oh!
There was an error while loading. Please reload this page.