Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: Install dependencies
run: |
poetry install -vvv

poetry run pip install -r requirements_dev.txt
- name: Lint with flake8
run: |
poetry run flake8
Expand All @@ -48,6 +48,7 @@ jobs:

- name: Coveralls
uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0
continue-on-error: true
with:
coverage-reporter-version: "v0.6.9"
flag-name: run ${{ join(matrix.*, ' - ') }}
Expand Down
14 changes: 10 additions & 4 deletions chronicler/eventizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,18 @@
import pkgutil
import os

from collections import namedtuple
from collections.abc import Iterator, Generator
from typing import Any

from cloudevents.http import CloudEvent


# Lightweight record describing a person found in the data.
# Every field defaults to None, so callers only set the parts they know.
Identity = namedtuple('Identity',
['name', 'email', 'username'],
defaults=(None, None, None))


class Eventizer:
"""Abstract class to eventize data.

Expand Down Expand Up @@ -77,14 +83,14 @@ def eventize(name: str, raw_items: Iterator[dict[str, Any]]) -> Generator[CloudE
yield from eventizer.eventize(raw_items)


def _find_eventizers(top_package_name: str) -> dict[str, Eventizer]:
def _find_eventizers(top_package_name: str) -> dict[str, type[Eventizer]]:
"""Find available eventizers.

Look for the `Eventizer` classes under `top_package`
and its sub-packages. When `top_package` defines a namespace,
Look for the `Eventizer` classes under `top_package_name`
and its sub-packages. When `top_package_name` defines a namespace,
classes under that same namespace will be found too.

:param top_package: package storing eventizer classes
:param top_package_name: package storing eventizer classes

:returns: a dict with `Eventizer`
"""
Expand Down
163 changes: 159 additions & 4 deletions chronicler/events/core/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from typing import Any
import logging
import re

from cloudevents.http import CloudEvent
from typing import Any, Generator

from ...eventizer import Eventizer, uuid
from cloudevents.http import CloudEvent
from grimoirelab_toolkit.identities import generate_uuid
Comment thread
sduenas marked this conversation as resolved.

from ...eventizer import Eventizer, uuid, Identity

GIT_EVENT_COMMIT = "org.grimoirelab.events.git.commit"
GIT_EVENT_MERGE_COMMIT = "org.grimoirelab.events.git.merge"
Expand All @@ -32,11 +35,45 @@
GIT_EVENT_ACTION_COPIED = "org.grimoirelab.events.git.file.copied"
GIT_EVENT_ACTION_TYPE_CHANGED = "org.grimoirelab.events.git.file.typechanged"

# Event types for the identities involved in a commit. There is one type
# per role; the last dotted segment of the type (e.g. "authored_by") names
# the role.
GIT_EVENT_COMMIT_AUTHORED_BY = "org.grimoirelab.events.git.commit.authored_by"
GIT_EVENT_COMMIT_COMMITTED_BY = "org.grimoirelab.events.git.commit.committed_by"
GIT_EVENT_COMMIT_ACKED_BY = "org.grimoirelab.events.git.commit.acked_by"
GIT_EVENT_COMMIT_CO_AUTHORED_BY = "org.grimoirelab.events.git.commit.co_authored_by"
GIT_EVENT_COMMIT_HELPED_BY = "org.grimoirelab.events.git.commit.helped_by"
GIT_EVENT_COMMIT_MENTORED_BY = "org.grimoirelab.events.git.commit.mentored_by"
GIT_EVENT_COMMIT_REPORTED_BY = "org.grimoirelab.events.git.commit.reported_by"
GIT_EVENT_COMMIT_REVIEWED_BY = "org.grimoirelab.events.git.commit.reviewed_by"
GIT_EVENT_COMMIT_SIGNED_OFF_BY = "org.grimoirelab.events.git.commit.signed_off_by"
GIT_EVENT_COMMIT_SUGGESTED_BY = "org.grimoirelab.events.git.commit.suggested_by"
GIT_EVENT_COMMIT_TESTED_BY = "org.grimoirelab.events.git.commit.tested_by"
Comment thread
sduenas marked this conversation as resolved.

# Maps a commit trailer key, as it appears in the commit data, to the
# event type generated for each identity listed under that trailer.
COMMIT_TRAILERS = {
"Acked-by": GIT_EVENT_COMMIT_ACKED_BY,
"Co-authored-by": GIT_EVENT_COMMIT_CO_AUTHORED_BY,
"Helped-by": GIT_EVENT_COMMIT_HELPED_BY,
"Mentored-by": GIT_EVENT_COMMIT_MENTORED_BY,
"Reported-by": GIT_EVENT_COMMIT_REPORTED_BY,
"Reviewed-by": GIT_EVENT_COMMIT_REVIEWED_BY,
"Signed-off-by": GIT_EVENT_COMMIT_SIGNED_OFF_BY,
"Suggested-by": GIT_EVENT_COMMIT_SUGGESTED_BY,
"Tested-by": GIT_EVENT_COMMIT_TESTED_BY,
}

# Pair programming regex. Several names share a single email address and
# the last name is introduced by "and" (any letter case), "&" or "+".
# Some matching examples are:
# - John Smith, John Doe and Jane Rae <pairprogramming@example.com>
# - John Smith, John Doe & Jane Rae <pairprogramming@example>
# - John Smith and John Doe <pairprogramming@example>
#
# Named groups:
# - first_authors: everything before the separator (may be comma-separated)
# - last_author: the name between the separator and the email
# - email: the address between "<" and ">"
GIT_AUTHORS_REGEX = re.compile(
r"(?P<first_authors>.+?)\s+(?:[aA][nN][dD]|&|\+)\s+(?P<last_author>.+?)\s+<(?P<email>[^>]+)>"
)

logger = logging.getLogger(__name__)


class GitEventizer(Eventizer):
"""Eventize git commits"""

def eventize_item(self, raw_item: dict[str, Any]) -> list[dict[str, Any]]:
def eventize_item(self, raw_item: dict[str, Any]) -> list[CloudEvent]:
events = []

item_uuid = raw_item.get('uuid', None)
Expand Down Expand Up @@ -68,6 +105,11 @@ def eventize_item(self, raw_item: dict[str, Any]) -> list[dict[str, Any]]:

events.extend(action_events)

identities_events = self._eventize_commit_identities(event,
raw_item)

events.extend(identities_events)

return events

def _eventize_commit_actions(self, parent_event: CloudEvent, raw_files_data):
Expand Down Expand Up @@ -143,3 +185,116 @@ def _process_action(self, source, time, event_uuid, action, file_data):
event = CloudEvent(attributes, data)

return event

def _eventize_commit_identities(self, parent_event: CloudEvent, raw_item: dict[str, Any]) -> list[CloudEvent]:
    """Eventize commit identities from a git commit item.

    Identity events are generated, in this order, for the author(s),
    the committer(s) and the identities listed under each trailer
    of `COMMIT_TRAILERS` found in the commit data.

    :param parent_event: commit event the identity events are linked to
    :param raw_item: raw commit item; its `data` entry must provide
        the `Author` and `Commit` fields

    :returns: list of identity events
    """
    def identity_events(event_type, identities):
        # Every identity event shares the source, time and id of the
        # commit event; only the type and the identities change.
        return self._process_identities(parent_event['source'],
                                        parent_event['time'],
                                        parent_event['id'],
                                        event_type,
                                        identities)

    commit = raw_item["data"]
    collected = []

    collected.extend(identity_events(GIT_EVENT_COMMIT_AUTHORED_BY,
                                     self._parse_authors(commit["Author"])))
    collected.extend(identity_events(GIT_EVENT_COMMIT_COMMITTED_BY,
                                     self._parse_authors(commit["Commit"])))

    # Trailers are optional; a missing one contributes no events.
    for trailer, event_type in COMMIT_TRAILERS.items():
        collected.extend(identity_events(event_type,
                                         commit.get(trailer, [])))

    return collected

def _process_identities(
    self,
    source: str,
    time: str,
    event_uuid: str,
    event_type: str,
    raw_identities: list[str]
) -> Generator[CloudEvent, None, None]:
    """Generate one identity event per entry of a list of identities.

    Entries for which no UUID can be generated (a `ValueError` is
    raised while parsing them or computing the UUID) are logged
    and skipped.

    :param source: data source of the event
    :param time: time of the event
    :param event_uuid: UUID of the parent event
    :param event_type: type of the identity event
    :param raw_identities: list of strings with the identities information

    :returns: generator of CloudEvent with the identity information
    """
    for entry in raw_identities:
        try:
            person = self._parse_identity(entry)
            person_uuid = generate_uuid(source="git",
                                        email=person.email,
                                        name=person.name,
                                        username=person.username)
        except ValueError as e:
            logger.warning(f"Cannot generate UUID for identity '{entry}' "
                           f"in event '{event_uuid}': {e}. Skipping.")
            continue

        # The role is the last dotted segment of the event type,
        # e.g. "authored_by".
        role = event_type.rsplit('.', 1)[-1]
        identity_event_id = uuid(event_uuid, role, person_uuid)

        yield CloudEvent(
            {
                "id": identity_event_id,
                "linked_event": event_uuid,
                "type": event_type,
                "source": source,
                "time": time,
            },
            {
                "source": "git",
                "name": person.name,
                "username": person.username,
                "email": person.email,
                "role": role,
                "uuid": person_uuid,
            },
        )

@staticmethod
def _parse_authors(authors: str) -> list[str]:
    """Split an author string into the individual identities it holds.

    When the string matches `GIT_AUTHORS_REGEX` (several names sharing
    one email address), every name becomes an entry of its own and
    the shared address is appended as a separate `<email>` entry.
    Otherwise, the string is returned untouched as the only entry.

    :param authors: raw author string

    :returns: list of identity strings
    """
    found = GIT_AUTHORS_REGEX.match(authors)

    if not found:
        return [authors]

    identities = [name.strip()
                  for name in found.group("first_authors").split(",")]
    identities.append(found.group("last_author"))
    identities.append(f"<{found.group('email')}>")

    return identities

@staticmethod
def _parse_identity(git_author: str) -> Identity:
    """Extract identity information from a Git author string.

    The expected format is `Name <email>`, where both parts are
    optional. The name is the text before the first `<`, with the
    surrounding whitespace removed. The email is the text between
    that `<` and the next `>`; when the closing `>` is missing,
    everything after `<` is taken.

    :param git_author: raw identity string

    :returns: an `Identity` with `name` and `email` set; `name` is
        `None` when empty and `email` is `None` when the string
        has no `<`
    """
    raw_name, opening, remainder = git_author.partition("<")

    name = raw_name.strip() or None

    email = None
    if opening:
        # Cut at the closing bracket instead of dropping the last
        # character: text after '>' (trailing spaces, comments) or a
        # missing '>' would otherwise corrupt the address.
        email = remainder.partition(">")[0]

    return Identity(email=email, name=name)
Loading