Skip to content

Commit 5def5e7

Browse files
authored
Merges #8 Closes #8
2 parents f98bf1d + 51363d8 commit 5def5e7

9 files changed

Lines changed: 814 additions & 153 deletions

File tree

.github/workflows/tests.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
- name: Install dependencies
3535
run: |
3636
poetry install -vvv
37-
37+
poetry run pip install -r requirements_dev.txt
3838
- name: Lint with flake8
3939
run: |
4040
poetry run flake8
@@ -48,6 +48,7 @@ jobs:
4848
4949
- name: Coveralls
5050
uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0
51+
continue-on-error: true
5152
with:
5253
coverage-reporter-version: "v0.6.9"
5354
flag-name: run ${{ join(matrix.*, ' - ') }}

chronicler/eventizer.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,18 @@
2020
import pkgutil
2121
import os
2222

23+
from collections import namedtuple
2324
from collections.abc import Iterator, Generator
2425
from typing import Any
2526

2627
from cloudevents.http import CloudEvent
2728

2829

30+
Identity = namedtuple('Identity',
31+
['name', 'email', 'username'],
32+
defaults=(None, None, None))
33+
34+
2935
class Eventizer:
3036
"""Abstract class to eventize data.
3137
@@ -77,14 +83,14 @@ def eventize(name: str, raw_items: Iterator[dict[str, Any]]) -> Generator[CloudE
7783
yield from eventizer.eventize(raw_items)
7884

7985

80-
def _find_eventizers(top_package_name: str) -> dict[str, Eventizer]:
86+
def _find_eventizers(top_package_name: str) -> dict[str, type[Eventizer]]:
8187
"""Find available eventizers.
8288
83-
Look for the `Eventizer` classes under `top_package`
84-
and its sub-packages. When `top_package` defines a namespace,
89+
Look for the `Eventizer` classes under `top_package_name`
90+
and its sub-packages. When `top_package_name` defines a namespace,
8591
classes under that same namespace will be found too.
8692
87-
:param top_package: package storing eventizer classes
93+
:param top_package_name: package storing eventizer classes
8894
8995
:returns: a dict with `Eventizer`
9096
"""

chronicler/events/core/git.py

Lines changed: 159 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1717
#
1818

19-
from typing import Any
19+
import logging
20+
import re
2021

21-
from cloudevents.http import CloudEvent
22+
from typing import Any, Generator
2223

23-
from ...eventizer import Eventizer, uuid
24+
from cloudevents.http import CloudEvent
25+
from grimoirelab_toolkit.identities import generate_uuid
2426

27+
from ...eventizer import Eventizer, uuid, Identity
2528

2629
GIT_EVENT_COMMIT = "org.grimoirelab.events.git.commit"
2730
GIT_EVENT_MERGE_COMMIT = "org.grimoirelab.events.git.merge"
@@ -32,11 +35,45 @@
3235
GIT_EVENT_ACTION_COPIED = "org.grimoirelab.events.git.file.copied"
3336
GIT_EVENT_ACTION_TYPE_CHANGED = "org.grimoirelab.events.git.file.typechanged"
3437

38+
GIT_EVENT_COMMIT_AUTHORED_BY = "org.grimoirelab.events.git.commit.authored_by"
39+
GIT_EVENT_COMMIT_COMMITTED_BY = "org.grimoirelab.events.git.commit.committed_by"
40+
GIT_EVENT_COMMIT_ACKED_BY = "org.grimoirelab.events.git.commit.acked_by"
41+
GIT_EVENT_COMMIT_CO_AUTHORED_BY = "org.grimoirelab.events.git.commit.co_authored_by"
42+
GIT_EVENT_COMMIT_HELPED_BY = "org.grimoirelab.events.git.commit.helped_by"
43+
GIT_EVENT_COMMIT_MENTORED_BY = "org.grimoirelab.events.git.commit.mentored_by"
44+
GIT_EVENT_COMMIT_REPORTED_BY = "org.grimoirelab.events.git.commit.reported_by"
45+
GIT_EVENT_COMMIT_REVIEWED_BY = "org.grimoirelab.events.git.commit.reviewed_by"
46+
GIT_EVENT_COMMIT_SIGNED_OFF_BY = "org.grimoirelab.events.git.commit.signed_off_by"
47+
GIT_EVENT_COMMIT_SUGGESTED_BY = "org.grimoirelab.events.git.commit.suggested_by"
48+
GIT_EVENT_COMMIT_TESTED_BY = "org.grimoirelab.events.git.commit.tested_by"
49+
50+
COMMIT_TRAILERS = {
51+
"Acked-by": GIT_EVENT_COMMIT_ACKED_BY,
52+
"Co-authored-by": GIT_EVENT_COMMIT_CO_AUTHORED_BY,
53+
"Helped-by": GIT_EVENT_COMMIT_HELPED_BY,
54+
"Mentored-by": GIT_EVENT_COMMIT_MENTORED_BY,
55+
"Reported-by": GIT_EVENT_COMMIT_REPORTED_BY,
56+
"Reviewed-by": GIT_EVENT_COMMIT_REVIEWED_BY,
57+
"Signed-off-by": GIT_EVENT_COMMIT_SIGNED_OFF_BY,
58+
"Suggested-by": GIT_EVENT_COMMIT_SUGGESTED_BY,
59+
"Tested-by": GIT_EVENT_COMMIT_TESTED_BY,
60+
}
61+
62+
# Pair programming regex. Some matching examples are:
63+
# - John Smith, John Doe and Jane Rae <pairprogramming@example.com>
64+
# - John Smith, John Doe & Jane Rae <pairprogramming@example>
65+
# - John Smith and John Doe <pairprogramming@example>
66+
GIT_AUTHORS_REGEX = re.compile(
67+
r"(?P<first_authors>.+?)\s+(?:[aA][nN][dD]|&|\+)\s+(?P<last_author>.+?)\s+<(?P<email>[^>]+)>"
68+
)
69+
70+
logger = logging.getLogger(__name__)
71+
3572

3673
class GitEventizer(Eventizer):
3774
"""Eventize git commits"""
3875

39-
def eventize_item(self, raw_item: dict[str, Any]) -> list[dict[str, Any]]:
76+
def eventize_item(self, raw_item: dict[str, Any]) -> list[CloudEvent]:
4077
events = []
4178

4279
item_uuid = raw_item.get('uuid', None)
@@ -68,6 +105,11 @@ def eventize_item(self, raw_item: dict[str, Any]) -> list[dict[str, Any]]:
68105

69106
events.extend(action_events)
70107

108+
identities_events = self._eventize_commit_identities(event,
109+
raw_item)
110+
111+
events.extend(identities_events)
112+
71113
return events
72114

73115
def _eventize_commit_actions(self, parent_event: CloudEvent, raw_files_data):
@@ -143,3 +185,116 @@ def _process_action(self, source, time, event_uuid, action, file_data):
143185
event = CloudEvent(attributes, data)
144186

145187
return event
188+
189+
def _eventize_commit_identities(self, parent_event: CloudEvent, raw_item: dict[str, Any]) -> list[CloudEvent]:
    """Build the identity events linked to a commit event.

    Identities are read from the `data` section of the raw item:
    the `Author` and `Commit` fields (which may hold several
    people when the pair programming format is used) and every
    trailer listed in `COMMIT_TRAILERS`. Trailers that are not
    present in the commit produce no events.

    :param parent_event: commit event the identities are linked to
    :param raw_item: raw git commit item

    :returns: a list of identity events
    """
    def identity_events(event_type, raw_identities):
        # Every identity event inherits source, time and id
        # from the parent commit event.
        return self._process_identities(parent_event['source'],
                                        parent_event['time'],
                                        parent_event['id'],
                                        event_type,
                                        raw_identities)

    commit_data = raw_item["data"]
    collected = []

    collected.extend(
        identity_events(GIT_EVENT_COMMIT_AUTHORED_BY,
                        self._parse_authors(commit_data["Author"]))
    )
    collected.extend(
        identity_events(GIT_EVENT_COMMIT_COMMITTED_BY,
                        self._parse_authors(commit_data["Commit"]))
    )

    for trailer, trailer_event_type in COMMIT_TRAILERS.items():
        collected.extend(
            identity_events(trailer_event_type,
                            commit_data.get(trailer, []))
        )

    return collected
220+
221+
def _process_identities(
    self,
    source: str,
    time: str,
    event_uuid: str,
    event_type: str,
    raw_identities: list[str]
) -> Generator[CloudEvent, None, None]:
    """Generate one identity event per raw identity string.

    Each raw identity is parsed and given a UUID. Identities
    for which a UUID cannot be generated are logged with a
    warning and skipped.

    :param source: data source of the event
    :param time: time of the event
    :param event_uuid: UUID of the parent event
    :param event_type: type of the identity event
    :param raw_identities: list of strings with the identities information

    :returns: generator of CloudEvent with the identity information
    """
    for raw_identity in raw_identities:
        try:
            parsed = self._parse_identity(raw_identity)
            identity_uuid = generate_uuid(source="git",
                                          email=parsed.email,
                                          name=parsed.name,
                                          username=parsed.username)
        except ValueError as e:
            logger.warning(f"Cannot generate UUID for identity '{raw_identity}' "
                           f"in event '{event_uuid}': {e}. Skipping.")
            continue

        # The role is the last dotted component of the event type,
        # e.g. "authored_by" for "...git.commit.authored_by".
        role = event_type.rsplit('.', 1)[-1]

        yield CloudEvent(
            {
                "id": uuid(event_uuid, role, identity_uuid),
                "linked_event": event_uuid,
                "type": event_type,
                "source": source,
                "time": time,
            },
            {
                "source": "git",
                "name": parsed.name,
                "username": parsed.username,
                "email": parsed.email,
                "role": role,
                "uuid": identity_uuid,
            },
        )
272+
273+
@staticmethod
def _parse_authors(authors: str) -> list[str]:
    """Split a Git author string into its individual identities.

    When the string matches `GIT_AUTHORS_REGEX` (pair programming
    format), the result contains every comma-separated name of the
    first group, the last author and, as a separate entry, the
    shared email wrapped in angle brackets. Otherwise the string
    is returned unchanged as the only element of the list.

    :param authors: author string of a commit

    :returns: a list of identity strings
    """
    match = GIT_AUTHORS_REGEX.match(authors)
    if not match:
        return [authors]

    identities = [
        name.strip()
        for name in match.group("first_authors").split(",")
    ]
    identities.append(match.group("last_author"))
    identities.append(f"<{match.group('email')}>")

    return identities
286+
287+
@staticmethod
def _parse_identity(git_author: str) -> Identity:
    """Extract identity information from a Git author string.

    The expected format is `Name <email>`; both parts are optional.
    The name is the text before the first `<`, stripped of
    surrounding whitespace, or `None` when it is empty. The email
    is the text between the first `<` and the following `>`, or
    `None` when the string has no `<` at all.

    :param git_author: author string, e.g. `John Smith <jsmith@example.com>`

    :returns: an `Identity` with the `name` and `email` fields set
    """
    raw_name, bracket, remainder = git_author.partition("<")

    name = raw_name.strip() or None

    email = None
    if bracket:
        # Cut at the closing bracket instead of blindly dropping the
        # last character: trailing whitespace or a missing '>' would
        # otherwise corrupt the address.
        email = remainder.partition(">")[0].strip()

    return Identity(email=email, name=name)

0 commit comments

Comments
 (0)