Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions specfile/changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,77 @@ def style(self) -> ChangelogStyle:
return ChangelogStyle.openSUSE
return ChangelogStyle.standard

@property
def author(self) -> Optional[str]:
"""The author of this changelog entry including their email."""
if self.style == ChangelogStyle.openSUSE:
# openSUSE format: "OPENSUSE_CHANGELOG_SEPARATOR\n$DATE - $AUTHOR <$EMAIL>"
# Extract everything after " - " from the actual header line (second line)
header_lines = self.header.split("\n")
if len(header_lines) > 1:
if len(date_author := header_lines[1].split("-", maxsplit=1)) > 1:
return date_author[1].strip()
else:
# Standard format: "* $DATE $AUTHOR <$EMAIL> [- $EVR]"
header_without_asterisk = self.header.removeprefix("* ")

# Find the end of the date portion by looking for year (4 digits followed by space)
year_match = re.search(r"\d{4}\s+", header_without_asterisk)
if year_match:
author_and_evr = header_without_asterisk[year_match.end() :]

if not self.evr:
return author_and_evr

if (gt_ind := author_and_evr.rfind(">")) > 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes the author substring always ends with a specifically formatted e-mail address, but that doesn't have to be the case, even though the Fedora Packaging Guidelines mandate it. specfile should be able to parse any format supported by RPM, which allows any string after the timestamp (terminated by a newline) and refers to it as the name. What about adding a more generic name property and then deriving author and evr from it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have refactored the code a bit, so that it no longer relies on the > character to be present. But I am not sure if we should try to parse such a changelog correctly anyway. Imagine the following entry:

* Mon May 22 2023 $FirstName $LastName

Our evr regex will recognize $LastName as the EVR in this case, and since we allow anything as the EVR, there is no way to distinguish it from a last name. At least nothing comes to mind immediately.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but there are lots of spec files with changelog entries like this, for example:
https://src.fedoraproject.org/rpms/gdb/blob/rawhide/f/gdb.spec#_1516
https://src.fedoraproject.org/rpms/rpm-spec-language-server/blob/rawhide/f/rpm-spec-language-server.spec#_72

We could try to recognize these formats, and document that author/EVR parsing is best effort and doesn't have to be accurate:

  • * timestamp author - EVR
  • * timestamp author [EVR]
  • * timestamp author (EVR)
  • * timestamp author

return author_and_evr[: gt_ind + 1].strip()

return None

@property
def timestamp(self) -> datetime.datetime:
    """The timestamp of this changelog entry.

    Two header layouts are understood:

    * openSUSE: ``<separator>\\n$DATE - $AUTHOR <$EMAIL>`` where the date is
      always in the extended form, e.g. ``Tue Dec 17 14:21:37 UTC 2024``.
    * standard: ``* $DATE $AUTHOR <$EMAIL> [- $EVR]`` where the date is
      either basic (``Tue May 4 2021``) or extended
      (``Mon Oct 18 12:34:45 CEST 2021``).

    Returns:
        A timezone-aware datetime labelled as UTC. For extended timestamps
        the wall-clock time from the header is kept as is; the timezone
        abbreviation is NOT used to shift the time. For basic timestamps
        the time of day is set to 12:00.

    Raises:
        ValueError: If no date can be located in the header or the located
            date does not match the expected layout.
    """
    basic_format = "%a %b %d %Y"
    # Deliberately no %Z here: strptime() only accepts "UTC", "GMT" and the
    # local machine's zone names for %Z, so e.g. "CEST" would fail to parse
    # on most hosts. The abbreviation never influenced the result anyway
    # (%Z yields a naive datetime), so it is dropped before parsing.
    extended_format = "%a %b %d %H:%M:%S %Y"

    date_part = None
    extended = False
    if self.style == ChangelogStyle.openSUSE:
        # The first header line is the separator; the second one carries
        # "$DATE - $AUTHOR".
        header_lines = self.header.split("\n")
        if len(header_lines) > 1 and " - " in header_lines[1]:
            date_part = header_lines[1].split(" - ", maxsplit=1)[0]
            extended = True
    else:
        header_without_asterisk = self.header.removeprefix("* ")
        # The date ends with the year: four digits followed by whitespace
        # or by the end of the header (an entry without any author text).
        year_match = re.search(r"\d{4}(?:\s+|$)", header_without_asterisk)
        if year_match:
            date_part = header_without_asterisk[: year_match.end()]
            extended = bool(self.extended_timestamp)

    if date_part is None:
        raise ValueError(f"Could not parse date from {self.header}")

    fields = date_part.split()
    if extended and len(fields) == 6:
        # weekday, month, day, time, ZONE, year -> discard the zone token
        del fields[4]

    try:
        parsed = datetime.datetime.strptime(
            " ".join(fields), extended_format if extended else basic_format
        )
    except ValueError as err:
        raise ValueError(f"Could not parse date from {self.header}") from err

    if not extended:
        # Basic timestamps carry no time of day; use noon.
        parsed = parsed.replace(hour=12)
    return parsed.replace(tzinfo=datetime.timezone.utc)

@classmethod
def assemble(
cls,
Expand Down
144 changes: 144 additions & 0 deletions tests/unit/test_changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,150 @@ def test_create_opensuse_changelog_assemble(
)


@pytest.mark.parametrize(
    ("changelog_entry", "expected_author", "expected_timestamp"),
    [
        # --- Standard Fedora/RPM headers, basic date (time defaults to noon) ---
        pytest.param(
            ChangelogEntry(
                "* Thu Jan 04 2007 Michael Schwendt <[email protected]>", [""]
            ),
            "Michael Schwendt <[email protected]>",
            datetime.datetime(2007, 1, 4, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Fri Jul 26 2024 Miroslav Suchý <[email protected]> - ss981107-67",
                [""],
            ),
            "Miroslav Suchý <[email protected]>",
            datetime.datetime(2024, 7, 26, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Mon Jul 13 2020 Tom Stellard <[email protected]> 4.0-0.4.pre2",
                [""],
            ),
            "Tom Stellard <[email protected]>",
            datetime.datetime(2020, 7, 13, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Thu Feb 04 2016 Marcin Zajaczkowski <mszpak ATT wp DOTT pl> - 1:0.9.10-6",
                [""],
            ),
            "Marcin Zajaczkowski <mszpak ATT wp DOTT pl>",
            datetime.datetime(2016, 2, 4, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Mon Jan 03 2022 Fedora Kernel Team <[email protected]> "
                "[5.16-0.rc8.55]",
                [""],
            ),
            "Fedora Kernel Team <[email protected]>",
            datetime.datetime(2022, 1, 3, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Wed Jan 23 2002 Karsten Hopp <[email protected]> (4.6-1)", [""]
            ),
            "Karsten Hopp <[email protected]>",
            datetime.datetime(2002, 1, 23, 12, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Thu Apr 9 2015 Jeffrey C. Ollie <[email protected]> - 13.3.2-1:", [""]
            ),
            "Jeffrey C. Ollie <[email protected]>",
            datetime.datetime(2015, 4, 9, 12, tzinfo=datetime.timezone.utc),
        ),
        # --- Standard header with an extended timestamp: the wall-clock time
        # from the header is expected back unchanged, labelled as UTC ---
        pytest.param(
            ChangelogEntry(
                "* Mon Oct 18 12:34:45 CEST 2021 Nikola Forró <[email protected]> - 0.2-1",
                [""],
            ),
            "Nikola Forró <[email protected]>",
            datetime.datetime(2021, 10, 18, 12, 34, 45, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                "* Mon Feb 23 2009 Fedora Release Engineering <[email protected]>"
                " - 1.23-3.20081106gitbe42b4",
                [""],
            ),
            "Fedora Release Engineering <[email protected]>",
            datetime.datetime(2009, 2, 23, 12, tzinfo=datetime.timezone.utc),
        ),
        # --- Header that carries no EVR at all ---
        pytest.param(
            ChangelogEntry("* Mon May 22 2023 Nikola Forró <[email protected]>", [""]),
            "Nikola Forró <[email protected]>",
            datetime.datetime(2023, 5, 22, 12, tzinfo=datetime.timezone.utc),
        ),
        # --- openSUSE/SUSE headers: separator line, then an extended date ---
        pytest.param(
            ChangelogEntry(
                f"{_OPENSUSE_CHANGELOG_SEPARATOR}\n"
                "Tue Dec 17 14:21:37 UTC 2024 - Dan Čermák <[email protected]>",
                [""],
            ),
            "Dan Čermák <[email protected]>",
            datetime.datetime(2024, 12, 17, 14, 21, 37, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                f"{_OPENSUSE_CHANGELOG_SEPARATOR}\n"
                "Mon Nov 4 17:47:23 UTC 2024 - Dan Čermák <[email protected]>",
                [""],
            ),
            "Dan Čermák <[email protected]>",
            datetime.datetime(2024, 11, 4, 17, 47, 23, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                f"{_OPENSUSE_CHANGELOG_SEPARATOR}\n"
                "Fri May 17 09:14:20 UTC 2024 - Dominique Leuenberger <[email protected]>",
                [""],
            ),
            "Dominique Leuenberger <[email protected]>",
            datetime.datetime(2024, 5, 17, 9, 14, 20, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                f"{_OPENSUSE_CHANGELOG_SEPARATOR}\n"
                "Mon Oct 10 13:27:24 UTC 2022 - Stephan Kulow <[email protected]>",
                [""],
            ),
            "Stephan Kulow <[email protected]>",
            datetime.datetime(2022, 10, 10, 13, 27, 24, tzinfo=datetime.timezone.utc),
        ),
        pytest.param(
            ChangelogEntry(
                f"{_OPENSUSE_CHANGELOG_SEPARATOR}\n"
                "Fri Jun 25 07:31:34 UTC 2021 - Dan Čermák <[email protected]>",
                [""],
            ),
            "Dan Čermák <[email protected]>",
            datetime.datetime(2021, 6, 25, 7, 31, 34, tzinfo=datetime.timezone.utc),
        ),
    ],
)
def test_author_timestamp(
    changelog_entry: ChangelogEntry,
    expected_author: str,
    expected_timestamp: datetime.datetime,
):
    """Check the author and timestamp extracted from each sample header."""
    author = changelog_entry.author
    assert author == expected_author

    timestamp = changelog_entry.timestamp
    assert timestamp == expected_timestamp


def test_get_raw_section_data():
tzinfo = datetime.timezone(datetime.timedelta(hours=2), name="CEST")
changelog = Changelog(
Expand Down
Loading