Skip to content

Commit 4f957a8

Browse files
authored
Merge pull request #113 from nexB/purl2sym-metadata
Add metadata support for linux, mtd-utils, barebox, e2fsprogs and erofs-utils
2 parents 16b267d + 2170bfe commit 4f957a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+242884
-2482
lines changed

Diff for: src/fetchcode/package.py

+133-31
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,22 @@ def get_github_data_for_miniupnp(purl):
242242
)
243243

244244

245+
@router.route("pkg:generic/erofs-utils.*")
def get_github_data_for_erofs_utils(purl):
    """
    Return the package data that `GitHubSource` reports for erofs-utils.

    The generic `purl` string is parsed only for its version; the lookup
    itself is made against the `erofs/erofs-utils` GitHub repository.
    """
    requested_version = PackageURL.from_string(purl).version
    repo_purl = PackageURL(
        type="github",
        namespace="erofs",
        name="erofs-utils",
        version=requested_version,
    )
    return GitHubSource.get_package_info(repo_purl)
259+
260+
245261
@router.route("pkg:bitbucket/.*")
246262
def get_bitbucket_data_from_purl(purl):
247263
"""
@@ -317,9 +333,12 @@ def get_gnu_data_from_purl(purl):
317333
"""Generate `Package` object from the `purl` string of gnu type"""
318334
purl = PackageURL.from_string(purl)
319335
source_archive_url = f"https://ftp.gnu.org/pub/gnu/{purl.name}/"
320-
regex = r"^({}-)([\w.-]*)(.tar.gz)$".format(purl.name)
336+
version_regex_template = r"^({}-)(?P<version>[\w.-]*)(.tar.gz)$"
337+
version_regex = re.compile(version_regex_template.format(re.escape(purl.name)))
321338

322-
yield from extract_packages_from_listing(purl, source_archive_url, regex, [])
339+
yield from extract_packages_from_listing(
340+
purl, source_archive_url, version_regex, []
341+
)
323342

324343

325344
@dataclasses.dataclass
@@ -333,8 +352,8 @@ class DirectoryListedSource:
333352
"description": "Flag indicating whether the archives are nested within another directory"
334353
},
335354
)
336-
source_archive_regex: str = dataclasses.field(
337-
default="",
355+
source_archive_regex: re.Pattern = dataclasses.field(
356+
default=None,
338357
metadata={
339358
"description": "Regular expression pattern to match files in the directory listing."
340359
},
@@ -368,7 +387,9 @@ def get_package_info(cls, package_url):
368387
class IpkgDirectoryListedSource(DirectoryListedSource):
369388
source_url = "https://web.archive.org/web/20090326020239/http://handhelds.org/download/packages/ipkg/"
370389
is_nested = False
371-
source_archive_regex = r"^(ipkg[-_])([\w.-]*)(_arm.ipk|.tar.gz)$"
390+
source_archive_regex = re.compile(
391+
r"^(ipkg[-_])(?P<version>[\w.-]*)(_arm.ipk|.tar.gz)$"
392+
)
372393
ignored_files_and_dir = []
373394

374395
@classmethod
@@ -398,132 +419,163 @@ def get_package_info(cls, package_url):
398419
class UtilLinuxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for util-linux source archives."""

    source_url = "https://mirrors.edge.kernel.org/pub/linux/utils/util-linux/"
    # Archives are nested within another directory under source_url.
    is_nested = True
    # Source archive example: util-linux-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(util-linux-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
403425

404426

405427
class BusyBoxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for BusyBox source archives."""

    source_url = "https://www.busybox.net/downloads/"
    # Source archive example: busybox-1.2.3.tar.bz2
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(busybox-)(?P<version>[\w.-]*)(\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
410433

411434

412435
class UclibcDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for uClibc source archives."""

    source_url = "https://www.uclibc.org/downloads/"
    # Source archive example: uClibc-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(uClibc-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
417441

418442

419443
class UclibcNGDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for uClibc-ng source archives."""

    source_url = "https://downloads.uclibc-ng.org/releases/"
    # Source archive example: uClibc-ng-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(uClibc-ng-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are nested within another directory under source_url.
    is_nested = True
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
424449

425450

426451
class Bzip2DirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for bzip2 source archives."""

    source_url = "https://sourceware.org/pub/bzip2/"
    # Source archive example: bzip2-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(bzip2-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
431457

432458

433459
class OpenSSHDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for OpenSSH source archives."""

    source_url = "https://cdn.openbsd.org/pub/OpenBSD/OpenSSH/"
    # Source archive examples: openssh-1.2.3.tar.gz, openssh-1.2.3.tgz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(openssh-)(?P<version>[\w.-]*)(\.tgz|\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
438465

439466

440467
class DnsmasqDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for dnsmasq source archives."""

    source_url = "https://thekelleys.org.uk/dnsmasq/"
    # Source archive examples: dnsmasq-1.2.3.tar.gz, dnsmasq-1.2.3.tar.xz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(dnsmasq-)(?P<version>[\w.-]*)(\.tar\.xz|\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
445475

446476

447477
class EbtablesDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for ebtables source archives."""

    source_url = "https://www.netfilter.org/pub/ebtables/"
    # Source archive example: ebtables-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(ebtables-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
452483

453484

454485
class HostapdDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for hostapd source archives."""

    source_url = "https://w1.fi/releases/"
    # Source archive example: hostapd-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(hostapd-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
459491

460492

461493
class Iproute2DirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for iproute2 source archives."""

    source_url = "https://mirrors.edge.kernel.org/pub/linux/utils/net/iproute2/"
    # Source archive examples: iproute2-1.2.3.tar.gz, iproute2-1.2.3.tar.xz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(iproute2-)(?P<version>[\w.-]*)(\.tar\.xz|\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
466501

467502

468503
class IptablesDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for iptables source archives."""

    source_url = "https://www.netfilter.org/pub/iptables/"
    # Source archive example: iptables-1.2.3.tar.bz2
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(iptables-)(?P<version>[\w.-]*)(\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
473509

474510

475511
class LibnlDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for libnl source archives."""

    source_url = "https://www.infradead.org/~tgr/libnl/files/"
    # Source archive example: libnl-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(libnl-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
480517

481518

482519
class LighttpdDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for lighttpd 1.4.x source archives."""

    source_url = "https://download.lighttpd.net/lighttpd/releases-1.4.x/"
    # Source archive example: lighttpd-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(lighttpd-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
487525

488526

489527
class NftablesDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for nftables source archives."""

    source_url = "https://www.netfilter.org/pub/nftables/"
    # Source archive examples: nftables-1.2.3.tar.bz2, nftables-1.2.3.tar.xz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(nftables-)(?P<version>[\w.-]*)(\.tar\.xz|\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
494535

495536

496537
class WpaSupplicantDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for wpa_supplicant source archives."""

    source_url = "https://w1.fi/releases/"
    # Source archive example: wpa_supplicant-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(wpa_supplicant-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
501545

502546

503547
class SyslinuxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for syslinux source archives."""

    source_url = "https://mirrors.edge.kernel.org/pub/linux/utils/boot/syslinux/"
    # Source archive example: syslinux-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(syslinux-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
515561

516562

517563
class ToyboxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for toybox source archives."""

    source_url = "http://www.landley.net/toybox/downloads/"
    # Source archive examples: toybox-1.2.3.tar.gz, toybox-1.2.3.tar.bz2
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(toybox-)(?P<version>[\w.-]*)(\.tar\.gz|\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
522571

523572

524573
class DropbearDirectoryListedSource(DirectoryListedSource):
525574
source_url = "https://matt.ucc.asn.au/dropbear/releases/"
526-
source_archive_regex = r"^(dropbear-)([\w.-]*)(.tar.bz2|_i386.deb)$"
575+
# Source archive ex: dropbear-1.2.3.tar.bz2
576+
source_archive_regex = re.compile(
577+
r"^(dropbear-)(?P<version>[\w.-]*)(.tar.bz2|_i386.deb)$"
578+
)
527579
is_nested = False
528580
ignored_files_and_dir = [
529581
"dropbear-0.44test1.tar.bz2",
@@ -539,10 +591,53 @@ class DropbearDirectoryListedSource(DirectoryListedSource):
539591

540592
class SambaDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for Samba stable source archives."""

    source_url = "https://download.samba.org/pub/samba/stable/"
    # Source archive example: samba-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(samba-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
598+
599+
600+
class MtdUtilsDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for mtd-utils source archives."""

    source_url = "https://infraroot.at/pub/mtd/"
    # Source archive example: mtd-utils-1.2.3.tar.bz2
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(mtd-utils-)(?P<version>[\w.-]*)(\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
606+
607+
608+
class BareboxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for barebox source archives."""

    source_url = "https://www.barebox.org/download/"
    # Source archive example: barebox-1.2.3.tar.bz2
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(barebox-)(?P<version>[\w.-]*)(\.tar\.bz2)$"
    )
    # Archives are listed directly under source_url.
    is_nested = False
    # No listing entries are excluded for this package.
    ignored_files_and_dir = []
545614

615+
class LinuxDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for Linux kernel source archives."""

    source_url = "https://cdn.kernel.org/pub/linux/kernel/"
    # Source archive example: linux-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(linux-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are nested within another directory under source_url.
    is_nested = True
    # Listing entries with these names are skipped.
    ignored_files_and_dir = [
        "Historic/",
        "SillySounds/",
        "crypto/",
        "firmware/",
        "next/",
        "people/",
        "ports/",
        "projects/",
        "testing/",
        "tools/",
        "uemacs/",
    ]
633+
634+
class E2fsprogsDirectoryListedSource(DirectoryListedSource):
    """Directory-listed source for e2fsprogs source archives."""

    source_url = (
        "https://mirrors.edge.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/"
    )
    # Source archive example: e2fsprogs-1.2.3.tar.gz
    # The extension dots are escaped so they match a literal "." only; the
    # named group "version" carries the version text.
    source_archive_regex = re.compile(
        r"^(e2fsprogs-)(?P<version>[\w.-]*)(\.tar\.gz)$"
    )
    # Archives are nested within another directory under source_url.
    is_nested = True
    # Listing entries with these names are skipped.
    ignored_files_and_dir = ["testing/"]
640+
546641

547642
DIR_SUPPORTED_PURLS = [
548643
"pkg:generic/busybox.*",
@@ -566,6 +661,10 @@ class SambaDirectoryListedSource(DirectoryListedSource):
566661
"pkg:generic/util-linux.*",
567662
"pkg:generic/wpa_supplicant.*",
568663
"pkg:generic/ipkg.*",
664+
"pkg:generic/mtd-utils.*",
665+
"pkg:generic/barebox.*",
666+
"pkg:generic/linux.*",
667+
"pkg:generic/e2fsprogs.*",
569668
]
570669

571670
DIR_LISTED_SOURCE_BY_PACKAGE_NAME = {
@@ -589,6 +688,10 @@ class SambaDirectoryListedSource(DirectoryListedSource):
589688
"util-linux": UtilLinuxDirectoryListedSource,
590689
"wpa_supplicant": WpaSupplicantDirectoryListedSource,
591690
"ipkg": IpkgDirectoryListedSource,
691+
"mtd-utils": MtdUtilsDirectoryListedSource,
692+
"barebox": BareboxDirectoryListedSource,
693+
"linux": LinuxDirectoryListedSource,
694+
"e2fsprogs": E2fsprogsDirectoryListedSource,
592695
}
593696

594697

@@ -605,18 +708,17 @@ def get_packages_from_listing(purl, source_archive_url, regex, ignored_files_and
605708
"""
606709
Return list of package data from a directory listing based on the specified regex.
607710
"""
608-
pattern = re.compile(regex)
609711
_, listing = htmllistparse.fetch_listing(source_archive_url)
610712

611713
packages = []
612714
for file in listing:
613-
if not pattern.match(file.name) or file.name in ignored_files_and_dir:
715+
match = regex.match(file.name)
716+
if not match or file.name in ignored_files_and_dir:
614717
continue
615718

616-
match = re.search(regex, file.name)
617-
version = match.group(2)
719+
version = match.group("version")
618720
version = version.strip("v").strip()
619-
if not version:
721+
if not version or not version[0].isdigit():
620722
continue
621723

622724
modified_time = file.modified
@@ -668,7 +770,7 @@ def extract_package_from_nested_listing(purl, source_url, regex, ignored_files_a
668770
"""
669771
_, listing = htmllistparse.fetch_listing(source_url)
670772
for directory in listing:
671-
if not directory.name.endswith("/"):
773+
if not directory.name.endswith("/") or directory.name in ignored_files_and_dir:
672774
continue
673775

674776
directory_url = urljoin(source_url, directory.name)

Diff for: src/fetchcode/package_util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def _get_github_packages(purl, version_regex, ignored_tag_regex, default_package
127127
version = tag
128128

129129
version = version.strip("Vv").strip()
130-
if not version:
130+
if not version or not version[0].isdigit():
131131
continue
132132

133133
download_url = archive_download_url.format(

0 commit comments

Comments
 (0)