Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<!-- markdownlint-disable MD033 -->
<!-- MD033/no-inline-html -->

Comment thread
edoardolincetto marked this conversation as resolved.
Outdated
# publiccode.yml parser for Go

[![Join the #publiccode channel](https://img.shields.io/badge/Slack%20channel-%23publiccode-blue.svg?logo=slack)](https://developersitalia.slack.com/messages/CAM3F785T)
Expand Down Expand Up @@ -64,6 +65,18 @@ Run `publiccode-parser --help` for the available command line flags.

The tool returns 0 in case of successful validation, 1 otherwise.

### Generic Git Repository Support

For Git repositories not hosted on supported platforms (GitHub, GitLab and Bitbucket),
use the `--allow-local-git-clone` flag to validate file references:

```shell
# Validates files by cloning the repository locally
$ publiccode-parser --allow-local-git-clone publiccode.yml
```

This feature uses Git sparse-checkout to minimize bandwidth usage and automatically cleans up temporary files after validation. It requires the `git` executable to be installed and available in your `PATH`.

Comment thread
edoardolincetto marked this conversation as resolved.
Outdated
## With Docker

You can easily validate your files using Docker on your local machine or in your
Expand Down
18 changes: 12 additions & 6 deletions fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ func validateFieldsV0(publiccode PublicCode, parser Parser, network bool) error
if _, err := isRelativePathOrURL(*publiccodev0.Logo, "logo"); err != nil {
vr = append(vr, err)
} else if !parser.disableExternalChecks {
validLogo, err := parser.validLogo(toCodeHostingURL(*publiccodev0.Logo, parser.currentBaseURL), network)
u, isGitRepo := toCodeHostingURL(*publiccodev0.Logo, parser.currentBaseURL, parser.allowLocalGitClone)

validLogo, err := parser.validLogo(u, network, isGitRepo)
if !validLogo {
vr = append(vr, newValidationError("logo", err.Error()))
}
Expand All @@ -65,7 +67,9 @@ func validateFieldsV0(publiccode PublicCode, parser Parser, network bool) error
if _, err := isRelativePathOrURL(*publiccodev0.MonochromeLogo, "monochromeLogo"); err != nil {
vr = append(vr, err)
} else if !parser.disableExternalChecks {
validLogo, err := parser.validLogo(toCodeHostingURL(*publiccodev0.MonochromeLogo, parser.currentBaseURL), network)
u, isGitRepo := toCodeHostingURL(*publiccodev0.MonochromeLogo, parser.currentBaseURL, parser.allowLocalGitClone)

validLogo, err := parser.validLogo(u, network, isGitRepo)
if !validLogo {
vr = append(vr, newValidationError("monochromeLogo", err.Error()))
}
Expand Down Expand Up @@ -109,10 +113,10 @@ func validateFieldsV0(publiccode PublicCode, parser Parser, network bool) error
if _, err := isRelativePathOrURL(*publiccodev0.Legal.AuthorsFile, "legal.authorsFile"); err != nil {
vr = append(vr, err)
} else if !parser.disableExternalChecks {
exists, err := parser.fileExists(toCodeHostingURL(*publiccodev0.Legal.AuthorsFile, parser.currentBaseURL), network)
if !exists {
u := toCodeHostingURL(*publiccodev0.Legal.AuthorsFile, parser.currentBaseURL)
u, isGitRepo := toCodeHostingURL(*publiccodev0.Legal.AuthorsFile, parser.currentBaseURL, parser.allowLocalGitClone)

exists, err := parser.fileExists(u, network, isGitRepo)
if !exists {
vr = append(vr, newValidationError("legal.authorsFile", "'%s' does not exist: %s", urlutil.DisplayURL(&u), err.Error()))
}
}
Expand Down Expand Up @@ -165,7 +169,9 @@ func validateFieldsV0(publiccode PublicCode, parser Parser, network bool) error
if _, err := isRelativePathOrURL(v, keyName); err != nil {
vr = append(vr, err)
} else if !parser.disableExternalChecks {
isImage, err := parser.isImageFile(toCodeHostingURL(v, parser.currentBaseURL), network)
u, isGitRepo := toCodeHostingURL(v, parser.currentBaseURL, parser.allowLocalGitClone)

isImage, err := parser.isImageFile(u, network, isGitRepo)
if !isImage {
vr = append(vr, newValidationError(
keyName,
Expand Down
278 changes: 278 additions & 0 deletions git_helper.go
Comment thread
edoardolincetto marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
package publiccode

import (
"fmt"
"net/url"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
)

// gitHelper clones Git repositories into a temporary directory and checks
// out individual files from them so their existence can be verified locally.
type gitHelper struct {
// tempDir is the base directory under which every temporary Git clone made
// by this helper is created; cleanup removes it recursively.
tempDir string
// clonedRepos maps a repository URL to the path of its local clone, so the
// same repository is not cloned twice by the same helper.
clonedRepos map[string]string
}

// newGitHelper creates a gitHelper backed by a fresh temporary directory
// (named with the "publiccode-git-" prefix in the default temp location) and
// an empty clone registry.
//
// It returns an error when the temporary directory cannot be created.
func newGitHelper() (*gitHelper, error) {
	base, mkErr := os.MkdirTemp("", "publiccode-git-")
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create temp directory: %w", mkErr)
	}

	helper := new(gitHelper)
	helper.tempDir = base
	helper.clonedRepos = map[string]string{}

	return helper, nil
}

// Performs a sparse clone of a Git repository.
func (g *gitHelper) cloneRepo(repoURL string) (string, error) {
// Check if already cloned
if clonePath, ok := g.clonedRepos[repoURL]; ok {
return clonePath, nil
}

// Create a repo name
repoName := strings.NewReplacer(
"http://", "",
"https://", "",
"/", "_",
":", "_",
).Replace(repoURL)
if len(repoName) > 100 {
repoName = repoName[:100]
}

clonePath := filepath.Join(g.tempDir, repoName)

// Perform sparse clone
args := []string{"clone", "--filter=blob:none", "--no-checkout"}
args = append(args, repoURL, clonePath)
cmd := exec.Command("git", args...)

output, err := cmd.CombinedOutput()
if err != nil {
return "", fmt.Errorf("git clone failed: %w\nOutput: %s", err, output)
}

// Initialize sparse checkout
cmd = exec.Command("git", "sparse-checkout", "init", "--cone")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This creates a dependency on git and I'd rather not. Let's use a library here so it has a chance to work in WASM as well (if we implement storage for the clone someway)

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've migrated to go-git to remove the dependency on git. Since go-git doesn't currently support partial clones, the new implementation is slower (especially for large repositories) but it should be fine for typical use cases.

cmd.Dir = clonePath

output, err = cmd.CombinedOutput()
if err != nil {
os.RemoveAll(clonePath)

return "", fmt.Errorf("git sparse-checkout init failed: %w\nOutput: %s", err, output)
}

g.clonedRepos[repoURL] = clonePath

return clonePath, nil
}

// checkoutFile materializes filePath (relative to the repository root) from
// HEAD inside the clone located at repoPath.
//
// When the file lives in a subdirectory, that directory is first added to the
// sparse-checkout set; if adding the directory fails, the file path itself is
// added instead. The file is then checked out with "git checkout HEAD --".
//
// It returns an error, including git's combined output, when the fallback
// sparse-checkout step or the checkout fails; a checkout failure may simply
// mean the file does not exist in the repository.
func (g *gitHelper) checkoutFile(repoPath string, filePath string) error {
	// runGit executes git with the given arguments inside the clone and
	// returns its combined stdout/stderr.
	runGit := func(args ...string) ([]byte, error) {
		c := exec.Command("git", args...)
		c.Dir = repoPath

		return c.CombinedOutput()
	}

	// Files at the repository root need no sparse-checkout entry.
	if parent := filepath.Dir(filePath); parent != "." && parent != "" {
		if _, dirErr := runGit("sparse-checkout", "add", parent); dirErr != nil {
			// The directory could not be added: retry with the file itself.
			out, fileErr := runGit("sparse-checkout", "add", filePath)
			if fileErr != nil {
				return fmt.Errorf("git sparse-checkout add failed: %w\nOutput: %s", fileErr, out)
			}
		}
	}

	out, err := runGit("checkout", "HEAD", "--", filePath)
	if err != nil {
		return fmt.Errorf("git checkout failed: %w\nOutput: %s", err, out)
	}

	return nil
}

// Checks if a file exists in a Git repository by attempting to check it out.
func (g *gitHelper) fileExistsInRepo(repoURL string, filePath string) (bool, string, error) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also handle the case where the publiccode.yml is already in a cloned repo.

// Clone the repository if not already cloned
clonePath, err := g.cloneRepo(repoURL)
if err != nil {
return false, "", err
}

// Try to checkout the file

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a lot of comments like this one. Was this PR AI assisted?

err = g.checkoutFile(clonePath, filePath)
if err != nil {
// File doesn't exist in the repository
return false, "", err
}

// File exists and has been checked out
localPath := filepath.Join(clonePath, filePath)
if _, err := os.Stat(localPath); err != nil {
// File was supposedly checked out but doesn't exist - this is an error
return false, "", fmt.Errorf("file was checked out but not found at %s: %w", localPath, err)
}

return true, localPath, nil
}

// cleanup deletes the helper's temporary directory together with every clone
// stored underneath it. A helper without a temporary directory is a no-op.
//
// It returns the error reported by os.RemoveAll, if any.
func (g *gitHelper) cleanup() error {
	if g.tempDir == "" {
		return nil
	}

	return os.RemoveAll(g.tempDir)
}

// Checks if a URL is a generic Git repository URL.
// Returns false for supported hosting platforms (GitHub, GitLab, Bitbucket)
// which have web interfaces and should not use local Git cloning.
func isGitURL(u *url.URL) bool {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting on this function, but it's a general remark.

isGitURL, isGitRepo, are misleading. We should pick an alternative way of expressing the concept: They're always git repos in all cases, and it might be confusing to follow the code with this naming.

if u == nil {
return false
}

host := strings.ToLower(u.Host)
switch host {
case "github.com":
return false
case "gitlab.com":
return false
case "bitbucket.org":
return false
}

if u.Scheme == "git" {
return true
}

if u.Scheme == "http" || u.Scheme == "https" {
if strings.HasSuffix(u.Path, ".git") ||
strings.Contains(u.Path, ".git/") {
return true
}
}

return false
}

// getRepoURL returns the repository part of a generic Git repository URL.
//
// When the URL path contains ".git/", everything after ".git" is dropped, so
// "https://host/team/repo.git/docs/logo.png" becomes
// "https://host/team/repo.git". URLs without ".git/" in their path are
// returned unchanged. The URL passed in is not modified.
func getRepoURL(u *url.URL) string {
	const marker = ".git/"

	// Work on a copy so the caller's URL is left untouched.
	base := *u

	if cut := strings.Index(base.Path, marker); cut >= 0 {
		// Keep ".git" but not the slash that follows it.
		base.Path = base.Path[:cut+len(marker)-1]
	}

	return base.String()
}

// extractFilePathFromURL returns the repository-relative path of the file
// referenced by a generic Git repository URL of the form
// <repository>.git/<path/to/file>.
//
// Everything after the first ".git/" in u.Path is taken as the file path;
// leading slashes are dropped and the result is normalized with path.Clean,
// so the returned path is always slash-separated and relative.
//
// An error is returned when u is nil, when there is no file part after
// ".git/", or when the normalized path designates the repository root or a
// location outside of it (for example "repo.git/../../etc/passwd"). The URL
// comes from user-supplied data, so such paths must never be joined with a
// local clone directory.
func extractFilePathFromURL(u *url.URL) (string, error) {
	if u == nil {
		return "", fmt.Errorf("could not extract file path from URL: <nil>")
	}

	const marker = ".git/"

	idx := strings.Index(u.Path, marker)
	if idx == -1 {
		return "", fmt.Errorf("could not extract file path from URL: %s", u.String())
	}

	// A doubled slash after ".git" must not turn the result into an absolute path.
	filePath := strings.TrimLeft(u.Path[idx+len(marker):], "/")
	if filePath == "" {
		return "", fmt.Errorf("could not extract file path from URL: %s", u.String())
	}

	filePath = path.Clean(filePath)

	// path.Clean keeps leading ".." elements of a relative path, so an
	// escaping path is recognizable by its prefix after normalization.
	if filePath == "." || filePath == ".." || strings.HasPrefix(filePath, "../") {
		return "", fmt.Errorf("file path in URL %s is not inside the repository", u.String())
	}

	return filePath, nil
}

// checkFileInGitRepo reports whether the file referenced by u exists in the
// generic Git repository that u points into, using a local clone.
//
// On success it returns true together with the path of the checked-out file
// inside the local clone. Otherwise it returns false, an empty path and, in
// most cases, an error describing why the file could not be verified: local
// cloning is disabled on the parser, u is nil, no file path can be derived
// from u, or cloning / checking out failed (which includes the file not being
// present in the repository).
//
// The clone of each repository is remembered in p.gitRepoCache and reused for
// later lookups in the same repository.
func (p *Parser) checkFileInGitRepo(u *url.URL) (bool, string, error) {
	if !p.allowLocalGitClone {
		return false, "", fmt.Errorf("local Git clone not allowed")
	}

	if u == nil {
		return false, "", fmt.Errorf("failed to extract file path from URL: URL is nil")
	}

	repoURL := getRepoURL(u)

	filePath, err := extractFilePathFromURL(u)
	if err != nil {
		return false, "", fmt.Errorf("failed to extract file path from URL %s: %w", u.String(), err)
	}

	if p.gitRepoCache == nil {
		p.gitRepoCache = make(map[string]string)
	}

	if cachedPath, ok := p.gitRepoCache[repoURL]; ok {
		// Fast path: the file was already checked out by an earlier lookup.
		localPath := filepath.Join(cachedPath, filePath)
		if _, statErr := os.Stat(localPath); statErr == nil {
			return true, localPath, nil
		}

		if _, statErr := os.Stat(cachedPath); statErr == nil {
			// Clones are sparse, so a file that is missing on disk may simply
			// not have been checked out yet. Check it out from the clone we
			// already have instead of cloning the repository a second time
			// (which would also orphan the previous clone directory).
			// Registering the cached path makes cloneRepo return it as is;
			// the helper has no tempDir, so it owns nothing to clean up.
			cached := &gitHelper{clonedRepos: map[string]string{repoURL: cachedPath}}

			exists, checkedOut, checkErr := cached.fileExistsInRepo(repoURL, filePath)
			if checkErr != nil {
				return false, "", fmt.Errorf("failed to check file in repo %s: %w", repoURL, checkErr)
			}

			return exists, checkedOut, nil
		}

		// The cached clone no longer exists on disk: forget it and clone again.
		delete(p.gitRepoCache, repoURL)
	}

	helper, err := newGitHelper()
	if err != nil {
		return false, "", fmt.Errorf("failed to create Git helper: %w", err)
	}

	exists, localPath, err := helper.fileExistsInRepo(repoURL, filePath)
	if err != nil {
		// Nothing usable was produced: remove the temporary clone right away.
		// Cleanup is best effort and must not mask the original error.
		if cleanupErr := helper.cleanup(); cleanupErr != nil {
			fmt.Fprintf(os.Stderr, "failed to cleanup Git helper: %v\n", cleanupErr)
		}

		return false, "", fmt.Errorf("failed to check file in repo %s: %w", repoURL, err)
	}

	if !exists {
		if cleanupErr := helper.cleanup(); cleanupErr != nil {
			fmt.Fprintf(os.Stderr, "failed to cleanup Git helper: %v\n", cleanupErr)
		}

		return exists, localPath, nil
	}

	// Keep the clone for later lookups; the helper's temporary directory is
	// intentionally not removed here because the cached clone lives inside it.
	if clonePath, ok := helper.clonedRepos[repoURL]; ok {
		p.gitRepoCache[repoURL] = clonePath
	}

	return exists, localPath, nil
}
Loading