Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,3 @@ version: "2"

run:
timeout: 5m

linters-settings:
staticcheck:
go: "1.25"
checks: ["all", "-ST1005"]

issues:
exclude:
- "ST1005"
Comment on lines -6 to -13
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please consider reverting these changes since they are not focused on the specific issue and also they have been taken care in another PR.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure i ll revert the latest commit

162 changes: 150 additions & 12 deletions generators/github/git_repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,29 @@ func (gr GitRepo) GetContent() (models.Package, error) {
_ = br.Flush()
_ = fd.Close()
}()

// If root is not specified, enable recursive traversal from root to discover CRDs automatically
// This makes the generator robust to repository structure changes
rootPath := root
isAutoDiscovery := rootPath == ""
if isAutoDiscovery {
// Use "/**" to enable recursive traversal from repository root
rootPath = "/**"
}

gw := gitWalker.
Owner(owner).
Repo(repo).
Branch(branch).
Root(root).
RegisterFileInterceptor(fileInterceptor(br)).
RegisterDirInterceptor(dirInterceptor(br))
Root(rootPath).
RegisterFileInterceptor(crdAwareFileInterceptor(br))

// Register dirInterceptor to handle Helm charts which may contain CRDs
// Note: When doing automatic discovery (recurse mode), dirInterceptor processes directories
// and fileInterceptor processes files. For Helm charts, dirInterceptor extracts CRDs from
// the chart structure, while fileInterceptor finds standalone CRD files. This ensures we
// discover CRDs in both formats without missing any.
gw = gw.RegisterDirInterceptor(dirInterceptor(br))

if version != "" {
gw = gw.ReferenceName(fmt.Sprintf("refs/tags/%s", version))
Expand All @@ -78,31 +94,153 @@ func (gr GitRepo) GetContent() (models.Package, error) {
}

func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) {
parts := strings.SplitN(strings.TrimPrefix(gr.URL.Path, "/"), "/", 4)
path := strings.TrimPrefix(gr.URL.Path, "/")
parts := strings.Split(path, "/")
size := len(parts)
if size > 3 {
owner = parts[0]
repo = parts[1]
branch = parts[2]
root = parts[3]

} else {
err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, specify owner, repo, branch and filepath in the url according to the specified source url format", gr.URL.String()))

// Minimum required: owner and repo
if size < 2 {
err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, must specify at least owner and repo", gr.URL.String()))
return
}

owner = parts[0]
repo = parts[1]

// Remove .git suffix from repo name if present
repo = strings.TrimSuffix(repo, ".git")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's find a library that offers this functionality.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure i will look for a library that offers this and update it

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay i will check this out @parthivsaikia .


// Handle standard GitHub URL formats:
// - https://github.com/owner/repo
// - https://github.com/owner/repo/tree/branch
// - https://github.com/owner/repo/tree/branch/path/to/dir
// - git://github.com/owner/repo/branch/path (legacy format)

branch = "main" // default branch
root = ""

if size >= 3 {
// Check if this is a standard GitHub URL with /tree/branch format
if parts[2] == "tree" && size >= 4 {
// Format: owner/repo/tree/branch[/path...]
branch = parts[3]
if size > 4 {
// Reconstruct the path after branch
root = strings.Join(parts[4:], "/")
}
} else if parts[2] == "blob" {
// Format: owner/repo/blob/branch/path/to/file
// This is a file URL, not a directory - we'll treat it as root path
if size >= 4 {
branch = parts[3]
if size > 4 {
root = strings.Join(parts[4:], "/")
}
}
} else {
// Legacy format: owner/repo/branch[/path...]
branch = parts[2]
if size > 3 {
// Reconstruct the path after branch
root = strings.Join(parts[3:], "/")
}
}
}

// If root is empty, we'll use "/**" for recursive traversal in GetContent
// This enables automatic CRD discovery

return
}

func (gr GitRepo) ExtractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) {
return gr.extractRepoDetailsFromSourceURL()
}

// fileInterceptor processes all files (original behavior)
func fileInterceptor(br *bufio.Writer) walker.FileInterceptor {
return func(file walker.File) error {
tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5))
return ProcessContent(br, tempPath, file.Path)
}
}

// crdAwareFileInterceptor only processes files that contain CRDs
// This enables automatic CRD discovery without requiring specific directory paths
func crdAwareFileInterceptor(br *bufio.Writer) walker.FileInterceptor {
return func(file walker.File) error {
// Check if the file is a YAML/JSON file that might contain CRDs
fileName := strings.ToLower(file.Name)
isYAML := strings.HasSuffix(fileName, ".yaml") || strings.HasSuffix(fileName, ".yml")
isJSON := strings.HasSuffix(fileName, ".json")

if !isYAML && !isJSON {
// Skip non-YAML/JSON files
return nil
}

// Check if the file content contains a CRD
// Handle both single-document and multi-document YAML files
content := file.Content

// For multi-document YAML, split by document separator and check each
documents := strings.Split(content, "\n---\n")
// Also handle documents separated by "---" at the start of a line
if len(documents) == 1 {
// Try splitting by lines starting with "---"
lines := strings.Split(content, "\n")
var docs []string
var currentDoc strings.Builder
for _, line := range lines {
if strings.TrimSpace(line) == "---" && currentDoc.Len() > 0 {
docs = append(docs, currentDoc.String())
currentDoc.Reset()
} else {
if currentDoc.Len() > 0 {
currentDoc.WriteString("\n")
}
currentDoc.WriteString(line)
}
}
if currentDoc.Len() > 0 {
docs = append(docs, currentDoc.String())
}
if len(docs) > 1 {
documents = docs
}
}
Comment on lines +164 to +188
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for splitting multi-document YAML files is complex and has a few issues:

  1. The initial split strings.Split(content, "\n---\n") on line 187 is not robust for all multi-document YAML files.
  2. In the fallback logic, the condition && currentDoc.Len() > 0 on line 195 can cause the --- separator at the beginning of a file to be incorrectly included in the first document.
  3. The else block on line 198 will append the --- line to the next document if the if condition on line 195 is false.
  4. On line 208, if len(docs) > 1 prevents processing files with a single document that might be discovered by this splitting logic.

Consider simplifying this entire block with a more robust method, such as using a regular expression to split documents, which would be cleaner and less error-prone.


// Check each document for CRD
hasCRD := false
for _, doc := range documents {
doc = strings.TrimSpace(doc)
if doc == "" {
continue
}
// Check for YAML format
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use libraries, not string parsing.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay noted !

if strings.Contains(doc, "kind: CustomResourceDefinition") {
hasCRD = true
break
}
// Check for JSON format
if strings.Contains(doc, "\"kind\":\"CustomResourceDefinition\"") ||
strings.Contains(doc, `"kind":"CustomResourceDefinition"`) {
hasCRD = true
break
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current string-based checks for kind: CustomResourceDefinition are fragile and may fail with minor formatting variations, such as extra whitespace. Additionally, the check for JSON on lines 226-227 contains a redundant condition, as both sides of the || operator are identical.

To improve robustness, consider using regular expressions to handle whitespace variations. For example:

// Note: This requires importing the `regexp` package.

// For YAML
if match, _ := regexp.MatchString(`kind:\s*CustomResourceDefinition`, doc); match {
    hasCRD = true
    break
}

// For JSON
if match, _ := regexp.MatchString(`"kind"\s*:\s*"CustomResourceDefinition"`, doc); match {
    hasCRD = true
    break
}

Alternatively, a full unmarshal would be even more robust, though potentially slower.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider these suggestions @CWAbhi

}

if !hasCRD {
// File doesn't contain a CRD, skip it
return nil
}

// File contains a CRD, process it
tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5))
return ProcessContent(br, tempPath, file.Path)
}
}

// When passing a directory to extract charts and the format introspector is provided as file/dir interceptor i.e. ConvertToK8sManifest ensure the Recurese is off. It is required othweise we will process the dir as well as process the file in that dir separately.
// Add more calrifying commment and entry inside docs.
func dirInterceptor(br *bufio.Writer) walker.DirInterceptor {
Expand Down
33 changes: 33 additions & 0 deletions generators/github/scheme_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package github

import (
"net/url"
"strings"

"github.com/meshery/meshkit/generators/models"
)
Expand All @@ -11,6 +12,14 @@ type DownloaderScheme interface {
}

func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) DownloaderScheme {
// Check if this is a GitHub URL - route to GitRepo for automatic CRD discovery
if isGitHubURL(scheme, url) {
return GitRepo{
URL: url,
PackageName: packageName,
}
}

switch scheme {
case "git":
return GitRepo{
Expand All @@ -27,3 +36,27 @@ func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) Dow
}
return nil
}

// isGitHubURL checks if the URL is a GitHub repository URL
// This enables automatic CRD discovery for standard GitHub URLs
func isGitHubURL(scheme string, url *url.URL) bool {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The scheme parameter is unused within this function. Removing it would improve code clarity. Remember to update the call to this function on line 16 as well.

Suggested change
func isGitHubURL(scheme string, url *url.URL) bool {
func isGitHubURL(url *url.URL) bool {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a library instead of manual parsing would be more robust.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure i will change from manual parsing of this to a library

host := strings.ToLower(url.Host)
// Check for github.com domain
if host == "github.com" || strings.HasSuffix(host, ".github.com") {
// Check if it looks like a repository URL (has at least owner/repo in path)
path := strings.TrimPrefix(url.Path, "/")
parts := strings.Split(path, "/")
// Valid GitHub repo URL should have at least owner and repo
if len(parts) >= 2 && parts[0] != "" && parts[1] != "" {
// Exclude certain paths that aren't repositories
excluded := []string{"settings", "explore", "marketplace", "pulls", "issues", "new", "organizations", "login", "join"}
for _, exclude := range excluded {
if parts[0] == exclude {
return false
}
}
return true
}
}
return false
}