diff --git a/.drsconfig b/.drsconfig new file mode 100644 index 0000000..13553b9 --- /dev/null +++ b/.drsconfig @@ -0,0 +1,9 @@ +{ + "queryServer": { + "baseURL": "https://calypr.ohsu.edu/ga4gh" + }, + "writeServer": { + "baseURL": "https://calypr.ohsu.edu/ga4gh" + }, + "gen3Profile": "" +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0088ced --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +.DS_Store +/tmp \ No newline at end of file diff --git a/client/README.md b/client/README.md new file mode 100644 index 0000000..45e7cf8 --- /dev/null +++ b/client/README.md @@ -0,0 +1,14 @@ +# Git DRS Client + +## Getting Started + +1. Configure gen3 with your credentials ([docs](https://aced-idp.github.io/requirements/#1-download-gen3-client)) +2. Edit platform URL and gen3 profile in `.drsconfig` +3. Build from source + ```bash + go build + ``` +4. Access through command line + ```bash + ./git-drs --help + ``` diff --git a/client/config.go b/client/config.go new file mode 100644 index 0000000..12845db --- /dev/null +++ b/client/config.go @@ -0,0 +1,59 @@ +package client + +import ( + "io" + "log" + "os" + "path/filepath" + + "github.com/bmeg/git-drs/utils" + "sigs.k8s.io/yaml" +) + +type Server struct { + BaseURL string `json:"baseURL"` + ExtensionType string `json:"type,omitempty"` +} + +type Config struct { + QueryServer Server `json:"queryServer"` + WriteServer Server `json:"writeServer"` + Gen3Profile string `json:"gen3Profile"` +} + +const ( + DRS_CONFIG = ".drsconfig" +) + +func LoadConfig() (*Config, error) { + //look in Git base dir and find .drsconfig file + + topLevel, err := utils.GitTopLevel() + + if err != nil { + return nil, err + } + + configPath := filepath.Join(topLevel, DRS_CONFIG) + + log.Printf("Looking for %s", configPath) + //check if config exists + reader, err := os.Open(configPath) + if err != nil { + return nil, err + } + + b, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + + conf := Config{} + 
err = yaml.Unmarshal(b, &conf) + if err != nil { + return nil, err + } + + log.Printf("Config: %s %#v", string(b), conf) + return &conf, nil +} diff --git a/client/indexd.go b/client/indexd.go new file mode 100644 index 0000000..1d0a592 --- /dev/null +++ b/client/indexd.go @@ -0,0 +1,166 @@ +package client + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + + "github.com/bmeg/git-drs/drs" + "github.com/uc-cdis/gen3-client/gen3-client/jwt" +) + +var conf jwt.Configure +var profileConfig jwt.Credential + +type IndexDClient struct { + base *url.URL + profile string +} + +func NewIndexDClient(base string) (ObjectStoreClient, error) { + baseURL, err := url.Parse(base) + // print baseURL + if err != nil { + return nil, err + } + + cfg, err := LoadConfig() + if err != nil { + return nil, err + } + + // get the gen3Profile from the config + profile := cfg.Gen3Profile + if profile == "" { + return nil, fmt.Errorf("No gen3 profile specified. Please provide a gen3Profile key in your .drsconfig") + } + + fmt.Printf("Base URL: %s\n", baseURL.String()) + fmt.Printf("Profile: %s\n", profile) + + return &IndexDClient{baseURL, profile}, err +} + +// DownloadFile implements ObjectStoreClient +func (cl *IndexDClient) DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) { + // get file from indexd + a := *cl.base + a.Path = filepath.Join(a.Path, "drs/v1/objects", id, "access", access_id) + // a.Path = filepath.Join("https://calypr.ohsu.edu/user/data/download/", id) + + // unmarshal response + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + return nil, err + } + // extract accessToken from gen3 profile and insert into header of request + profileConfig = conf.ParseConfig(cl.profile) + if profileConfig.AccessToken == "" { + return nil, fmt.Errorf("access token not found in profile config") + } + + // Add headers to the request + authStr := "Bearer " + profileConfig.AccessToken + 
req.Header.Set("Authorization", authStr) + + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + return nil, err + } + defer response.Body.Close() + + body, err := io.ReadAll(response.Body) + if err != nil { + return nil, err + } + + out := drs.AccessURL{} + err = json.Unmarshal(body, &out) + if err != nil { + return nil, err + } + + // Extract the signed URL from the response + signedURL := out.URL + if signedURL == "" { + return nil, fmt.Errorf("signed URL not found in response.") + } + + // Download the file using the signed URL + fileResponse, err := http.Get(signedURL) + if err != nil { + return nil, err + } + defer fileResponse.Body.Close() + + // Check if the response status is OK + if fileResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to download file using signed URL: %s", fileResponse.Status) + } + + // Create the destination directory if it doesn't exist + err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) + if err != nil { + return nil, err + } + + // Create the destination file + dstFile, err := os.Create(dstPath) + if err != nil { + return nil, err + } + defer dstFile.Close() + + // Write the file content to the destination file + _, err = io.Copy(dstFile, fileResponse.Body) + if err != nil { + return nil, err + } + + fmt.Printf("File written to %s\n", dstFile.Name()) + + return &out, nil +} + +// RegisterFile implements ObjectStoreClient. 
+func (cl *IndexDClient) RegisterFile(path string, name string) (*drs.DRSObject, error) { + panic("unimplemented") +} + +func (cl *IndexDClient) QueryID(id string) (*drs.DRSObject, error) { + + a := *cl.base + a.Path = filepath.Join(a.Path, "drs/v1/objects", id) + + req, err := http.NewRequest("GET", a.String(), nil) + if err != nil { + return nil, err + } + // Add headers to the request + req.Header.Set("Authorization", "Bearer ") + req.Header.Set("Custom-Header", "HeaderValue") + + client := &http.Client{} + response, err := client.Do(req) + if err != nil { + return nil, err + } + defer response.Body.Close() + + body, err := io.ReadAll(response.Body) + if err != nil { + return nil, err + } + + out := drs.DRSObject{} + err = json.Unmarshal(body, &out) + if err != nil { + return nil, err + } + return &out, nil +} diff --git a/client/interface.go b/client/interface.go new file mode 100644 index 0000000..8b6bba1 --- /dev/null +++ b/client/interface.go @@ -0,0 +1,14 @@ +package client + +import "github.com/bmeg/git-drs/drs" + +type ObjectStoreClient interface { + //Given a DRS string ID, retrieve the object describing it + QueryID(id string) (*drs.DRSObject, error) + + //Put file into object storage and obtain a DRS record pointing to it + RegisterFile(path string, name string) (*drs.DRSObject, error) + + //Download file given a DRS ID + DownloadFile(id string, access_id string, dstPath string) (*drs.AccessURL, error) +} diff --git a/cmd/add/main.go b/cmd/add/main.go new file mode 100644 index 0000000..0a63c26 --- /dev/null +++ b/cmd/add/main.go @@ -0,0 +1,27 @@ +package add + +import ( + "fmt" + "path/filepath" + + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "add", + Short: "Add a file", + Long: ``, + Args: cobra.MinimumNArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + for _, fileArg := range args { + matches, err := filepath.Glob(fileArg) + if err == nil { + for _, f := range matches { + 
fmt.Printf("Adding %s\n", f) + } + } + } + return nil + }, +} diff --git a/cmd/download/main.go b/cmd/download/main.go new file mode 100644 index 0000000..067d642 --- /dev/null +++ b/cmd/download/main.go @@ -0,0 +1,57 @@ +package download + +import ( + "github.com/bmeg/git-drs/client" + "github.com/bmeg/git-drs/drs" + "github.com/spf13/cobra" +) + +var ( + server string + dstPath string + drsObj *drs.DRSObject +) + +// Cmd line declaration +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "download ", + Short: "Download file using DRS ID and access ID", + Long: "Download file using DRS ID and access ID. The access ID is the access method used to download the file.", + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + drsId := args[0] + accessId := args[1] + cfg, err := client.LoadConfig() + if err != nil { + return err + } + + baseURL := cfg.QueryServer.BaseURL + + client, err := client.NewIndexDClient(baseURL) + if err != nil { + return err + } + + if dstPath == "" { + + drsObj, err = client.QueryID(drsId) + if err != nil { + return err + } + dstPath = drsObj.Name + } + + _, err = client.DownloadFile(drsId, accessId, dstPath) + if err != nil { + return err + } + + return nil + }, +} + +func init() { + Cmd.Flags().StringVarP(&dstPath, "dstPath", "d", "", "Optional destination file path") +} diff --git a/cmd/filterprocess/main.go b/cmd/filterprocess/main.go new file mode 100644 index 0000000..70fda3b --- /dev/null +++ b/cmd/filterprocess/main.go @@ -0,0 +1,59 @@ +package filterprocess + +import ( + "fmt" + "io" + "log" + "os" + + "github.com/git-lfs/git-lfs/v3/git" + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "filter-process", + Short: "filter proces", + Long: ``, + Args: cobra.MinimumNArgs(0), + RunE: func(cmd *cobra.Command, args []string) error { + s := git.NewFilterProcessScanner(os.Stdin, os.Stdout) + err := s.Init() + if err != nil { + return err + } + + caps, err := 
s.NegotiateCapabilities() + if err != nil { + return err + } + log.Printf("Caps: %#v\n", caps) + log.Printf("Running filter-process: %s\n", args) + + for s.Scan() { + req := s.Request() + switch req.Header["command"] { + case "clean": + log.Printf("Request to clean %#v %s\n", req.Payload, req.Header["pathname"]) + + clean(os.Stdout, req.Payload, req.Header["pathname"], -1) + + case "smudge": + log.Printf("Request to smudge %s %s\n", req.Payload, req.Header["pathname"]) + case "list_available_blobs": + log.Printf("Request for list_available_blobs\n") + + default: + return fmt.Errorf("don't know what to do: %s", req.Header["command"]) + } + log.Printf("Request: %#v\n", req) + } + + return nil + }, +} + +func clean(to io.Writer, from io.Reader, fileName string, fileSize int64) error { + + return nil +} diff --git a/cmd/list/main.go b/cmd/list/main.go index f6deccb..27f36ae 100644 --- a/cmd/list/main.go +++ b/cmd/list/main.go @@ -5,7 +5,7 @@ import ( "os" "path/filepath" - "github.com/bmeg/git-gen3/git" + "github.com/bmeg/git-drs/utils" "github.com/spf13/cobra" ) @@ -17,7 +17,7 @@ var Cmd = &cobra.Command{ Long: ``, Args: cobra.MinimumNArgs(0), RunE: func(cmd *cobra.Command, args []string) error { - gitTop, err := git.GitTopLevel() + gitTop, err := utils.GitTopLevel() if err != nil { fmt.Printf("Error: %s\n", err) return err diff --git a/cmd/query/main.go b/cmd/query/main.go new file mode 100644 index 0000000..b9463d7 --- /dev/null +++ b/cmd/query/main.go @@ -0,0 +1,43 @@ +package query + +import ( + "encoding/json" + "fmt" + + "github.com/bmeg/git-drs/client" + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "query ", + Short: "Query DRS server by DRS ID", + Long: "Query DRS server by DRS ID", + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + + cfg, err := client.LoadConfig() + if err != nil { + return err + } + + //fix this later + baseURL := cfg.QueryServer.BaseURL + + client, err := 
client.NewIndexDClient(baseURL) + if err != nil { + return err + } + + obj, err := client.QueryID(args[0]) + if err != nil { + return err + } + out, err := json.MarshalIndent(*obj, "", " ") + if err != nil { + return err + } + fmt.Printf("%s\n", string(out)) + return nil + }, +} diff --git a/cmd/register/main.go b/cmd/register/main.go new file mode 100644 index 0000000..2f8bd41 --- /dev/null +++ b/cmd/register/main.go @@ -0,0 +1,34 @@ +package register + +import ( + "log" + "path/filepath" + + "github.com/bmeg/git-drs/client" + "github.com/spf13/cobra" +) + +var server string = "https://calypr.ohsu.edu/ga4gh" + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "register", + Short: "", + Long: ``, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + log.Printf("Registering file %s", args[0]) + client, err := client.NewIndexDClient(server) + if err != nil { + return err + } + + //upload the file, name would probably be relative to the base of the git repo + client.RegisterFile(args[0], filepath.Base(args[0])) + + //remove later + _ = client + + return nil + }, +} diff --git a/cmd/root.go b/cmd/root.go index 9afb30e..cb463eb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,16 +3,21 @@ package cmd import ( "os" - "github.com/bmeg/git-gen3/cmd/initialize" - "github.com/bmeg/git-gen3/cmd/list" - "github.com/bmeg/git-gen3/cmd/pull" - "github.com/bmeg/git-gen3/cmd/push" + "github.com/bmeg/git-drs/cmd/add" + "github.com/bmeg/git-drs/cmd/download" + "github.com/bmeg/git-drs/cmd/filterprocess" + "github.com/bmeg/git-drs/cmd/initialize" + "github.com/bmeg/git-drs/cmd/list" + "github.com/bmeg/git-drs/cmd/pull" + "github.com/bmeg/git-drs/cmd/push" + "github.com/bmeg/git-drs/cmd/query" + "github.com/bmeg/git-drs/cmd/register" "github.com/spf13/cobra" ) // RootCmd represents the root command var RootCmd = &cobra.Command{ - Use: "git-gen3", + Use: "git-drs", SilenceErrors: true, SilenceUsage: true, PersistentPreRun: func(cmd 
*cobra.Command, args []string) { @@ -25,6 +30,11 @@ func init() { RootCmd.AddCommand(push.Cmd) RootCmd.AddCommand(pull.Cmd) RootCmd.AddCommand(list.Cmd) + RootCmd.AddCommand(add.Cmd) + RootCmd.AddCommand(filterprocess.Cmd) + RootCmd.AddCommand(query.Cmd) + RootCmd.AddCommand(register.Cmd) + RootCmd.AddCommand(download.Cmd) RootCmd.AddCommand(genBashCompletionCmd) } diff --git a/cmd/track/main.go b/cmd/track/main.go new file mode 100644 index 0000000..31d1964 --- /dev/null +++ b/cmd/track/main.go @@ -0,0 +1,21 @@ +package track + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// Cmd line declaration +var Cmd = &cobra.Command{ + Use: "track", + Short: "Set a file track filter", + Long: ``, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + for i := range args { + fmt.Printf("Track %s\n", args[i]) + } + return nil + }, +} diff --git a/docs/README-comparison.md b/docs/README-comparison.md new file mode 100644 index 0000000..a2a8df5 --- /dev/null +++ b/docs/README-comparison.md @@ -0,0 +1,66 @@ +# Comparison: Git LFS and g3t Integrated Data Platform (CALIPER-IDP) +A comparative overview of two distinct approaches to managing and storing large project data files: Git Large File Storage (Git LFS) and the CALIPER Integrated Data Platform (CALIPER-IDP). + +--- + +## Git Large File Storage (Git LFS) + +**Purpose:** Git LFS is an open-source Git extension designed to handle large files efficiently within Git repositories. + +**Key Features:** + +- **Pointer-Based Storage:** Replaces large files (e.g., audio, video, datasets) in the Git repository with lightweight text pointers, while storing the actual file contents on a remote server. + +- **Seamless Git Integration:** Allows developers to use standard Git commands (`add`, `commit`, `push`, `pull`) without altering their workflow. 
+ +- **Selective File Tracking:** Developers specify which file types to track using `.gitattributes`, enabling granular control over large file management. + +- **Storage Efficiency:** By offloading large files, it keeps the Git repository size manageable, improving performance for cloning and fetching operations. + +**Use Cases:** + +- Software development projects involving large binary assets, such as game development, multimedia applications, or data science projects. + +--- + +## CALIPER Integrated Data Platform (CALIPER-IDP) + +**Purpose:** CALIPER-IDP is a specialized data commons platform developed by the International Alliance for Cancer Early Detection (CALIPER) to facilitate secure and structured sharing of research data among member institutions. + +**Key Features:** + +- **Gen3-Based Infrastructure:** Utilizes Gen3, an open-source data commons framework, to manage data submission, storage, and access. + +- **Command-Line Interface (CLI):** Provides the `gen3-tracker (g3t)` CLI tool for researchers to create projects, upload files, and associate metadata incrementally. + +- **FHIR Metadata Integration:** Supports the addition of Fast Healthcare Interoperability Resources (FHIR) metadata, enhancing data interoperability and standardization. + +- **Role-Based Access Control:** Implements fine-grained access controls to ensure data security and compliance with privacy regulations. + +- **Data Exploration and Querying:** Offers tools for data exploration and querying, facilitating collaborative research and analysis. +**Use Cases:** + +- Biomedical research projects requiring secure, standardized, and collaborative data management, particularly in multi-institutional settings. 
+ +--- + +## Comparative Summary + +| Feature | Git LFS | CALIPER-IDP | +|---------------------------|--------------------------------------------------------|-----------------------------------------------------------| +| **Primary Use Case** | Managing large files in software development projects | Collaborative biomedical research data management | +| **Integration** | Seamless with Git workflows | Built on Gen3 framework with specialized CLI tools | +| **Data Storage** | Remote storage with Git pointers | Structured data commons with metadata support | +| **Access Control** | Inherits Git repository permissions | Role-based access control for data security | +| **Metadata Support** | Limited | Comprehensive, including FHIR standards | +| **Collaboration Features**| Standard Git collaboration tools | Enhanced tools for data exploration and querying | + +--- + +**Conclusion:** + +- **Git LFS** is ideal for developers seeking to manage large files within their existing Git workflows, offering a straightforward solution without the need for additional infrastructure. + +- **CALIPER-IDP** caters to the complex needs of collaborative biomedical research, providing a robust platform for secure data sharing, standardized metadata integration, and advanced data exploration capabilities. + +The choice between Git LFS and CALIPER-IDP depends on the specific requirements of the project, including the nature of the data, collaboration needs, and compliance considerations. diff --git a/docs/README-epic.md b/docs/README-epic.md new file mode 100644 index 0000000..1a0fee1 --- /dev/null +++ b/docs/README-epic.md @@ -0,0 +1,167 @@ + + +# πŸš€ Epic: Develop `git-gen3` Tool for Git-Based Gen3 Integration + +> Create a Git-native utility to track and synchronize remote object metadata, generate FHIR-compliant metadata, and manage Gen3 access control using `git-sync`. 
+ +--- + +## 🧭 Sprint 0: Architecture Spike + +### 🎯 Goal: +De-risk implementation by validating core architectural assumptions and tool compatibility. + +### πŸ”¬ Tasks: +| ID | Task Description | Est. | +|-------|------------------------------------------------------------------------------|------| +| SPK-0 | Learning - team spends time, becomes familiar with "stock" git-lfs | 1d | +| SPK-1 | Prototype `track-remote` to fetch metadata (e.g., ETag, size) from S3/GCS | 1d | +| SPK-2 | Simulate `.lfs-meta/metadata.json` usage in Git repo + commit/push | 0.5d | +| SPK-3 | Test `init-meta` to produce `DocumentReference.ndjson` via `g3t`-style logic | 1d | +| SPK-4 | Validate `git-sync` role mappings and diffs against Gen3 fence API | 1d | +| SPK-5 | Evaluate GitHub template DX: hooks, portability, local usage | 0.5d | +| SPK-6 | Validate `auth-sync` deprecate client facing project management | 4d | +| SPK-7 | Validate `gen3-client` can UChicago's go code be installed and called? | 4d | + +### βœ… Deliverables: + +- Prototype CLI for `track-remote` - not currently part of got-lfs + - How are user credentials handled? +- Sample `.lfs-meta/metadata.json` and generated `META/DocumentReference.ndjson` +- Feasibility report for Git-driven role syncing via `git-sync` +- Recommendation on proceeding with full implementation + +--- + +## 🧭 Sprint 1: CLI Bootstrapping & Remote File Tracking + +### 🎯 Goal: +Create the `git-gen3` CLI structure and implement the ability to track remote cloud objects in Git without downloading them. + +### πŸ”¨ Tasks: +| ID | Task Description | Est. 
| +|------|------------------------------------------------------|------| +| S1-1 | Scaffold `git-gen3` CLI with Click (Python) or Cobra (Go) | 2d | +| S1-2 | Implement `track` and `track-remote` subcommands | 2d | +| S1-3 | Write to `.lfs-meta/metadata.json` | 1d | +| S1-4 | Support auth with AWS, GCS, Azure (env vars + profiles) | 1d | +| S1-5 | Add `pre-push` hook to validate metadata before push | 1d | +| S1-6 | Unit tests for `track-remote` and metadata structure | 1d | + +### βœ… Deliverables: +- Functional CLI command: `git-gen3 track-remote s3://...` +- `.lfs-meta/metadata.json` updated and committed in Git +- Git hook active for metadata validation +- CI-ready foundation for next sprint + +--- + +## 🧭 Sprint 2: Metadata Initialization + FHIR Generation + +### 🎯 Goal: +Transform `.lfs-meta/metadata.json` entries into Gen3-compatible `DocumentReference.ndjson` metadata using FHIR structure. + +### πŸ”¨ Tasks: +| ID | Task Description | Est. | +|------|--------------------------------------------------------------------|------| +| S2-1 | Implement `init-meta` to emit `META/DocumentReference.ndjson` | 2d | +| S2-2 | Populate FHIR fields: `subject`, `context.related`, `attachment` | 1d | +| S2-3 | Create `validate-meta` command to check metadata completeness | 1d | +| S2-4 | Write tests for `init-meta` and FHIR formatting | 1d | +| S2-5 | Document schema, CLI usage, and FHIR integration points | 1d | + +### βœ… Deliverables: +- `git-gen3 init-meta` produces valid FHIR NDJSON +- Tool handles patient/specimen references +- Tests validate output conformance +- Documentation aligns with `g3t upload` workflows + +--- + +## 🧭 Sprint 3: Git-Sync Integration & Access Control + +### 🎯 Goal: +Replace `collaborator` and `project-management` with Git-based role assignments using `git-sync` and Gen3 fence APIs. + +### πŸ”¨ Tasks: +| ID | Task Description | Est. 
| +|------|-------------------------------------------------------------------|------| +| S3-1 | Integrate `git-sync` YAML/CSV parser into `git-gen3 sync-users` | 2d | +| S3-2 | Implement dry-run and apply modes for syncing to Gen3 fence | 1d | +| S3-3 | Add change auditing (diff viewer from Git commits) | 1d | +| S3-4 | End-to-end test: Git β†’ Gen3 user role propagation | 1d | +| S3-5 | Write user guide and governance documentation | 1d | + +### βœ… Deliverables: +- `git-gen3 sync-users` CLI reads Git-tracked access config +- Git diffs capture permission changes over time +- Gen3 access control (via Fence) is synced reliably +- Finalized documentation for institutional onboarding + +--- + +## πŸ“… Sprint Timeline Summary + +| Sprint | Focus | Duration | Deliverables | +|--------|----------------------------------|----------|-----------------------------------------------| +| 0 | Architecture validation (spike) | 1 week | Prototypes + greenlight for implementation | +| 1 | Remote file tracking | 2 weeks | `track-remote`, `.lfs-meta`, validation hooks | +| 2 | Metadata generation (FHIR) | 2 weeks | FHIR output, `init-meta`, validation tooling | +| 3 | Git-based access control | 2 weeks | `sync-users`, Git audit trail, Fence sync | + +--- + +## πŸ›  Toolchain + +| Purpose | Tool/Stack | +|------------------------|---------------------------| +| CLI Language | Python (Click) or Go (Cobra) | +| Object Store APIs | boto3 (S3), gcsfs, Azure SDK | +| Metadata Serialization | JSON, FHIR NDJSON | +| Access Sync | git-sync + Gen3 Fence | +| Testing | `pytest` or `go test` | +| Docs | Markdown, GitHub Pages | + +--- + +## 🧭 Sprint 4: User Testing, Documentation, and Release Planning + +### 🎯 Goal: +Conduct functional and usability testing, finalize user documentation, and prepare for internal/external release of the `git-gen3` tool. + +--- + +### πŸ”¨ Tasks: +| ID | Task Description | Est. 
| +|------|------------------------------------------------------------------------------|------| +| S4-1 | Recruit early adopters from internal teams or pilot projects | 0.5d | +| S4-2 | Collect and triage feedback via GitHub issues or survey | 1d | +| S4-3 | Perform functional validation of all workflows (track, init-meta, sync) | 1d | +| S4-4 | Finalize and polish all CLI command help strings and usage messages | 0.5d | +| S4-5 | Write end-user guide (markdown or GitHub Pages) with examples and FAQs | 1d | +| S4-6 | Create changelog and release notes for v1.0 | 0.5d | +| S4-7 | Define release checklist and governance process (e.g., approval flow) | 0.5d | +| S4-8 | Tag first release, publish GitHub release, optionally register PyPI/Homebrew| 0.5d | + +--- + +### βœ… Deliverables: +- End-user documentation published and linked from the repo +- Feedback collected from test users and incorporated as GitHub issues +- Final `v1.0.0` tag and release notes +- Optional: Package published to PyPI (Python) or Homebrew (Go binary) + +--- + +### πŸ“… Sprint Timeline Summary (Updated) + +| Sprint | Focus | Duration | Deliverables | +|--------|----------------------------------|----------|-----------------------------------------------| +| 0 | Architecture validation (spike) | 1 week | Prototypes + greenlight for implementation | +| 1 | Remote file tracking | 2 weeks | `track-remote`, `.lfs-meta`, validation hooks | +| 2 | Metadata generation (FHIR) | 2 weeks | FHIR output, `init-meta`, validation tooling | +| 3 | Git-based access control | 2 weeks | `sync-users`, Git audit trail, Fence sync | +| 4 | Testing, docs, release planning | 1 week | Docs, feedback, `v1.0.0` release | + + +--- diff --git a/docs/README-git-plugin-dev.md b/docs/README-git-plugin-dev.md new file mode 100644 index 0000000..f33fa15 --- /dev/null +++ b/docs/README-git-plugin-dev.md @@ -0,0 +1,25 @@ + +# Notes about the development of git plugins + + +To attach the plugin into the configutation. 
In the global config `~/.gitconfig` add the lines: +``` +[filter "drs"] + clean = git-drs clean -- %f + smudge = git-drs smudge -- %f + process = git-drs filter-process + required = true +``` + +Then to add tracking in a project, add entries to `.gitattributes` in the working directory. Example: +``` +*.tsv filter=drs diff=drs merge=drs -text +``` + +When `git status` or `git diff` is invoked on a `*.tsv` file, the process `git-drs filter-process` will be +invoked. The communication between git and the subprocess is outlined in [gitprotocol-common](https://git-scm.com/docs/gitprotocol-common). A library for parsing this event stream is part of the git-lfs code base https://github.com/git-lfs/git-lfs/blob/main/git/filter_process_scanner.go +An example of responding to these requests can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_filter_process.go + +My understanding: The main set of commands that the filter-process command responds to are `clean` and `smudge`. +The `clean` process cleans an input document before running diff, things like running auto-formatting before committing. This is where the change from the file to the remote data pointer could take place. An example of the +clean process can be found at https://github.com/git-lfs/git-lfs/blob/main/commands/command_clean.go#L27 diff --git a/docs/README-git-sync.md b/docs/README-git-sync.md new file mode 100644 index 0000000..0aa9527 --- /dev/null +++ b/docs/README-git-sync.md @@ -0,0 +1,301 @@ +# Overview `git-sync` + +Contents: + +* A **high-level architecture for the `git-sync` project**, where **GitHub, etc., becomes the source of truth (system of record)** for project roles, replacing Synapse. The target system remains **Gen3**, where roles and access need to be synchronized. +* To support **GitLab** or **other Git servers**, we introduce an **abstraction layer** for the identity and role retrieval logic. This layer separates the **source system** (GitHub, GitLab, etc.) 
from the **target system** (Gen3), making the architecture **extensible and pluggable**. +* **Unit and integration test specifications** for the `git-sync` project with pluggable Git-based role sources and Gen3 as the target system. + +--- + +## 🎯 Goal + +> **Sync project role assignments from GitHub teams and collaborators to Gen3's access control system**. + +--- + +## 🧭 Conceptual Overview + +```text + +---------------------+ + | GitHub Org | <-- system of record + | - Teams | + | - Collaborators | + +---------+-----------+ + | + | REST API + v + +---------------------+ +---------------------+ + | git-sync CLI +------> + Gen3 Access API | + | - Fetch & map roles | | - Projects & ACLs | + | - Transform to Gen3 | | - Policies | + +---------------------+ +---------------------+ +``` + +--- + +## 🧱 Architectural Components + +### 1. **GitHub as the System of Record** + +- **Source of role info**: + - [Organization Teams](https://docs.github.com/en/organizations/collaborating-with-groups-in-your-organization/about-teams) + - Represent Gen3 "project roles" (e.g., `project-admins`, `project-members`) + - [Repository Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28) + - Fallback or direct per-project access info + +- **API Use**: + - List team members + - Map team slugs to Gen3 projects/roles + - Fetch repo collaborators and permission levels (`pull`, `push`, `admin`) + +--- + +### 2. 
**git-sync CLI Service** + +A command-line tool/service that: + +- Authenticates with GitHub and Gen3 +- Loads configuration mapping GitHub entities to Gen3 projects and roles +- Periodically or on-demand performs synchronization + +#### Functions: + +| Function | Description | +|-----------------------------|-------------| +| `fetch_github_teams()` | Get org teams, members, and slugs | +| `map_to_gen3_roles()` | Transform GitHub teams β†’ Gen3 roles | +| `fetch_repo_collaborators()`| Identify individual users & access | +| `sync_to_gen3()` | Write roles to Gen3 using `fence` API | + +--- + +### 3. **Configuration Layer** + +Example YAML mapping: + +```yaml +projects: + project-xyz: + github_teams: + - aced-project-xyz-admins: project-admin + - aced-project-xyz-members: project-member + repos: + - aced/project-xyz +``` + +--- + +### 4. **Target System: Gen3 (Fence Authz)** + +- **API Integration**: + - Use Gen3/fence endpoint: `PUT /user` or `PATCH /access` for roles +- **Mapped Roles**: + - `project-admin` + - `project-member` + - `data-submitter` + +--- + +## πŸ”’ Security & Auth + +- **GitHub Auth**: + - Use GitHub App or PAT with `read:org` and `repo` scopes +- **Gen3 Auth**: + - Use API key or JWT token authorized to manage users + +--- + +## πŸ“‹ Sync Flow + +![](images/github-sync-flowchart.png) +--- + +## πŸš€ Future Enhancements + +- Bi-directional diff reporting (GitHub vs Gen3) +- Dry-run and audit modes +- GitHub webhooks for near-real-time sync +- GitHub Actions-based CI for automation +- Slack/email alerts for sync failures + +--- + + +## 🧱 High-Level Architecture with Abstraction Layer +To enable support for **GitLab** or **other Git servers**, we introduce an **abstraction layer** for the identity and role retrieval logic. This layer separates the **source system** (GitHub, GitLab, etc.) from the **target system** (Gen3), making the architecture **extensible and pluggable**. 
+ + +```text + +---------------------+ +---------------------+ +----------------------+ + | GitHub / GitLab | | Bitbucket etc. | | Other Identity | + | (source plugins) | | (optional source) | | Providers | + +----------+----------+ +----------+----------+ +----------+-----------+ + | | | + | | | + +------------+---------------+------------------------------+ + | + v + +-----------------------------------------------+ + | RoleSourceAdapter Interface (Abstract) | + | - get_users_for_project(project_id) | + | - get_teams_for_project(project_id) | + | - get_user_roles() | + +-----------------------------------------------+ + | + +---------------+-----------------+ + | | + +-------v--------+ +--------v--------+ + | GitHubAdapter | | GitLabAdapter | ← add more: BitbucketAdapter, etc. + +----------------+ +-----------------+ + + | + v + +--------------------------------------+ + | git-sync Core Logic | + | - Loads config & adapter | + | - Maps source roles to Gen3 roles | + | - Pushes to Gen3 via Gen3 API | + +--------------------------------------+ + | + v + +------------------------+ + | Gen3 API (Fence) | + +------------------------+ +``` + +--- + + +## πŸ›  CLI Usage Example + +```bash +# In config.yaml +source: + type: github + org: aced + token_env: GITHUB_TOKEN + +# Could also be: +# type: gitlab +# group: aced-projects +# token_env: GITLAB_TOKEN +``` + +The CLI would dynamically load the adapter based on the `type`. + +--- + +## βœ… Benefits of This Design + +| Benefit | Description | +|--------------------------|-------------| +| πŸ”Œ **Extensible** | Easily add new providers (GitHub, GitLab, Bitbucket, etc.) 
| +| πŸ”„ **Pluggable** | Source logic swappable without changing Gen3 sync logic | +| πŸ”’ **Encapsulated Auth** | Each adapter handles its own tokens/API nuances | +| πŸ§ͺ **Testable** | Adapters are unit-testable in isolation | + +--- + +## βœ… **Unit Test Specifications** +**Unit and integration test specifications** for the `git-sync` project with pluggable Git-based role sources and Gen3 as the target system. + +### πŸ”§ 1. **Adapter Interface Compliance** +Test that all adapter classes correctly implement the `RoleSourceAdapter` interface. + +- `test_github_adapter_implements_interface()` +- `test_gitlab_adapter_implements_interface()` + +### πŸ§ͺ 2. **GitHubAdapter** +- Mock GitHub API responses using `requests-mock` or `unittest.mock` +- Tests: + - `test_get_teams_for_project_returns_expected_structure()` + - `test_get_user_roles_maps_correct_roles()` + - `test_handles_empty_team_membership_gracefully()` + +### πŸ§ͺ 3. **GitLabAdapter** +- Stubbed or mocked API interactions +- Tests: + - `test_get_users_from_gitlab_group()` + - `test_role_mapping_with_gitlab_permissions()` + +### πŸ§ͺ 4. **Core Logic** +- Tests for transformation and mapping: + - `test_map_team_to_gen3_role()` + - `test_generate_patch_payload_for_gen3()` + - `test_detect_added_and_removed_users()` + +### πŸ§ͺ 5. **Config Loader** +- Validate schema and default fallbacks: + - `test_config_validates_required_fields()` + - `test_loads_correct_adapter_from_type()` + +### πŸ§ͺ 6. **CLI Command** +Use `click.testing.CliRunner`: +- `test_sync_command_runs_with_github_config()` +- `test_invalid_config_returns_error()` + +--- + +## πŸ” **Integration Test Specifications** + +### 🌐 1. **Mock GitHub + Gen3** +Set up mocks or test containers for GitHub API and Gen3 Fence. 
+ +- Use `responses` or `httpx_mock` for mocking GitHub/Gen3 endpoints +- Tests: + - `test_full_sync_applies_roles_to_gen3()` + - `test_users_removed_from_github_are_removed_from_gen3()` + - `test_users_added_to_team_are_reflected_in_gen3()` + +### πŸ” 2. **End-to-End Sync Flow** +- Fixture: + - `config.yaml` + - Mocked GitHub API for team memberships + - Mocked Gen3 `/user` and `/access` endpoints +- Validate: + - API calls are made + - Correct role updates are sent + - Logging and reporting capture expected output + +### πŸ”’ 3. **Security Edge Cases** +- Token expiration +- Invalid org/repo +- Permissions mismatch +- Tests: + - `test_invalid_github_token_raises_error()` + - `test_gen3_rejects_unauthorized_user_changes()` + +--- + +## πŸ“ Suggested Test Directory Structure + +``` +tests/ +β”œβ”€β”€ unit/ +β”‚ β”œβ”€β”€ test_github_adapter +β”‚ β”œβ”€β”€ test_gitlab_adapter +β”‚ β”œβ”€β”€ test_core_logic +β”‚ └── test_config_loader +β”œβ”€β”€ integration/ +β”‚ β”œβ”€β”€ test_end_to_end_github_sync +β”‚ β”œβ”€β”€ test_error_handling +β”‚ └── test_gen3_interactions +└── fixtures/ + └── github_team_response.json +``` + +--- + +## βœ… Tools & Fixtures + +| Tool | Purpose | +|-----------------|-----------------------------------| +| `pytest` | Core testing framework | +| `responses` | HTTP mocking for REST APIs | +| `click.testing` | CLI command testing | +| `pydantic` | Config schema validation | +| `tox` or `nox` | Multi-environment test automation | + +--- + diff --git a/docs/README-gitlfs-meta.md b/docs/README-gitlfs-meta.md new file mode 100644 index 0000000..4345b42 --- /dev/null +++ b/docs/README-gitlfs-meta.md @@ -0,0 +1,341 @@ + +--- + +## 🧩 Overview + +**Goal**: Enable this usage: + +```bash +git add path/to/file --patient Patient/1234 --specimen Specimen/ABC567 +``` + +…and have the `--patient` and `--specimen` metadata passed through to your **Git LFS custom transfer agent**, such as [`lfs-s3`](https://github.com/nicolas-graves/lfs-s3), for use in 
metadata handling or cloud-side tagging. + +--- + +## ❗Problem + +Git **does not support arbitrary flags on `git add`**. + +**Solution**: Use **Git LFS pre-push hooks and custom transfer metadata** to attach additional metadata. + +--- + +### 1. **Capture Extra Metadata Outside `git add`** + +Since we can't modify `git add`: + +- Track extra metadata in a sidecar file (e.g., `.lfs-meta`) +- Use an extended command like: + + ```bash + git lfs-meta track path/to/file --patient Patient/1234 --specimen Specimen/ABC567 + ``` + +That command would append this to `.lfs-meta.json`: + +```json +{ + "path/to/file": { + "patient": "Patient/1234", + "specimen": "Specimen/ABC567" + } +} +``` + +--- + +### 2. **Enhance the Git LFS Transfer Agent** + +> Optional: Adding S3 tags or other metadata to the object in S3. + +Git LFS passes information to your custom transfer agent (like `lfs-s3`) using stdin/stdout JSON messages. + +You can modify `lfs-s3` to: + +- Parse the filename it's transferring +- Look up `patient`/`specimen` metadata from `.lfs-meta.json` +- Push that metadata to S3 (e.g., as object tags or upload metadata) + +πŸ”§ **Example agent snippet (Go, inside `lfs-s3`)**: + +```go +meta := readMetaFile(".lfs-meta.json") +filePath := filepath.Base(obj.Path) + +if info, ok := meta[filePath]; ok { + s3Client.PutObjectTagging(&s3.PutObjectTaggingInput{ + Bucket: aws.String(bucket), + Key: aws.String(obj.Oid), + Tagging: &s3.Tagging{ + TagSet: []*s3.Tag{ + {Key: aws.String("Patient"), Value: aws.String(info.Patient)}, + {Key: aws.String("Specimen"), Value: aws.String(info.Specimen)}, + }, + }, + }) +} +``` + +--- + +## πŸ“„ Example Workflow + +```bash +git lfs track "*.bin" +git add foo.bin + +# Add metadata via a companion command +git lfs-meta tag foo.bin --patient Patient/001 --specimen Specimen/XYZ + +git commit -m "Add patient-associated data" +git push +``` + +--- + +## πŸ›  Implementation Notes + +| Component | Description | +|------------------|-------------| +| 
`.lfs-meta.json` | Project-local map of file path β†’ metadata | +| `git lfs-meta` | New CLI wrapper to manage sidecar file | +| `lfs-s3` | Enhanced to load `.lfs-meta.json` and inject metadata during upload | + +--- + +## βœ… Advantages + +- No change to Git core or Git LFS binary +- Clean separation of metadata via `.lfs-meta.json` +- Reuses standard Git + LFS behavior +- Fully compatible with custom transfer agents + +--- + +## βš™οΈ Configuration: `lfs-meta` Git Integration + +### πŸ“¦ 1. Install the `lfs-meta` Tool + +Install globally or per-project. Example (Go- or Python-based): + +```bash +pip install git-lfs-meta +# or +go install github.com/username/repository@latest +``` + +Ensure it's in your `$PATH`: + +```bash +which lfs-meta +``` + +--- + +### πŸ—‚οΈ 2. Create `.lfs-meta/metadata.json` + +In your Git repo: + +```bash +mkdir -p .lfs-meta +touch .lfs-meta/metadata.json +``` + +Track this in your repo so the metadata is version-controlled: + +```bash +git add .lfs-meta/metadata.json +``` + +Optionally, use `.lfs-meta/.metaignore` to exclude paths from metadata. + +--- + +### 🧩 3. Add `lfs-meta` as a Git subcommand + +You can use Git's alias feature: + +```bash +git config alias.lfs-meta '!lfs-meta' +``` + +Now you can run: + +```bash +git lfs-meta tag path/to/file --patient Patient/1234 +``` + +--- + +### πŸͺ 4. Configure a Git LFS Pre-Push Hook (Optional) + +To automatically sync metadata during push, create: + +`.git/hooks/pre-push` + +```bash +#!/bin/bash +# Hook to prepare metadata for LFS transfer agent + +if [ -f ".lfs-meta/metadata.json" ]; then + echo "[lfs-meta] Metadata file detected." +else + echo "[lfs-meta] No metadata file present." +fi +``` + +Make it executable: + +```bash +chmod +x .git/hooks/pre-push +``` + +For more advanced use, this hook could: +- Validate `.lfs-meta/metadata.json` +- Ensure required fields are set before push + +--- + +### πŸ›  5.
Custom Git Config (optional) + +To keep Git aware of `lfs-meta` behavior, configure: + +```bash +git config --local lfs.meta.enabled true +git config --local lfs.meta.path .lfs-meta/metadata.json +``` + +Read these with: + +```bash +git config --get lfs.meta.path +``` + +--- + +## 🧬 Section: FHIR Metadata Initialization via `lfs-meta` +Extends the `lfs-meta` command to **initialize FHIR metadata** (in the style of `g3t meta init` from the [`gen3_util`](https://github.com/ACED-IDP/gen3_util) project), by reading a sidecar metadata file (like `.lfs-meta/metadata.json`) and generating a `META/DocumentReference.ndjson` file. + +### 🎯 Goal + +Add a command to `lfs-meta`: + +```bash +lfs-meta init-meta +``` + +This reads `.lfs-meta/metadata.json` and generates a valid FHIR `DocumentReference, Patient, ... ` ndjson file in `META/`. + +--- + +### πŸ“‚ Input: `.lfs-meta/metadata.json` + +```json +{ + "foo.vcf": { + "patient": "Patient/1234", + "specimen": "Specimen/XYZ" + }, + "bar.pdf": { + "patient": "Patient/5678" + } +} +``` + +--- + +### πŸ“„ Output: `META/DocumentReference.ndjson, META/Patient.ndjson, ...` + +```json +{"resourceType":"DocumentReference","content":[{"attachment":{"url":"s3://bucket/foo.vcf","title":"foo.vcf"}}],"subject":{"reference":"Patient/1234"},"context":{"related":[{"reference":"Specimen/XYZ"}]}} +{"resourceType":"DocumentReference","content":[{"attachment":{"url":"s3://bucket/bar.pdf","title":"bar.pdf"}}],"subject":{"reference":"Patient/5678"}} +``` + + +--- + +### πŸ§ͺ CLI Integration + +Add to `lfs-meta` as a subcommand: + +```bash +lfs-meta init-meta +``` + +Options: +- `--output`: Where to write the `.ndjson` +- `--bucket`: Base URI for constructing FHIR `attachment.url` + +--- + +### πŸ“ Directory Structure After Init + +``` +. +β”œβ”€β”€ .lfs-meta/ +β”‚ └── metadata.json +β”œβ”€β”€ META/ +β”‚ └── DocumentReference.ndjson, etc. 
+``` + +--- + +### βœ… Benefits + +| Feature | Value | +|----------------------------------|--------------------------------------| +| πŸ” Integrates with Gen3 Uploads | Compatible with `gen3_util` metadata flow | +| 🧬 FHIR-compliant | Proper `DocumentReference` structure | +| πŸ“¦ Reusable | Automates metadata for downstream sync tools | + +--- + + +## βœ… Resulting Workflow + +```bash +git lfs track "*.vcf" +git add foo.vcf + +# Associate metadata (via configured alias) +git lfs-meta track foo.vcf --patient Patient/123 --specimen Specimen/ABC + +git commit -m "Added foo.vcf with metadata" +git push +``` + +Absolutely β€” here’s a **test specification section** for the `lfs-meta` feature that initializes FHIR metadata from a sidecar file, compatible with `gen3_util`. + +--- + +## βœ… Section: Unit and Integration Tests for `lfs-meta init-meta` + +--- + +### πŸ§ͺ Unit Test Specifications + +| Test Name | Description | +|---------------------------------------------|-------------| +| `test_generate_single_documentreference()` | Generates a valid FHIR `DocumentReference` for one file | +| `test_generate_with_patient_only()` | Handles entries that include only the patient reference | +| `test_generate_with_patient_and_specimen()` | Handles entries with both patient and specimen | +| `test_missing_metadata_fields()` | Gracefully skips or warns on invalid metadata (e.g. 
missing file or fields) | +| `test_output_is_valid_ndjson()` | Validates that output is newline-delimited JSON objects | +| `test_bucket_override()` | Ensures custom S3 base path is respected | +| `test_empty_metadata()` | Outputs nothing (or warns) if input metadata is empty | + + +--- + +### πŸ” Integration Test Specifications + +| Scenario | Setup | Expected Behavior | +|-----------------------------------------|-------------------------------------------|-------------------| +| `test_lfs_meta_end_to_end_minimal()` | .lfs-meta/metadata.json β†’ `init-meta` | Produces `META/DocumentReference.ndjson` | +| `test_meta_used_in_gen3_upload()` | Full Git repo, push to Gen3 | Metadata is accepted by Gen3 API or `g3t upload` | +| `test_multiple_files_ndjson_format()` | Multiple entries in sidecar | Multiple NDJSON lines generated | +| `test_script_idempotency()` | Run `init-meta` twice | Output is consistent and append-safe | +| `test_no_metadata_file()` | No `.lfs-meta/metadata.json` | Graceful failure or warning message | + + diff --git a/docs/README-gitlfs-remote-buckets.md b/docs/README-gitlfs-remote-buckets.md new file mode 100644 index 0000000..4b22e0b --- /dev/null +++ b/docs/README-gitlfs-remote-buckets.md @@ -0,0 +1,247 @@ +Git LFS (Large File Storage) **does not natively handle files stored in cloud buckets (like S3, GCS, Azure Blob)** unless additional tools or integrations are introduced. + +--- + +## 🧠 What Git LFS Does + +Git LFS is designed to: + +- Replace large files in a Git repo with lightweight pointers. +- Store the actual file content on **a Git LFS server**, which is usually: + - Co-located with Git hosting (e.g., GitHub, GitLab, Bitbucket), or + - Hosted separately (e.g., custom LFS servers). + +When you `git clone` or `git pull`, the Git LFS client fetches the file content from the LFS server. 
+ +--- + +## 🚫 Limitations with Cloud Buckets + +Git LFS: + +- **Does not automatically recognize or manage files** stored in external cloud buckets like S3, GCS, or Azure. +- **Cannot directly sync or link to files in a cloud bucket** without: + - Downloading them manually and committing them via LFS. + - Using custom tooling to bridge Git LFS pointers with cloud bucket contents. + +--- + +## βœ… Possible Workarounds or Integrations + +To incorporate cloud bucket storage with Git LFS, users may: + +### 1. **Use a custom Git LFS server that backs to S3** +- Example: [lfs-test-server](https://github.com/git-lfs/lfs-test-server) or other S3-backed LFS servers. +- Stores LFS objects in S3, **but still requires Git LFS client operations** for tracking/pulling. + +### 2. **Manually sync cloud bucket contents with Git LFS** +- Download from the cloud bucket. +- Use `git lfs track` to commit and push to Git LFS server. +- Duplication risk: The file now exists in both Git LFS and the cloud bucket. + +### 3. **Symlink or metadata approaches (not portable)** +- Use `.gitattributes` to track LFS pointer. +- Maintain cloud object metadata (e.g., S3 URI) in the repo. +- Requires external scripts or hooks to resolve and download actual content. + +--- + +## βœ… Summary Table + +| Feature | Git LFS Support | +|--------------------------------------|------------------| +| Track large files in Git | βœ… Yes | +| Native cloud bucket integration (S3, GCS) | ❌ No | +| Support for S3-backed custom LFS servers | βœ… With setup | +| Automatically fetch from cloud buckets | ❌ No | +| Point LFS to a cloud bucket URI | ❌ No native support | + +--- + +## 🧩 Missing Feature: Index Remote Files without Download/Upload + +To support tracking **remote URLs** (e.g., S3, GCS, etc.) 
**without downloading or uploading files**, and storing that information in `.lfs-meta/metadata.json`, you can extend the current design as follows: + +--- + + +### βœ… Motivation +You want to track and index remote files (e.g., in object stores) using Git + LFS-like semantics, but **without actually downloading or uploading files**. Instead, file metadata is logged, version-controlled, and made available for later workflows like validation or FHIR metadata generation. + +--- + +## πŸ”„ Updated `.lfs-meta/metadata.json` Format + +Now includes a `remote_url` key, replacing the need to add the actual file content to the repo: + +```json +{ + "data/foo.vcf": { + "remote_url": "s3://my-bucket/data/foo.vcf", + "etag": "abc123etag", + "size": 12345678, + "patient": "Patient/1234", + "specimen": "Specimen/XYZ" + } +} +``` + +This file is tracked in Git, but the referenced file is **never downloaded or uploaded**. + +--- + +## πŸš€ Updated Usage Workflow + +### Step 1: Track a Remote File +```bash +lfs-meta track-remote s3://my-bucket/data/foo.vcf \ + --path data/foo.vcf \ + --patient Patient/1234 \ + --specimen Specimen/XYZ +``` + +This command: +- Writes to `.lfs-meta/metadata.json` +- Extracts or validates remote file size, ETag, etc. 
via cloud APIs + +### Step 2: Skip `git add data/foo.vcf` β€” no file is present + +Instead, only the metadata is committed: + +```bash +git add .lfs-meta/metadata.json +git commit -m "Track remote file foo.vcf without downloading" +``` + +--- + +## βš™οΈ Updates to `README.md` + +You should update the **Usage Workflow** and **metadata.json example** in the README to include: + +### πŸ“¦ Track Remote File +```bash +lfs-meta track-remote s3://my-bucket/data/foo.vcf \ + --path data/foo.vcf \ + --patient Patient/1234 +``` + +### πŸ“ Sample `.lfs-meta/metadata.json` +```json +{ + "data/foo.vcf": { + "remote_url": "s3://my-bucket/data/foo.vcf", + "etag": "abc123etag", + "size": 12345678, + "patient": "Patient/1234" + } +} +``` + +### 🧬 Generate FHIR Metadata +```bash +lfs-meta init-meta \ + --input .lfs-meta/metadata.json \ + --output META/DocumentReference.ndjson +``` + +Generates: +```json +{ + "resourceType": "DocumentReference", + "content": [ + { + "attachment": { + "url": "s3://my-bucket/data/foo.vcf", + "title": "foo.vcf" + } + } + ], + "subject": { + "reference": "Patient/1234" + } +} +``` + +--- + +## βœ… Benefits + +| Feature | Description | +|--------------------------|-------------| +| ☁️ Remote index only | Tracks remote data without local storage | +| πŸ“‹ Auditable metadata | Commit metadata to Git without binary bloat | +| πŸ”„ Interoperable with Gen3| Downstream tools can consume this | +| πŸ” Permissions respected | No direct copy of sensitive files | + +--- + +# Credential management `track-remote` command obtains the credentials it needs to read metadata from remote object stores (e.g., S3, GCS, Azure Blob). + +--- + +## πŸ” Credential Handling for `track-remote` + +The `lfs-meta track-remote` command must authenticate to the cloud provider in order to retrieve metadata such as file size, ETag, or content type. This is done **without downloading the file**, using read-only **head/object metadata** APIs. 
+ +Supported cloud providers (initial targets): +- βœ… AWS S3 +- βœ… Google Cloud Storage (GCS) +- βœ… Azure Blob Storage + +### πŸ”‘ Credential Lookup Strategy + +The command uses the following order of precedence to locate credentials: + +--- + +### πŸ“¦ AWS S3 + +| Method | Description | +|----------------------------|-------------| +| `AWS_PROFILE` | Use a named profile from `~/.aws/credentials` | +| `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` | Set directly as environment variables | +| EC2/ECS/IRSA IAM Roles | Automatically used in cloud environments with role-based access | + +> πŸ’‘ You can simulate this locally: +```bash +export AWS_PROFILE=aced-research +lfs-meta track-remote s3://my-bucket/data/foo.vcf --path data/foo.vcf +``` + +--- + +### 🌍 Google Cloud Storage (GCS) + +| Method | Description | +|------------------------------|-------------| +| `GOOGLE_APPLICATION_CREDENTIALS` | Path to a service account key JSON | +| gcloud CLI default credentials | Automatically picked up if `gcloud auth application-default login` is used | + +> πŸ’‘ Example: +```bash +export GOOGLE_APPLICATION_CREDENTIALS=~/gcs-access-key.json +lfs-meta track-remote gs://my-bucket/data/foo.vcf --path data/foo.vcf +``` + +--- + +### ☁️ Azure Blob Storage + +| Method | Description | +|------------------------------|-------------| +| `AZURE_STORAGE_CONNECTION_STRING` | Full connection string for access | +| `AZURE_STORAGE_ACCOUNT` + `AZURE_STORAGE_KEY` | Account name and key variables | +| Azure CLI login | Supports `az login` if the SDK allows fallback | + +--- + +### πŸ”§ Fallback + +If credentials are not detected automatically, `lfs-meta track-remote` should: +- Display a clear error message +- Suggest how to set environment variables +- Optionally support a `--credentials` flag for custom paths or credential profiles + +--- + diff --git a/docs/README-gitlfs-template-project.md b/docs/README-gitlfs-template-project.md new file mode 100644 index 0000000..cdb8249 --- /dev/null +++ 
b/docs/README-gitlfs-template-project.md @@ -0,0 +1,164 @@ +# git-lfs-meta-template + +> **Template Repository** +> +> This is a GitHub **template repository**. Click **"Use this template"** on GitHub to bootstrap a new project with Git LFS support, metadata tracking, and FHIR integration. + +A Git project archetype for managing large files with Git LFS + S3 and synchronizing metadata with FHIR DocumentReferences, supporting integration with Gen3 via `g3t`. Tool-agnostic: the `lfs-meta` utility can be written in **Python**, **Go**, or any language that conforms to expected input/output behavior. + +--- + +## 🌐 Project Layout + +```bash +git-lfs-meta-template/ +β”œβ”€β”€ .gitignore +β”œβ”€β”€ .gitattributes +β”œβ”€β”€ .lfs-meta/ +β”‚ └── metadata.json +β”œβ”€β”€ META/ +β”‚ └── DocumentReference.ndjson +β”‚ └── Patient.ndjson, etc... +β”œβ”€β”€ hooks/ +β”‚ └── pre-push +β”œβ”€β”€ lfs_meta/ # Optional directory for your implementation +β”‚ └── ... +β”œβ”€β”€ tests/ +β”‚ β”œβ”€β”€ unit/ +β”‚ └── integration/ +β”œβ”€β”€ requirements.txt # If Python is used +β”œβ”€β”€ go.mod # If Go is used +└── README.md +``` + +--- + +## πŸš€ Getting Started + +### 1. Use This Template on GitHub + +1. Go to the repository page on GitHub. +2. Click the green **"Use this template"** button. +3. Create a new repository from this template. + +Alternatively, clone the project manually: + +```bash +git clone https://github.com/YOUR_ORG/git-lfs-meta-template.git +cd git-lfs-meta-template +``` + +### 2. Install the `lfs-meta` Tool +Install the `lfs-meta` tool in your preferred language: + +#### Python +```bash +pip install -e . 
# assumes setup.py or pyproject.toml exists +``` + +#### Go +```bash +go build -o lfs-meta ./cmd/lfs-meta +mv lfs-meta /usr/local/bin/ +``` + +Ensure it's available on your `$PATH`: +```bash +which lfs-meta +``` + +--- + +## βš–οΈ Git LFS Setup +```bash +git lfs install +git lfs track "*.bin" +echo "*.bin filter=lfs diff=lfs merge=lfs -text" >> .gitattributes +``` + +--- + +## βš™οΈ Configure Git Hooks + +Enable the `pre-push` hook: +```bash +chmod +x hooks/pre-push +ln -s ../../hooks/pre-push .git/hooks/pre-push +``` + +This will validate `.lfs-meta/metadata.json` before push. + +--- + +## ⚑ Usage Workflow + +### Add a Large File +```bash +git add data/foo.vcf +git commit -m "Add file" +``` + +### Track Metadata +```bash +lfs-meta track data/foo.vcf --patient Patient/1234 --specimen Specimen/XYZ +``` + +### Generate FHIR Metadata +```bash +lfs-meta init-meta --output META/DocumentReference.ndjson --bucket s3://my-bucket +``` + +--- + +## βœ… Tests + +Run all tests: +```bash +pytest tests/ # If Python is used +# or +go test ./... # If Go is used +``` + +--- + +## 🌿 Example .lfs-meta/metadata.json +```json +{ + "data/foo.vcf": { + "patient": "Patient/1234", + "specimen": "Specimen/XYZ" + } +} +``` + +--- + +## πŸ”Ή Pre-Push Hook +```bash +#!/bin/bash +# hooks/pre-push + +if [ ! -f ".lfs-meta/metadata.json" ]; then + echo "[lfs-meta] No metadata file found. Skipping." 
+ exit 0 +fi + +lfs-meta validate --file .lfs-meta/metadata.json || { + echo "[lfs-meta] Metadata validation failed" + exit 1 +} +``` + +--- + +## πŸ† Credits +- Inspired by `g3t meta init` (ACED-IDP) +- Custom LFS support with S3 via [lfs-s3](https://github.com/nicolas-graves/lfs-s3) + +--- + +## ✨ Future Extensions +- Auto-tag S3 objects with metadata +- Metadata schema validation + +--- diff --git a/docs/README-hybrid-oid.md b/docs/README-hybrid-oid.md new file mode 100644 index 0000000..aea7a3e --- /dev/null +++ b/docs/README-hybrid-oid.md @@ -0,0 +1,154 @@ +If the user **doesn't have the SHA256 hash** of the remote file (which Git LFS requires for the pointer), but they do have an **MD5 hash** or **ETag** (common in object stores like S3), then you can implement a **two-stage mapping approach** in your Git LFS custom transfer agent. + +--- + +## 🧠 Strategy: Use ETag or MD5 to Resolve to SHA256 + +> TODO - 🚧 this needs prototyping - completely untested 🚧 + +Instead of requiring the user to download the file, your system can: + +### πŸ”Ή 1. **Store metadata keyed by ETag or MD5** +```json +{ + "etag": "abc123etag", + "url": "https://mybucket.s3.amazonaws.com/file.bam", + "size": 1048576, + "sha256": null +} +``` + +### πŸ”Ή 2. **During transfer (download/upload):** +- Use ETag to identify the file. +- At the **first transfer**, download the file, compute SHA256 once, and cache it. +- Store the mapping: `etag β†’ sha256` +- Update the `.lfs-meta/.json` so it can be reused. + +--- + +## βœ… Workflow + +### βš™οΈ `git lfs track-remote` (No SHA256) + +```bash +git lfs track-remote data/file.bam \ + --url https://mybucket.s3.amazonaws.com/file.bam \ + --etag abc123etag \ + --size 1048576 +``` + +1. Writes: + - `data/file.bam` β†’ Git LFS pointer file with **temporary SHA** (placeholder) + - `.lfs-meta/etag/abc123etag.json` β†’ URL + metadata + +2. 
On `git lfs pull`: + - Transfer agent: + - Resolves `etag β†’ url` + - Downloads file + - Calculates `sha256` + - Rewrites `.git/lfs/objects/...` with correct SHA + - Creates `.lfs-meta/.json` for future use + +3. Subsequent pulls/commits: + - The SHA256 is known and directly used. + +--- + +## πŸ“ Directory Layout + +``` +repo/ +β”œβ”€β”€ .lfs-meta/ +β”‚ β”œβ”€β”€ etag/ +β”‚ β”‚ └── abc123etag.json # early metadata keyed by ETag +β”‚ └── sha256/ +β”‚ └── 6a7e3...json # full metadata keyed by SHA once known +└── file.bam # Git LFS pointer (eventually points to 6a7e3...) +``` + +--- + +## πŸ§‘β€πŸ’» Tips for Implementation + +- Use ETag or MD5 **as a temporary key** until the SHA256 is known. +- Populate `.lfs-meta` with: + - `etag β†’ url` + - `etag β†’ sha256` (once resolved) +- Optional: warn user if size mismatches during transfer +- You can support `track-remote` with: + ```bash + --etag abc123etag + --size 1048576 + ``` + +--- + +## πŸ” Cloud-Friendly Bonus + +For object stores like AWS S3: +- `HEAD` requests return `ContentLength` and `ETag` β€” no download needed. +- You can cache remote metadata efficiently. + +--- +If the user wants to **mix standard Git LFS files** (stored in a Git LFS server or local LFS cache) with **custom β€œremote” LFS files** (tracked via metadata like ETag/URL), the best approach is to **register multiple transfer agents and selectively route files** to the right agent based on their OID or file path. + +--- + +## 🧭 Strategy Overview + +1. **Standard LFS files** are handled by the default `basic` agent. +2. **Remote-tracked files** are handled by your custom agent (e.g., `remote`), using metadata like ETag or MD5. +3. Use **OID prefixes** (e.g., `etag-abc123`) or filename patterns to differentiate. + +--- + +## βœ… Use Custom OID Prefix (Recommended) + +### πŸ”‘ Idea: +When registering a remote file via `track-remote`, prefix its OID with `etag-` instead of a real SHA256. 
Your custom agent handles these, while standard files still use SHA-based OIDs. + +### `.gitconfig` +```ini +[lfs.customtransfer] + remote.path = python3 transfer_agent.py + +[lfs] + concurrenttransfers = 3 + tusTransferMaxRetries = 1 + transfer = remote,basic # order matters +``` + +### In `transfer_agent.py`, match only `etag-*` OIDs: + +```python +if cmd["event"] == "download" and cmd["operation"]["oid"].startswith("etag-"): + ... +``` + +### Standard files (with SHA256 OIDs) bypass this agent and fall back to `basic`. + +--- + +## πŸ” Hybrid Considerations + +| Concern | Standard LFS | Remote LFS (custom) | +|------------------------|--------------|----------------------| +| SHA256 available | Yes | Optional (resolved on pull) | +| Pointer format | Standard | Compatible, but custom `oid` | +| Transfer storage | Git LFS server | External (e.g., S3, HTTP) | +| Pull/Push supported | Yes | Yes (via agent) | +| Integrity verification | SHA256 | SHA256 (on first download) | + +--- + +## πŸš€ Summary + +| Use Case | Solution | +|----------------------------|-----------------------------------------------| +| Mixed LFS file support | Register multiple agents (`remote`, `basic`) | +| Route remote files | Use `oid` prefix like `etag-*` | +| Route standard files | Leave `oid` as normal SHA256 | +| Optional: path-based split | Use `.gitattributes` with multiple filters | + +--- + diff --git a/docs/README-release-test.md b/docs/README-release-test.md new file mode 100644 index 0000000..604338e --- /dev/null +++ b/docs/README-release-test.md @@ -0,0 +1,154 @@ +# πŸ§ͺ `lfs-meta` Pilot Test Script + +> Please follow the steps below to test core functionality of the `lfs-meta` tool. Report any issues to the project team via GitHub or the feedback form. 
+ +--- + +## βœ… Prerequisites + +Before starting, ensure you have: + +- Access to a Git repo cloned from the `lfs-meta` template +- A working installation of the `lfs-meta` CLI +- Access to an S3, GCS, or Azure bucket (read-only) +- Python or Go runtime (depending on implementation) +- A `fence` endpoint (or staging Gen3 system) if testing user sync + +--- + +## 🧭 Part 0 – Track a Local File + +### 1.1 Track a local File + +```bash +git add data/test.vcf +``` + +βœ… Expected result: +- `.lfs-meta/metadata.json` is updated + + +--- + +## 🧭 Part 1 – Track a Remote File + +### 1.1 Track a Remote File + +```bash +lfs-meta track-remote s3://my-bucket/path/to/test.vcf \ + --path data/test.vcf \ + --patient Patient/1234 \ + --specimen Specimen/XYZ +``` + +βœ… Expected result: +- `.lfs-meta/metadata.json` is updated with `remote_url`, `size`, `etag`, etc. + +--- + +### 1.2 Commit the Metadata + +```bash +git add .lfs-meta/metadata.json +git commit -m "Track remote object test.vcf" +``` + +βœ… Expected result: +- Git diff shows new metadata +- No large file is downloaded or committed + +--- + +## 🧬 Part 2 – Generate FHIR Metadata + +### 2.1 Generate `DocumentReference.ndjson` + +```bash +lfs-meta init-meta \ + --input .lfs-meta/metadata.json \ + --output META/DocumentReference.ndjson \ + --bucket s3://my-bucket +``` + +βœ… Expected result: +- `META/DocumentReference.ndjson` is created +- File includes `Patient`, `Specimen`, and S3 URL as FHIR attachment + +--- + +### 2.2 Validate the Output + +```bash +lfs-meta validate-meta --file META/DocumentReference.ndjson +``` + +βœ… Expected result: +- β€œValidation passed” message (or warning if required fields are missing) + +--- + +## πŸ‘₯ Part 3 – Sync User Roles with Gen3 + +### 3.1 Create Access Config + +Create a YAML file at `.access.yaml`: + +```yaml +project_id: test-project +roles: + - username: alice@example.org + role: submitter + - username: bob@example.org + role: reader +``` + +βœ… Expected result: +- YAML is 
committed to Git and version-controlled + +--- + +### 3.2 Dry-Run the Sync + +```bash +lfs-meta sync-users --dry-run --input .access.yaml +``` + +βœ… Expected result: +- Diff is shown: who will be added/removed from Gen3 +- No changes are applied + +--- + +### 3.3 Apply the Sync (Optional) + +```bash +lfs-meta sync-users --input .access.yaml --confirm +``` + +βœ… Expected result: +- Users are updated in Gen3 +- Git commit acts as audit trail + +--- + +## πŸ“‹ Part 4 – Submit Feedback + +Please provide feedback on: + +- 🧠 Was the tool intuitive to use? +- 🧱 Did any commands fail or behave unexpectedly? +- πŸ“ Were the docs clear and complete? +- πŸ§ͺ Any bugs or unexpected behavior? + +➑ Submit GitHub Issues or fill out the pilot feedback form: +**[Feedback Form Link]** + +--- + +## πŸ’‘ Optional Tests + +- Try with GCS or Azure remote objects +- Test invalid metadata (missing patient/specimen) +- Clone the repo on another machine and repeat the workflow + +--- \ No newline at end of file diff --git a/docs/README-trackremote.md b/docs/README-trackremote.md new file mode 100644 index 0000000..d08576f --- /dev/null +++ b/docs/README-trackremote.md @@ -0,0 +1,97 @@ +If the user **doesn't have the SHA256 hash** of the remote file (which Git LFS requires for the pointer), but they do have an **MD5 hash** or **ETag** (common in object stores like S3), then you can implement a **two-stage mapping approach** in your Git LFS custom transfer agent. + +## πŸ” User-Friendly Bonus + +For object stores like AWS S3: +- `HEAD` requests return `ContentLength` and `ETag` β€” no download needed. +- You can cache remote metadata efficiently. +- User should just have to specify the url and the system can retrieve + + +--- + +## 🧠 Strategy: Use ETag or MD5 to Resolve to SHA256 + +Instead of requiring the user to download the file, the system can: + +### πŸ”Ή 1. 
**Store metadata keyed by ETag or MD5** +```json +{ + "etag": "abc123etag", + "url": "https://mybucket.s3.amazonaws.com/file.bam", + "size": 1048576, + "sha256": null +} +``` + +### πŸ”Ή 2. **During transfer (download/upload):** +- Use ETag to identify the file. +- At the **first transfer**, download the file, compute SHA256 once, and cache it. +- Store the mapping: `etag β†’ sha256` +- Update the `.lfs-meta/.json` so it can be reused. + +--- + +## βœ… Workflow + +### βš™οΈ `git lfs track-remote` (No SHA256) + +```bash +# user has attributes and specifies a local path +git lfs track-remote data/file.bam \ + --url https://mybucket.s3.amazonaws.com/file.bam \ + --etag abc123etag \ + --size 1048576 + +# user simply specifies a remote path +git lfs track-remote --url https://mybucket.s3.amazonaws.com/file.bam +# system HEADs url and retrieves: +# path = file.bam +# etag abc123etag +# size 1048576 +# TODO: specify where AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION are set + +# user specifies path and remote path +git lfs track-remote my-directory/my-name.bam --url https://mybucket.s3.amazonaws.com/file.bam + +``` + +1. Writes: + - `data/file.bam` β†’ Git LFS pointer file with **temporary SHA** (placeholder) + - `.lfs-meta/etag/abc123etag.json` β†’ URL + metadata + +2. On `git lfs pull`: + - Transfer agent: + - Resolves `etag β†’ url` + - Downloads file + - Calculates `sha256` + - Rewrites `.git/lfs/objects/...` with correct SHA + - Creates `.lfs-meta/.json` for future use + +3. Subsequent pulls/commits: + - If the file is intended to be stored in one of "our" buckets:The SHA256 is known and directly used. + - Otherwise, the transfer agent can still use the ETag to identify the file. 
+ +--- + +## πŸ“ Directory Layout + +``` +repo/ +β”œβ”€β”€ .lfs-meta/ +β”‚ β”œβ”€β”€ etag/ +β”‚ β”‚ └── abc123etag.json # early metadata keyed by ETag +β”‚ └── sha256/ +β”‚ └── 6a7e3...json # full metadata keyed by SHA once known +└── file.bam # Git LFS pointer (eventually points to 6a7e3...) +``` + +--- + +## πŸ§‘β€πŸ’» Tips for Implementation + +- Use ETag or MD5 **as a temporary key** until the SHA256 is known. +- Populate `.lfs-meta` with: + - `etag β†’ url` + - `etag β†’ sha256` (once resolved) +- Optional: warn user if size mismatches during transfer diff --git a/docs/README-usage-story.md b/docs/README-usage-story.md new file mode 100644 index 0000000..44d72a9 --- /dev/null +++ b/docs/README-usage-story.md @@ -0,0 +1,200 @@ + +# Usage story + +This story tracks a propose usage pattern for an analyist interactive with +a Calypr project using the git plugin. In this case, the plugin is named git-drs, although +that may be modified in the future. + +In this case, there is an existing project defined at github.com/ohsu-comp-bio/test-project.git + +## Install plugin configuratin plugin + +```bash +$ git drs install +``` + +## clone a project + +```bash +$ git clone git@github.com:ohsu-comp-bio/test-project.git +``` + +At this point no file have been downloaded. A hidden folder tracks all document references. + +## List document references + +This lists all document references added to a project. A document reference includes: + - The name of the file + - The project relative path of the file + - The size of the file + - File identifiers, such as etags, MD5 or SHA256 + - An array of locations. 
This could include multiple URLs, file paths or other download methods
+
+```bash
+$ git drs list
+R ./my-data/sample1.bam
+R ./my-data/sample1.bam.bai
+L ./my-data/sample2.bam
+M ./my-data/sample2.bam.bai
+U ./my-data/sample1.vcf
+G ./my-data/sample1.txt
+```
+
+Codes are:
+
+| code| meaning |
+|-----|-------|
+|R | Remote |
+|L | Local |
+|M | Modified |
+|U | Untracked |
+|G | Git tracked file|
+
+
+## Download a file
+
+```bash
+$ git drs ls ./my-data/sample1.bam
+R ./my-data/sample1.bam
+$ git drs pull ./my-data/sample1.bam
+L ./my-data/sample1.bam
+```
+
+## Add a local file
+```bash
+$ git drs add ./my-data/simple1.vcf
+M ./my-data/simple1.vcf
+```
+In this version, the file is moved to a `Modified` state. The file will be uploaded to the default bucket for the project on `push`, at which point it will be changed to a `Local` state.
+
+Add a local file to a non-default bucket
+```bash
+$ git drs add ./my-data/simple1.vcf -r alt-bucket
+M ./my-data/simple1.vcf
+```
+
+## Add symlink
+
+Add a local file that is a symbolic link to a shared folder
+```bash
+$ git drs add ./my-data/simple1.vcf -l /mnt/shared/results/simple1.vcf
+L ./my-data/simple1.vcf
+```
+In this version, the file is added as a reference, but not pushed to a project repository. A
+symlink to the actual file is added to the project folder and the state is changed to Local.
+
+Add an existing S3 resource to the project
+```bash
+$ git drs add ./my-data/simple1.vcf --s3 forterra/my-bucket/results/simple1.vcf
+L ./my-data/simple1.vcf
+```
+This moves the file from the `Untracked` state to the `Remote` state.
+ +## Add an Existing DRS object +```bash +$ git drs add ./my-data/simple1.vcf --drs drs://example.org/12345 +R ./my-data/simple1.vcf +``` + +# Push +move any files in the modified state to remote repositories +```bash +$ git drs ls ./data/ +R ./my-data/sample1.bam +R ./my-data/sample1.bam.bai +M ./my-data/simple1.vcf +$ git drs push +L ./my-data/simple1.vcf +``` + +# Remote management + +List repositories that are associated with project +```bash +$ git drs remote list +default gen3 calypr.ohsu.edu compbio/my-project +alternate s3 rgw.ohsu.edu MyLab +arc-local local arc.ohsu.edu,*.arc.ohsu.edu /mnt/shared/dir/ +anvil drs anvilproject.org +``` + +The output pattern is resource name, interface type, hostname, remote base path, with +default being the name of the default storage resource. +In the case of S3 objects, hostname is the server URL. For local storage entries, +the list of host names (comma delimited with * wildcards) is host names where the local file +storage should be valid + + +## Add remote server +```bash +$ git drs remote add gen3 origin compbio/my-project +``` +The base command `git drs remote add` takes the arguments +`type` `name` and `URL` + +## Add a DRS server +```bash +$ git drs remote add drs anvil https://drs.anvilproject.org +``` + +## Add a local shared folder +This is a common drive, often network attached, that holds large common files. +When files are added or pulled from this resource, symlinks are used to point to the +original file. 
+```bash +$ git drs remote add shared arc-data /mnt/shared/data +``` + +# DRS info +View the DRS record for a file +```bash +$ git drs info ./my-data/simple1.vcf +``` + +Should return something like: +```json +{ + "id": "drs://example.org/12345", + "name": "simple1.vcf", + "self_uri": "drs://example.org/12345", + "size": 2684354560, + "created_time": "2023-01-15T12:34:56Z", + "updated_time": "2023-06-20T14:22:10Z", + "version": "1.0", + "mime_type": "application/octet-stream", + "checksums": [ + { + "type": "md5", + "checksum": "1a79a4d60de6718e8e5b326e338ae533" + } + ], + "access_methods": [ + { + "type": "https", + "access_url": { + "url": "https://example.org/data/HG00096.bam" + }, + "region": "us-east-1" + } + ], + "description": "BAM file for sample HG00096 from the 1000 Genomes Project", + "aliases": [ + "1000G_HG00096_bam" + ], + "contents": [] +} +``` + + +## Storage +All DRS records will be in a folder under git top level directory in a folder named `.drs` + +```bash +$ find .drs/ +./drs/my-data/sample1.bam +./drs/my-data/sample1.bam.bai +./drs/my-data/sample2.bam +./drs/my-data/sample2.bam.bai +./drs/my-data/sample1.vcf +./drs/my-data/sample1.txt +``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..a27a009 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,115 @@ +# Documentation Index + +Welcome to the `git-gen3` documentation! Below is an index of the available documentation files in this directory. + +--- +## Overview + +Based on the current structure of the ACED Integrated Data Platform (IDP), which utilizes the `g3t` command-line tool for project creation, file uploads, and metadata association, it's advisable to refactor this monolithic approach into modular utilities. This will enhance maintainability, scalability, and facilitate targeted enhancements. 
+
+---
+
+## 🧱 Proposed Modular Architecture
+Transitioning to a modular architecture involves decomposing the monolith into distinct utilities, each responsible for a specific domain.
+
+### 1. **Project Management Utility**
+
+**Responsibilities:**
+
+- Initialize and manage project structures
+- Handle user roles and permissions
+- Integrate with git servers for audit trails and project membership
+
+
+**Implementation Suggestions:**
+
+- Develop a CLI tool, e.g., `auth-sync`, to manage project lifecycles
+- Utilize configuration files (YAML/JSON) to define project metadata
+- Integrate with Git for version control and collaboration
+
+### 2. **File Transfer Utility**
+
+**Responsibilities:**
+
+- Handle uploading and downloading of data files
+- Support resumable transfers and integrity checks
+- Manage storage backend interactions (e.g., S3, GCS)
+
+**Implementation Suggestions:**
+
+- Create tools, e.g., `git-lfs extensions, git add/pull url`, to abstract file operations
+- Incorporate support for various storage backends using plugins or adapters
+- Implement checksum verification to ensure data integrity
+
+### 3. **Metadata Management Utility**
+
+**Responsibilities:**
+
+- Facilitate the creation, validation, and submission of metadata
+- Transform metadata into required formats (e.g., FHIR resources)
+- Ensure compliance with data standards and schemas
+- See [General Discussion](https://github.com/bmeg/git-gen3/pull/3#issuecomment-2835614773)
+- See [Bulk tagging](https://github.com/bmeg/git-gen3/pull/3#issuecomment-2835728266)
+
+**Implementation Suggestions:**
+
+- Develop a utility, e.g., `git meta init/validate/etc`, to manage metadata workflows
+- Leverage existing tools like `g3t_etl` for data transformation
+- Incorporate schema validation to enforce data quality
+
+---
+
+## πŸ”„ Integration Strategy
+
+To ensure seamless operation between these utilities:
+
+* Establish a shared configuration system to maintain consistency across tools.
+* Provide comprehensive documentation and user guides to facilitate adoption.
+
+---
+
+## πŸ› οΈ Implementation Roadmap
+
+1. **Assessment Phase:**
+ - Evaluate the current monolithic system to identify components for extraction.
+ - Prioritize functionalities based on user needs and system dependencies.
+
+2. **Development Phase:**
+ - Iteratively develop and test each utility.
+ - Ensure backward compatibility where necessary.
+
+3. **Deployment Phase:**
+ - Roll out utilities to a subset of users for feedback.
+ - Monitor performance and gather user input for refinements.
+
+4. **Documentation and Training:**
+ - Update documentation to reflect the new modular structure.
+ - Conduct training sessions to familiarize users with the new tools.
+
+
+## πŸ“š Documentation Files
+
+The following files provide **very rough, draft 🚧** information about the `git-gen3` project, its architecture, and its components:
+
+1. [README-comparison](README-comparison.md)
+ - A comparison of the `lfs-meta` tool with other tools and approaches.
+ - Discusses the advantages and disadvantages of each approach.
+ - Provides a summary of the key features and capabilities of `lfs-meta`.
+ - [DRS Usage Story](README-usage-story.md)
+ - [Comparison with git lfs](https://github.com/bmeg/git-gen3/pull/3#issuecomment-2835614773)
+2. [Epics and Sprint Plan](README-epic.md)
+ - Overview of project goals, sprint breakdowns, and deliverables.
+3. [Auth-sync](README-git-sync.md)
+ - Details on how to sync authentication and authorization with github/synapse/etc as the system of record for project membership.
+4. [Git LFS project archetype](README-gitlfs-template-project.md)
+ - Exemplar user project structure
+5. [Git LFS Metadata](README-gitlfs-meta.md)
+ - Information on how to manage metadata for large files in Git.
+6. [Git LFS Remote Buckets](README-gitlfs-remote-buckets.md)
+ - Details for tracking remote files without downloading them.
+ - See also [Hybrid OID](README-hybrid-oid.md) for more information. +7. [Release Testing](README-release-test.md) + - Guidelines for testing the release process and ensuring functionality. + +## Overview +![](images/gen3-lfs.png) diff --git a/docs/images/gen3-lfs.png b/docs/images/gen3-lfs.png new file mode 100644 index 0000000..5da3f55 Binary files /dev/null and b/docs/images/gen3-lfs.png differ diff --git a/docs/images/gen3-lfs.txt b/docs/images/gen3-lfs.txt new file mode 100644 index 0000000..573ae6e --- /dev/null +++ b/docs/images/gen3-lfs.txt @@ -0,0 +1,76 @@ +title gen3-lfs +participant remote-bucket-or-filesystem +participant local-filesystem +actor user +participant git +participant gen3-lfs +participant git-sync +participant git-server +participant gen3 +participant gen3-managed-bucket +participant transfer-service +note right of transfer-service: globus, rsync + +alt project-init +user->git-server: create project, add collaborators +note right of git-sync: cron | web-hook +git-sync->git-server: harvest repo users and roles +git-sync->gen3: create project, add users +user->gen3: get credentials +end + +alt add-local +note right of user: upload local file to gen3-managed-bucket +user->git: add local-file +gen3-lfs->local-filesystem: read attributes +user->git: commit +git->gen3-lfs: create / update meta +user->git: push +git->gen3-lfs: hook: validate +git->gen3-lfs: "clean" +gen3-lfs->local-filesystem: read contents +gen3-lfs->gen3: index +gen3-lfs->gen3-managed-bucket: upload +end + +alt add-remote-index +note right of user: upload remote file to gen3 index only +user->git: add remote-file +gen3-lfs->remote-bucket-or-filesystem: read attributes +user->git: commit +git->gen3-lfs: create / update meta +user->git: push +git->gen3-lfs: hook: validate +git->gen3-lfs: "clean" +gen3-lfs->local-filesystem: read contents +gen3-lfs->gen3: index +end + +alt add-remote-content +note right of user: schedule remote file to gen3 managed bucket +user->git: add 
remote-file +gen3-lfs->remote-bucket-or-filesystem: read attributes +user->git: commit +git->gen3-lfs: create / update meta +user->git: push +git->gen3-lfs: hook: validate +git->gen3-lfs: "clean" +gen3-lfs->transfer-service: xfer request +gen3-lfs->gen3: index +transfer-service->remote-bucket-or-filesystem: xfer read +transfer-service->gen3-managed-bucket: xfer write +end + +alt read/download/pull +note right of user: git pull, website download +user->gen3: read index +alt index only +note right of user: if index only generate scp or bucket copy commands +end +alt gen3 managed +note right of user: download or pull +git->gen3-lfs: "smudge" +gen3-lfs->gen3-managed-bucket: read +gen3-lfs->local-filesystem: write +end +end \ No newline at end of file diff --git a/docs/images/github-sync-flowchart.png b/docs/images/github-sync-flowchart.png new file mode 100644 index 0000000..a72ac5c Binary files /dev/null and b/docs/images/github-sync-flowchart.png differ diff --git a/drs/README.md b/drs/README.md new file mode 100644 index 0000000..f18d028 --- /dev/null +++ b/drs/README.md @@ -0,0 +1,119 @@ + + +DRS OpenAPI definition + +```yaml +type: object +required: + - id + - self_uri + - size + - created_time + - checksums +properties: + id: + type: string + description: An identifier unique to this `DrsObject` + name: + type: string + description: |- + A string that can be used to name a `DrsObject`. + This string is made up of uppercase and lowercase letters, decimal digits, hyphen, period, and underscore [A-Za-z0-9.-_]. See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_282[portable filenames]. + self_uri: + type: string + description: |- + A drs:// hostname-based URI, as defined in the DRS documentation, that tells clients how to access this object. + The intent of this field is to make DRS objects self-contained, and therefore easier for clients to store and pass around. 
For example, if you arrive at this DRS JSON by resolving a compact identifier-based DRS URI, the `self_uri` presents you with a hostname and properly encoded DRS ID for use in subsequent `access` endpoint calls. + example: + drs://drs.example.org/314159 + size: + type: integer + format: int64 + description: |- + For blobs, the blob size in bytes. + For bundles, the cumulative size, in bytes, of items in the `contents` field. + created_time: + type: string + format: date-time + description: |- + Timestamp of content creation in RFC3339. + (This is the creation time of the underlying content, not of the JSON object.) + updated_time: + type: string + format: date-time + description: >- + Timestamp of content update in RFC3339, identical to `created_time` in systems + that do not support updates. + (This is the update time of the underlying content, not of the JSON object.) + version: + type: string + description: >- + A string representing a version. + + (Some systems may use checksum, a RFC3339 timestamp, or an incrementing version number.) + mime_type: + type: string + description: A string providing the mime-type of the `DrsObject`. + example: + application/json + checksums: + type: array + minItems: 1 + items: + $ref: './Checksum.yaml' + description: >- + The checksum of the `DrsObject`. At least one checksum must be provided. + + For blobs, the checksum is computed over the bytes in the blob. + + For bundles, the checksum is computed over a sorted concatenation of the + checksums of its top-level contained objects (not recursive, names not included). + The list of checksums is sorted alphabetically (hex-code) before concatenation + and a further checksum is performed on the concatenated checksum value. 
+ + For example, if a bundle contains blobs with the following checksums: + + md5(blob1) = 72794b6d + + md5(blob2) = 5e089d29 + + Then the checksum of the bundle is: + + md5( concat( sort( md5(blob1), md5(blob2) ) ) ) + + = md5( concat( sort( 72794b6d, 5e089d29 ) ) ) + + = md5( concat( 5e089d29, 72794b6d ) ) + + = md5( 5e089d2972794b6d ) + + = f7a29a04 + access_methods: + type: array + minItems: 1 + items: + $ref: './AccessMethod.yaml' + description: |- + The list of access methods that can be used to fetch the `DrsObject`. + Required for single blobs; optional for bundles. + contents: + type: array + description: >- + If not set, this `DrsObject` is a single blob. + + If set, this `DrsObject` is a bundle containing the listed `ContentsObject` s (some of which may be further nested). + items: + $ref: './ContentsObject.yaml' + description: + type: string + description: A human readable description of the `DrsObject`. + aliases: + type: array + items: + type: string + description: >- + A list of strings that can be used to find other metadata + about this `DrsObject` from external metadata sources. These + aliases can be used to represent secondary + accession numbers or external GUIDs. 
+``` \ No newline at end of file diff --git a/drs/object.go b/drs/object.go new file mode 100644 index 0000000..2199e3c --- /dev/null +++ b/drs/object.go @@ -0,0 +1,44 @@ +package drs + +type Checksum struct { + Checksum string `json:"checksum"` + Type string `json:"type"` +} + +type AccessURL struct { + URL string `json:"url"` + Headers []string `json:"headers"` +} + +type Authorizations struct { + //This structue is not stored in the file system +} + +type AccessMethod struct { + Type string `json:"type"` + AccessURL AccessURL `json:"access_url"` + AccessID string `json:"access_id,omitempty"` + Cloud string `json:"cloud,omitempty"` + Region string `json:"region,omitempty"` + Avalible string `json:"available,omitempty"` + Authorizations *Authorizations `json:"Authorizations,omitempty"` +} + +type Contents struct { +} + +type DRSObject struct { + Id string `json:"id"` + Name string `json:"name"` + SelfURL string `json:"self_url,omitempty"` + Size int64 `json:"size"` + CreatedTime string `json:"created_time,omitempty"` + UpdatedTime string `json:"updated_time,omitempty"` + Version string `json:"version,omitempty"` + MimeType string `json:"mime_type,omitempty"` + Checksums []Checksum `json:"checksums"` + AccessMethods []AccessMethod `json:"access_methods"` + Contents []Contents `json:"contents,omitempty"` + Description string `json:"description,omitempty"` + Aliases []string `json:"aliases,omitempty"` +} diff --git a/drs/util.go b/drs/util.go new file mode 100644 index 0000000..8bde784 --- /dev/null +++ b/drs/util.go @@ -0,0 +1,53 @@ +package drs + +import ( + "encoding/json" + "io/fs" + "os" + "path/filepath" + + "github.com/bmeg/git-drs/utils" +) + +const DRS_DIR = ".drs" + +type DrsWalkFunc func(path string, d *DRSObject) error + +func BaseDir() (string, error) { + gitTopLevel, err := utils.GitTopLevel() + if err != nil { + return "", err + } + return filepath.Join(gitTopLevel, DRS_DIR), nil +} + +type dirWalker struct { + baseDir string + userFunc DrsWalkFunc +} 
+ +func (d *dirWalker) call(path string, dir fs.DirEntry, cErr error) error { + data, err := os.ReadFile(path) + if err != nil { + return nil + } + obj := DRSObject{} + err = json.Unmarshal(data, &obj) + if err != nil { + return err + } + relPath, err := filepath.Rel(d.baseDir, path) + if err != nil { + return err + } + return d.userFunc(relPath, &obj) +} + +func ObjectWalk(f DrsWalkFunc) error { + baseDir, err := BaseDir() + if err != nil { + return err + } + ud := dirWalker{baseDir, f} + return filepath.WalkDir(baseDir, ud.call) +} diff --git a/git-gen3.go b/git-drs.go similarity index 59% rename from git-gen3.go rename to git-drs.go index a6b9838..237cfd1 100644 --- a/git-gen3.go +++ b/git-drs.go @@ -1,15 +1,15 @@ package main import ( - "fmt" + "log" "os" - "github.com/bmeg/git-gen3/cmd" + "github.com/bmeg/git-drs/cmd" ) func main() { if err := cmd.RootCmd.Execute(); err != nil { - fmt.Println("Error:", err.Error()) + log.Println("Error:", err.Error()) os.Exit(1) } } diff --git a/go.mod b/go.mod index e9c6287..1743ff5 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,36 @@ -module github.com/bmeg/git-gen3 +module github.com/bmeg/git-drs go 1.24.0 require ( + github.com/git-lfs/git-lfs/v3 v3.6.1 + github.com/spf13/cobra v1.9.1 + github.com/uc-cdis/gen3-client v0.0.23 + sigs.k8s.io/yaml v1.4.0 +) + +require ( + github.com/avast/retry-go v2.4.2+incompatible // indirect + github.com/git-lfs/gitobj/v2 v2.1.1 // indirect + github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect + github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect + github.com/google/go-github v17.0.0+incompatible // indirect + github.com/google/go-querystring v1.1.0 // indirect + github.com/hashicorp/go-version v1.4.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/spf13/cobra v1.9.1 // indirect + github.com/leonelquinteros/gotext v1.5.0 // indirect + github.com/mattn/go-runewidth v0.0.13 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect + 
github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + gopkg.in/cheggaaa/pb.v1 v1.0.28 // indirect + gopkg.in/ini.v1 v1.66.3 // indirect ) + +replace github.com/uc-cdis/gen3-client => ../cdis-data-client diff --git a/go.sum b/go.sum index ffae55e..e94a1a5 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,106 @@ +github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= +github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= +github.com/avast/retry-go v2.4.2+incompatible h1:+ZjCypQT/CyP0kyJO2EcU4d/ZEJWSbP8NENI578cPmA= +github.com/avast/retry-go v2.4.2+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430 h1:oempk9HjNt6rVKyKmpdnoN7XABQv3SXLWu3pxUI7Vlk= +github.com/dpotapov/go-spnego v0.0.0-20210315154721-298b63a54430/go.mod h1:AVSs/gZKt1bOd2AhkhbS7Qh56Hv7klde22yXVbwYJhc= +github.com/git-lfs/git-lfs/v3 v3.6.1 h1:0RA2HzkMVl69KE5zCGY1PxqkDSbd/f/O7Du6CNkTYtY= +github.com/git-lfs/git-lfs/v3 v3.6.1/go.mod h1:1YO3nafGw2wKBR5LTZ7/LXJ7U7ELdvIGvcCBrLt6mfM= +github.com/git-lfs/gitobj/v2 v2.1.1 h1:tf/VU6zL1kxa3he+nf6FO/syX+LGkm6WGDsMpfuXV7Q= +github.com/git-lfs/gitobj/v2 v2.1.1/go.mod h1:q6aqxl6Uu3gWsip5GEKpw+7459F97er8COmU45ncAxw= +github.com/git-lfs/go-netrc 
v0.0.0-20210914205454-f0c862dd687a h1:6pskVZacdMUL93pCpMAYnMDLjH1yDFhssPYGe32sjdk= +github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a/go.mod h1:70O4NAtvWn1jW8V8V+OKrJJYcxDLTmIozfi2fmSz5SI= +github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d5raz9t3+IzoR1i1wqxE1kZC6dY+U= +github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A= +github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8= +github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= +github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= +github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= +github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.4.0 h1:aAQzgqIrRKRa7w75CKpbBxYsmUoPjzVm1W59ca1L0J4= +github.com/hashicorp/go-version v1.4.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= 
+github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= +github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.2 h1:6ZIM6b/JJN0X8UM43ZOM6Z4SJzla+a/u7scXFJzodkA= +github.com/jcmturner/gokrb5/v8 v8.4.2/go.mod h1:sb+Xq/fTY5yktf/VxLsE3wlfPqQjp0aWNYyvBVK62bc= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= +github.com/jmhodges/clock v1.2.0 h1:eq4kys+NI0PLngzaHEe7AmPT90XMGIEySD1JfV1PDIs= +github.com/jmhodges/clock v1.2.0/go.mod h1:qKjhA7x7u/lQpPB1XAqX1b1lCI/w3/fNuYpI/ZjLynI= +github.com/leonelquinteros/gotext v1.5.0 h1:ODY7LzLpZWWSJdAHnzhreOr6cwLXTAmc914FOauSkBM= +github.com/leonelquinteros/gotext v1.5.0/go.mod h1:OCiUVHuhP9LGFBQ1oAmdtNCHJCiHiQA8lf4nAifHkr0= +github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs= +github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0 h1:LiZB1h0GIcudcDci2bxbqI6DXV8bF8POAnArqvRrIyw= +github.com/olekukonko/ts v0.0.0-20171002115256-78ecb04241c0/go.mod 
h1:F/7q8/HZz+TXjlsoZQQKVYvXTZaFH4QRa3y+j1p7MS0= +github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17 h1:chPfVn+gpAM5CTpTyVU9j8J+xgRGwmoDlNDLjKnJiYo= +github.com/pkg/errors v0.0.0-20170505043639-c605e284fe17/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086 h1:mncRSDOqYCng7jOD+Y6+IivdRI6Kzv2BLWYkWkdQfu0= +github.com/rubyist/tracerx v0.0.0-20170927163412-787959303086/go.mod h1:YpdgDXpumPB/+EGmGTYHeiW/0QVFRzBYTNFaxWfPDk4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/ssgelm/cookiejarparser v1.0.1 h1:cRdXauUbOTFzTPJFaeiWbHnQ+tRGlpKKzvIK9PUekE4= +github.com/ssgelm/cookiejarparser v1.0.1/go.mod h1:DUfC0mpjIzlDN7DzKjXpHj0qMI5m9VrZuz3wSlI+OEI= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e h1:IWllFTiDjjLIf2oeKxpIUmtiDV5sn71VgeQgg6vcE7k= +github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e/go.mod h1:d7u6HkTYKSv5m6MCKkOQlHwaShTMl3HjqSGW3XtVhXM= +golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20200221224223-e1da425f72fd/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= +gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= +gopkg.in/ini.v1 v1.66.3 h1:jRskFVxYaMGAMUbN0UZ7niA9gzL9B49DOqE78vg0k3w= +gopkg.in/ini.v1 v1.66.3/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/utils/common.go b/utils/common.go new file mode 100644 index 0000000..cd016f2 --- /dev/null +++ b/utils/common.go @@ -0,0 +1,5 @@ +package utils + +const ( + DRS_DIR = ".drs" +) diff --git a/utils/util.go b/utils/util.go new file mode 100644 index 0000000..3f6d71a --- /dev/null +++ b/utils/util.go @@ -0,0 +1,34 @@ +package utils + +import ( + "bytes" + "os/exec" + "path/filepath" + "strings" +) + +func GitTopLevel() (string, error) { + path, err := SimpleRun([]string{"git", "rev-parse", "--show-toplevel"}) + path = strings.TrimSuffix(path, "\n") + return path, err +} + +func SimpleRun(cmds []string) (string, error) { + exePath, err := exec.LookPath(cmds[0]) + if err != nil { + return "", err + } + buf := &bytes.Buffer{} + cmd := exec.Command(exePath, cmds[1:]...) + cmd.Stdout = buf + err = cmd.Run() + return buf.String(), err +} + +func DrsTopLevel() (string, error) { + base, err := GitTopLevel() + if err != nil { + return "", err + } + return filepath.Join(base, DRS_DIR), nil +}