Skip to content

Commit 0fc2bed

Browse files
committed
tar: Add the WithIgnore option.
This option allows excluding certain files from extraction. This is going to be used by `flux diff artifact` to only extract "interesting" files from an archive for comparison with another source. See also: fluxcd/flux2#4916 Signed-off-by: Florian Forster <[email protected]>
1 parent a703510 commit 0fc2bed

File tree

5 files changed

+111
-2
lines changed

5 files changed

+111
-2
lines changed

tar/go.mod

+12-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,15 @@ module github.com/fluxcd/pkg/tar
22

33
go 1.22.0
44

5-
require github.com/cyphar/filepath-securejoin v0.2.4
5+
require (
6+
github.com/cyphar/filepath-securejoin v0.2.4
7+
github.com/go-git/go-git/v5 v5.12.0
8+
)
9+
10+
require (
11+
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
12+
github.com/go-git/go-billy/v5 v5.5.0 // indirect
13+
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
14+
golang.org/x/net v0.22.0 // indirect
15+
gopkg.in/warnings.v0 v0.1.2 // indirect
16+
)

tar/go.sum

+13
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,15 @@
11
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
22
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
3+
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
4+
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic=
5+
github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU=
6+
github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow=
7+
github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys=
8+
github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY=
9+
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
10+
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
11+
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
12+
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
13+
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
14+
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
15+
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=

tar/tar.go

+8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"time"
2323

2424
securejoin "github.com/cyphar/filepath-securejoin"
25+
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
2526
)
2627

2728
const (
@@ -45,6 +46,9 @@ type tarOpts struct {
4546

4647
// skipGzip skips the gzip reader and un-tars a plain tar file.
4748
skipGzip bool
49+
50+
// ignoreMatcher allows excluding specific files from extraction.
51+
ignoreMatcher gitignore.Matcher
4852
}
4953

5054
// Untar reads the gzip-compressed tar file from r and writes it into dir.
@@ -121,6 +125,10 @@ func Untar(r io.Reader, dir string, inOpts ...TarOption) (err error) {
121125
fi := f.FileInfo()
122126
mode := fi.Mode()
123127

128+
if opts.ignore(f.Name, mode.IsDir()) {
129+
continue
130+
}
131+
124132
switch {
125133
case mode.IsRegular():
126134
// Make the directory. This is redundant because it should

tar/tar_opts.go

+25
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ limitations under the License.
1616

1717
package tar
1818

19+
import (
20+
"strings"
21+
22+
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
23+
)
24+
1925
// TarOption represents options to be applied to Tar.
2026
type TarOption func(*tarOpts)
2127

@@ -41,8 +47,27 @@ func WithSkipGzip() TarOption {
4147
}
4248
}
4349

50+
// WithIgnore allows excluding certain files from being extracted.
51+
func WithIgnore(m gitignore.Matcher) TarOption {
52+
return func(t *tarOpts) {
53+
t.ignoreMatcher = m
54+
}
55+
}
56+
4457
func (t *tarOpts) applyOpts(tarOpts ...TarOption) {
4558
for _, clientOpt := range tarOpts {
4659
clientOpt(t)
4760
}
4861
}
62+
63+
// ignore is a convenience function around t.ignoreMatcher.Match(). It handles
64+
// the absence of a matcher gracefully and takes care of splitting the path into
65+
// its components. The `path` argument must be a slash-delimited path, i.e. the
66+
// file name from the tar archive *before* it gets converted to a filepath.
67+
func (t *tarOpts) ignore(path string, isDir bool) bool {
68+
if t.ignoreMatcher == nil {
69+
return false
70+
}
71+
72+
return t.ignoreMatcher.Match(strings.Split(path, "/"), isDir)
73+
}

tar/tar_test.go

+53-1
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ import (
2121
"bytes"
2222
"compress/gzip"
2323
"crypto/rand"
24+
"errors"
2425
"fmt"
26+
"io/fs"
2527
"os"
2628
"path/filepath"
2729
"testing"
30+
31+
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
2832
)
2933

3034
type untarTestCase struct {
@@ -35,6 +39,8 @@ type untarTestCase struct {
3539
content []byte
3640
wantErr string
3741
maxUntarSize int
42+
ignore gitignore.Matcher
43+
wantNotExist bool
3844
}
3945

4046
func TestUntar(t *testing.T) {
@@ -128,6 +134,39 @@ func TestUntar(t *testing.T) {
128134
targetDir: symlink,
129135
wantErr: fmt.Sprintf(`dir '%s' must be a directory`, symlink),
130136
},
137+
{
138+
name: "ignore",
139+
fileName: "file1",
140+
content: geRandomContent(256),
141+
targetDir: targetDirOutput,
142+
secureTargetDir: targetDirOutput,
143+
ignore: gitignore.NewMatcher([]gitignore.Pattern{
144+
gitignore.ParsePattern("file1", nil),
145+
}),
146+
wantNotExist: true,
147+
},
148+
{
149+
name: "ignore does not match",
150+
fileName: "file1",
151+
content: geRandomContent(256),
152+
targetDir: targetDirOutput,
153+
secureTargetDir: targetDirOutput,
154+
ignore: gitignore.NewMatcher([]gitignore.Pattern{
155+
gitignore.ParsePattern("no_match", nil),
156+
}),
157+
wantNotExist: false,
158+
},
159+
{
160+
name: "ignore with glob",
161+
fileName: "path/to/file.ignored",
162+
content: geRandomContent(256),
163+
targetDir: targetDirOutput,
164+
secureTargetDir: targetDirOutput,
165+
ignore: gitignore.NewMatcher([]gitignore.Pattern{
166+
gitignore.ParsePattern("*.ignored", nil),
167+
}),
168+
wantNotExist: true,
169+
},
131170
}
132171

133172
for _, tt := range cases {
@@ -143,6 +182,9 @@ func TestUntar(t *testing.T) {
143182
if tt.maxUntarSize != 0 {
144183
opts = append(opts, WithMaxUntarSize(tt.maxUntarSize))
145184
}
185+
if tt.ignore != nil {
186+
opts = append(opts, WithIgnore(tt.ignore))
187+
}
146188

147189
err = Untar(f, tt.targetDir, opts...)
148190
var got string
@@ -161,11 +203,21 @@ func TestUntar(t *testing.T) {
161203
if tt.wantErr == "" {
162204
abs := filepath.Join(tt.secureTargetDir, tt.fileName)
163205
fi, err := os.Stat(abs)
164-
if err != nil {
206+
207+
gotNotExist := errors.Is(err, fs.ErrNotExist)
208+
if err != nil && gotNotExist != tt.wantNotExist {
165209
t.Errorf("stat %q: %v", abs, err)
166210
return
167211
}
168212

213+
if !gotNotExist && tt.wantNotExist {
214+
t.Errorf("os.Stat(%q) = (%v, nil), want %v", abs, fi, fs.ErrNotExist)
215+
}
216+
217+
if tt.wantNotExist {
218+
return
219+
}
220+
169221
if fi.Size() != int64(len(tt.content)) {
170222
t.Errorf("file size wanted: %d got: %d", len(tt.content), fi.Size())
171223
}

0 commit comments

Comments
 (0)