-
Notifications
You must be signed in to change notification settings - Fork 807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
`buildah build`: use the same overlay for the context directory for the whole build
#5975
base: main
Are you sure you want to change the base?
Changes from all commits
fd5ce16
f6ab6d5
0bc20d2
efc7a88
2d7ce30
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package imagebuildah | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"io/fs" | ||
"os" | ||
"path/filepath" | ||
"slices" | ||
|
||
"github.com/containers/buildah/define" | ||
"github.com/containers/buildah/internal/tmpdir" | ||
"github.com/containers/buildah/pkg/overlay" | ||
"github.com/containers/storage" | ||
"github.com/opencontainers/selinux/go-selinux/label" | ||
"github.com/sirupsen/logrus" | ||
"golang.org/x/sys/unix" | ||
) | ||
|
||
// platformSetupContextDirectoryOverlay() sets up an overlay _over_ the build | ||
// context directory, and sorts out labeling. Returns the location which | ||
// should be used as the default build context; the process label and mount | ||
// label for the build, if any; a boolean value that indicates whether we did, | ||
// in fact, mount an overlay; and a cleanup function which should be called | ||
// when the location is no longer needed (on success). Returned errors should | ||
// be treated as fatal. | ||
// | ||
// On every error return the cleanup function is nil, and whatever temporary | ||
// directory or overlay scaffolding had been created so far is cleaned up on a | ||
// best-effort basis by the deferred handler below (failures are only logged | ||
// at debug level). Callers should therefore only invoke the returned cleanup | ||
// function when the returned error is nil. | ||
func platformSetupContextDirectoryOverlay(store storage.Store, options *define.BuildOptions) (string, string, string, bool, func(), error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Return values are a bit unwieldy, maybe a |
||
// succeeded is only set right before the successful return; it tells the | ||
// deferred handler whether it needs to undo the work done so far. | ||
var succeeded bool | ||
// tmpDir and contentDir are assigned by the steps further down; cleanup() | ||
// checks each of them for emptiness before acting on it. | ||
var tmpDir, contentDir string | ||
cleanup := func() { | ||
// NOTE(review): the guard tests contentDir but the call is given tmpDir; | ||
// presumably overlay.CleanupContent expects the parent directory that was | ||
// handed to overlay.TempDir -- confirm against the overlay package. | ||
if contentDir != "" { | ||
if err := overlay.CleanupContent(tmpDir); err != nil { | ||
logrus.Debugf("cleaning up overlay scaffolding for build context directory: %v", err) | ||
} | ||
} | ||
if tmpDir != "" { | ||
// os.Remove (not RemoveAll) only succeeds on an empty directory; an | ||
// already-missing directory is not treated as worth logging. | ||
if err := os.Remove(tmpDir); err != nil && !errors.Is(err, fs.ErrNotExist) { | ||
logrus.Debugf("removing should-be-empty temporary directory %q: %v", tmpDir, err) | ||
} | ||
} | ||
} | ||
// undo partial setup on any return path that did not reach "succeeded = true" | ||
defer func() { | ||
if !succeeded { | ||
cleanup() | ||
} | ||
}() | ||
// double-check that the context directory location is an absolute path | ||
contextDirectoryAbsolute, err := filepath.Abs(options.ContextDirectory) | ||
if err != nil { | ||
return "", "", "", false, nil, fmt.Errorf("determining absolute path of %q: %w", options.ContextDirectory, err) | ||
} | ||
// make sure the context directory can be stat()ed; the contents of st are | ||
// not read again anywhere in this function | ||
var st unix.Stat_t | ||
if err := unix.Stat(contextDirectoryAbsolute, &st); err != nil { | ||
return "", "", "", false, nil, fmt.Errorf("checking ownership of %q: %w", options.ContextDirectory, err) | ||
} | ||
// figure out the labeling situation | ||
processLabel, mountLabel, err := label.InitLabels(options.CommonBuildOpts.LabelOpts) | ||
if err != nil { | ||
// NOTE(review): unlike the other failure paths, this error is returned | ||
// without added context. | ||
return "", "", "", false, nil, err | ||
} | ||
// create a temporary directory | ||
tmpDir, err = os.MkdirTemp(tmpdir.GetTempDir(), "buildah-context-") | ||
if err != nil { | ||
return "", "", "", false, nil, fmt.Errorf("creating temporary directory: %w", err) | ||
} | ||
// create the scaffolding for an overlay mount under it | ||
// (presumably the two 0 arguments are the owner UID and GID -- confirm | ||
// against overlay.TempDir) | ||
contentDir, err = overlay.TempDir(tmpDir, 0, 0) | ||
if err != nil { | ||
return "", "", "", false, nil, fmt.Errorf("creating overlay scaffolding for build context directory: %w", err) | ||
} | ||
// mount an overlay that uses it as a lower | ||
overlayOptions := overlay.Options{ | ||
// pass a copy of the store's graph options rather than the store's own slice | ||
GraphOpts: slices.Clone(store.GraphOptions()), | ||
ForceMount: true, | ||
MountLabel: mountLabel, | ||
} | ||
targetDir := filepath.Join(contentDir, "target") | ||
contextDirMountSpec, err := overlay.MountWithOptions(contentDir, contextDirectoryAbsolute, targetDir, &overlayOptions) | ||
if err != nil { | ||
// NOTE(review): this message is identical to the one used for the | ||
// overlay.TempDir failure above, so the two failures cannot be told | ||
// apart from the error text alone. | ||
return "", "", "", false, nil, fmt.Errorf("creating overlay scaffolding for build context directory: %w", err) | ||
} | ||
// going forward, pretend that the merged directory is the actual context directory | ||
logrus.Debugf("mounted an overlay at %q over %q", contextDirMountSpec.Source, contextDirectoryAbsolute) | ||
// from here on the caller owns the mount and must call cleanup itself | ||
succeeded = true | ||
return contextDirMountSpec.Source, processLabel, mountLabel, true, cleanup, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
//go:build !linux | ||
|
||
package imagebuildah | ||
|
||
import ( | ||
"github.com/containers/buildah/define" | ||
"github.com/containers/storage" | ||
) | ||
|
||
// platformSetupContextDirectoryOverlay() should set up an overlay _over_ the | ||
// build context directory, and sort out labeling. Should return the location | ||
// which should be used as the default build context; the process label and | ||
// mount label for the build, if any; a boolean value that indicates whether we | ||
// did, in fact, mount an overlay; and a cleanup function which should be | ||
// called when the location is no longer needed (on success). Returned errors | ||
// should be treated as fatal. | ||
// TODO: currently a no-op on this platform: it returns options.ContextDirectory | ||
// unchanged, empty process and mount labels, false for "mounted an overlay", | ||
// a cleanup function that does nothing, and a nil error. The store argument | ||
// is not used. | ||
func platformSetupContextDirectoryOverlay(store storage.Store, options *define.BuildOptions) (string, string, string, bool, func(), error) { | ||
return options.ContextDirectory, "", "", false, func() {}, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import ( | |
"github.com/containers/buildah/define" | ||
buildahdocker "github.com/containers/buildah/docker" | ||
"github.com/containers/buildah/internal" | ||
"github.com/containers/buildah/internal/sanitize" | ||
"github.com/containers/buildah/internal/tmpdir" | ||
internalUtil "github.com/containers/buildah/internal/util" | ||
"github.com/containers/buildah/pkg/parse" | ||
|
@@ -537,7 +538,7 @@ func (s *StageExecutor) performCopy(excludes []string, copies ...imagebuilder.Co | |
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (not commenting on the content of this commit) there's a typo in the commit subject There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dang it, that's embarrassing. Fixing. |
||
contextDir = mountPoint | ||
} | ||
// Original behaviour of buildah still stays true for COPY irrespective of additional context. | ||
// This isn't --from the build context directory, so we don't want to force everything to 0:0 | ||
preserveOwnership = true | ||
copyExcludes = excludes | ||
} else { | ||
|
@@ -606,9 +607,14 @@ func (s *StageExecutor) performCopy(excludes []string, copies ...imagebuilder.Co | |
} | ||
|
||
// Returns a map of StageName/ImageName:internal.StageMountDetails for the | ||
// items in the passed-in mounts list which include a "from=" value. | ||
// items in the passed-in mounts list which include a "from=" value. The "" | ||
// key in the returned map corresponds to the default build context. | ||
func (s *StageExecutor) runStageMountPoints(mountList []string) (map[string]internal.StageMountDetails, error) { | ||
stageMountPoints := make(map[string]internal.StageMountDetails) | ||
stageMountPoints[""] = internal.StageMountDetails{ | ||
MountPoint: s.executor.contextDir, | ||
IsWritesDiscardedOverlay: s.executor.contextDirWritesAreDiscarded, | ||
} | ||
for _, flag := range mountList { | ||
if strings.Contains(flag, "from") { | ||
tokens := strings.Split(flag, ",") | ||
|
@@ -638,7 +644,7 @@ func (s *StageExecutor) runStageMountPoints(mountList []string) (map[string]inte | |
if additionalBuildContext.IsImage { | ||
mountPoint, err := s.getImageRootfs(s.ctx, additionalBuildContext.Value) | ||
if err != nil { | ||
return nil, fmt.Errorf("%s from=%s: image found with that name", flag, from) | ||
return nil, fmt.Errorf("%s from=%s: image not found with that name", flag, from) | ||
} | ||
// The `from` in stageMountPoints should point | ||
// to `mountPoint` replaced from additional | ||
|
@@ -922,6 +928,29 @@ func (s *StageExecutor) UnrecognizedInstruction(step *imagebuilder.Step) error { | |
return errors.New(err) | ||
} | ||
|
||
// sanitizeFrom limits which image transports we'll accept. For those it | ||
// accepts which refer to filesystem objects, where relative path names are | ||
// evaluated relative to "contextDir", it will create a copy of the original | ||
// image, under "tmpdir", which contains no symbolic links, and return either | ||
// the original image reference or a reference to a sanitized copy which should | ||
// be used instead. | ||
// | ||
// "from" is the image reference to check. "tmpdir" is passed through, | ||
// unmodified, to sanitize.ImageName() as the location for any sanitized copy. | ||
// | ||
// If "from" has no "transport:" prefix that transports.Get() recognizes, it is | ||
// returned unchanged when it either parses as a normalized image name or | ||
// matches an image in the executor's store; otherwise an error is returned. | ||
// If the prefix is a recognized transport, the decision is delegated to | ||
// sanitize.ImageName() using the executor's contextDir. | ||
func (s *StageExecutor) sanitizeFrom(from, tmpdir string) (newFrom string, err error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we document what is value contained in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding. |
||
// split at the first ":"; maybeHasTransportName is false when there is none | ||
transportName, restOfImageName, maybeHasTransportName := strings.Cut(from, ":") | ||
if !maybeHasTransportName || transports.Get(transportName) == nil { | ||
// this assigns the named result "err", which is the error that gets | ||
// wrapped below if neither check accepts the name | ||
if _, err = reference.ParseNormalizedNamed(from); err == nil { | ||
// this is a normal-looking image-in-a-registry-or-named-in-storage name | ||
return from, nil | ||
} | ||
// the "err" declared here is scoped to this if statement and shadows the | ||
// named result, so a lookup failure does not replace the parse error | ||
if img, err := s.executor.store.Image(from); img != nil && err == nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. … is it expected that Containerfiles can refer to image IDs? That can potentially allow builds on a shared host to refer to images which the invoking user has no right to pull, if the invoking user can somehow determine the config digest. OTOH… I can imagine that there might be uses. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Internally, we rewrite references to nicknames of previous stages to their IDs before we get to this point, since they don't get names assigned to them. |
||
// this is an image ID | ||
return from, nil | ||
} | ||
// "err" here is the error from reference.ParseNormalizedNamed() above | ||
return "", fmt.Errorf("parsing image name %q: %w", from, err) | ||
} | ||
// TODO: drop this part and just return an error... someday | ||
return sanitize.ImageName(transportName, restOfImageName, s.executor.contextDir, tmpdir) | ||
} | ||
|
||
// prepare creates a working container based on the specified image, or if one | ||
// isn't specified, the first argument passed to the first FROM instruction we | ||
// can find in the stage's parsed tree. | ||
|
@@ -938,6 +967,19 @@ func (s *StageExecutor) prepare(ctx context.Context, from string, initializeIBCo | |
} | ||
from = base | ||
} | ||
sanitizedDir, err := os.MkdirTemp(tmpdir.GetTempDir(), "buildah-context-") | ||
if err != nil { | ||
return nil, fmt.Errorf("creating temporary directory: %w", err) | ||
} | ||
defer func() { | ||
if err := os.RemoveAll(sanitizedDir); err != nil { | ||
logrus.Warn(err) | ||
} | ||
}() | ||
sanitizedFrom, err := s.sanitizeFrom(from, tmpdir.GetTempDir()) | ||
if err != nil { | ||
return nil, fmt.Errorf("invalid base image specification %q: %w", from, err) | ||
} | ||
displayFrom := from | ||
if ib.Platform != "" { | ||
displayFrom = "--platform=" + ib.Platform + " " + displayFrom | ||
|
@@ -976,7 +1018,7 @@ func (s *StageExecutor) prepare(ctx context.Context, from string, initializeIBCo | |
|
||
builderOptions := buildah.BuilderOptions{ | ||
Args: ib.Args, | ||
FromImage: from, | ||
FromImage: sanitizedFrom, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A small question, since I did not understand it completely: why are we creating a copy of the entire image for every stage? Is it just to make sure no symbolic links are present? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Discussed here #5975 (comment) and again here #5975 (comment) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be fair to ask how much overhead this patch adds, both [remainder of this sentence was lost in extraction]? I could be wrong, but I can think of a use case where a stage copies a lot of external artifacts but only parts of them are used in further stages; this would end up creating copies for every future stage even though those parts are not used by future stages. Should we consider making this behavior optional? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The disk cost is a second copy of the base image being copied to a temporary location.
I don't understand what you're describing here. Can you elaborate on that? Initializing a stage using a previous stage's result as its base doesn't involve the build context directory, so I don't follow where additional copies would appear.
Using an overlay over the build context directory is not something we should turn off. Turning off sharing like this PR introduces doesn't reduce any of the costs that are incurred when they're not shared, so I don't really see the benefit of adding complexity there. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I was thinking for use-case something like this FROM alpine as first
COPY /large-fileA-from-host .
COPY /large-fileB-from-host .
COPY /large-fileC-from-host .
FROM first as second
# Use only large-file-A in this stage
# but this will create another copy of `first` containing all three files
FROM first as third
# Use only large-file-B in this stage
# # but this will create another copy of `first` containing all three files
FROM first as fourth
# Use only large-file-C in this stage
# but this will create another copy of `first` containing all three files @nalind If i understand correctly for all the three stages There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The stages nicknamed "second", "third", and "fourth" would each start with their own working container based on the image that was committed at the end of the stage nicknamed "first", as before. This PR doesn't affect how we handle the root filesystems of working containers, or committing images. The COPY instructions have to read contents through the mounted overlay for consistency's sake, but I don't see additional copies of them being made here. Since they're not being modified, they'd be read directly from the build context directory that's being used as the "lowerdir" for the overlay, without being "copied up". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah i see sounds fair to me. |
||
GroupAdd: s.executor.groupAdd, | ||
PullPolicy: pullPolicy, | ||
ContainerSuffix: s.executor.containerSuffix, | ||
|
@@ -1014,16 +1056,6 @@ func (s *StageExecutor) prepare(ctx context.Context, from string, initializeIBCo | |
return nil, fmt.Errorf("creating build container: %w", err) | ||
} | ||
|
||
// If executor's ProcessLabel and MountLabel is empty means this is the first stage | ||
// Make sure we share first stage's ProcessLabel and MountLabel with all other subsequent stages | ||
// Doing this will ensure and one stage in same build can mount another stage even if `selinux` | ||
// is enabled. | ||
|
||
if s.executor.mountLabel == "" && s.executor.processLabel == "" { | ||
s.executor.mountLabel = builder.MountLabel | ||
s.executor.processLabel = builder.ProcessLabel | ||
} | ||
|
||
if initializeIBConfig { | ||
volumes := map[string]struct{}{} | ||
for _, v := range builder.Volumes() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit and not a blocker, probably a TODO for the future: `buildDockerfilesOnce` should probably start consuming a `struct`.