diff --git a/Dockerfile.docker-swarm-agent b/Dockerfile.docker-swarm-agent new file mode 100644 index 00000000..31c8816c --- /dev/null +++ b/Dockerfile.docker-swarm-agent @@ -0,0 +1,26 @@ +FROM golang:1.24 AS builder +ARG TARGETOS +ARG TARGETARCH +ARG VERSION +ARG COMMIT + +WORKDIR /workspace +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY api/ api/ +COPY cmd/agent/docker-swarm/ cmd/agent/docker-swarm/ +# doesn't exist (yet?) +# COPY pkg/ pkg/ +COPY internal/ internal/ +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -a -o agent \ + -ldflags="-s -w -X github.com/glasskube/distr/internal/buildconfig.version=${VERSION:-snapshot} -X github.com/glasskube/distr/internal/buildconfig.commit=${COMMIT}" \ + ./cmd/agent/docker-swarm/ + +FROM docker:27.3.1-alpine3.20 +WORKDIR / +COPY --from=builder /workspace/agent . + +ENTRYPOINT ["/agent"] diff --git a/Makefile b/Makefile index 76e32bc1..5acf3ac2 100644 --- a/Makefile +++ b/Makefile @@ -73,6 +73,10 @@ docker-build-docker-agent: docker-build-kubernetes-agent: docker build -f Dockerfile.kubernetes-agent --tag ghcr.io/glasskube/distr/kubernetes-agent:$(VERSION) --build-arg VERSION=$(VERSION) --build-arg COMMIT=$(COMMIT) --network host . +.PHONY: docker-build-docker-swarm-agent +docker-build-docker-swarm-agent: + docker build -f Dockerfile.docker-swarm-agent --tag ghcr.io/glasskube/distr/docker-swarm-agent:$(VERSION) --build-arg VERSION=$(VERSION) --build-arg COMMIT=$(COMMIT) --network host . 
+ .PHONY: docker-build docker-build: docker-build-hub docker-build-docker-agent docker-build-kubernetes-agent diff --git a/cmd/agent/docker-swarm/agent_deployment.go b/cmd/agent/docker-swarm/agent_deployment.go new file mode 100644 index 00000000..1e8054df --- /dev/null +++ b/cmd/agent/docker-swarm/agent_deployment.go @@ -0,0 +1,99 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path" + + "github.com/glasskube/distr/api" + "github.com/google/uuid" +) + +type AgentDeployment struct { + ID uuid.UUID `json:"id"` + RevisionID uuid.UUID `json:"revisionId"` + ProjectName string `json:"projectName"` +} + +func (d *AgentDeployment) FileName() string { + return path.Join(agentDeploymentDir(), d.ID.String()) +} + +func agentDeploymentDir() string { + return path.Join(ScratchDir(), "deployments") +} + +func NewAgentDeployment(deployment api.DockerAgentDeployment) (*AgentDeployment, error) { + if name, err := getProjectName(deployment.ComposeFile); err != nil { + return nil, err + } else { + return &AgentDeployment{ID: deployment.ID, RevisionID: deployment.RevisionID, ProjectName: name}, nil + } +} + +func getProjectName(data []byte) (string, error) { + if compose, err := DecodeComposeFile(data); err != nil { + return "", err + } else if name, ok := compose["name"].(string); !ok { + return "", fmt.Errorf("name is not a string") + } else { + return name, nil + } +} + +func GetExistingDeployments() ([]AgentDeployment, error) { + if entries, err := os.ReadDir(agentDeploymentDir()); err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } else { + fn := func(name string) (*AgentDeployment, error) { + if file, err := os.Open(path.Join(agentDeploymentDir(), name)); err != nil { + return nil, err + } else { + defer file.Close() + var d AgentDeployment + if err := json.NewDecoder(file).Decode(&d); err != nil { + return nil, err + } + return &d, nil + } + } + result := make([]AgentDeployment, 0, len(entries)) + for _, 
// cleanComposeFile returns composeData without its top-level `name:` entry.
//
// The project name is handed to `docker stack deploy` as a command line
// argument by ApplyComposeFileSwarm, so it is dropped from the document that
// is piped to the command. Only an unindented `name:` line is removed:
// indented `name:` keys belong to nested objects (for example an explicitly
// named volume or network) and must survive, otherwise those resources would
// silently be deployed under different names.
func cleanComposeFile(composeData []byte) []byte {
	lines := strings.Split(string(composeData), "\n")
	cleanedLines := make([]string, 0, len(lines))

	for _, line := range lines {
		// No TrimSpace here on purpose: leading whitespace is what
		// distinguishes a nested key from the top-level project name.
		if strings.HasPrefix(line, "name:") {
			continue
		}
		cleanedLines = append(cleanedLines, line)
	}
	return []byte(strings.Join(cleanedLines, "\n"))
}
bufio.NewScanner(bytes.NewReader(envData)) + for scanner.Scan() { + line := scanner.Text() + if strings.TrimSpace(line) == "" || strings.HasPrefix(line, "#") { + continue // Skip empty lines and comments + } + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid environment variable: %s", line) + } + envVars[parts[0]] = parts[1] + } + return envVars, scanner.Err() +} + +func ApplyComposeFileSwarm( + ctx context.Context, + deployment api.DockerAgentDeployment, +) (*AgentDeployment, string, error) { + agentDeployment, err := NewAgentDeployment(deployment) + if err != nil { + return nil, "", err + } + + // Ensure Docker Swarm is initialized + initCmd := exec.CommandContext(ctx, "docker", "info", "--format", "{{.Swarm.LocalNodeState}}") + initOutput, err := initCmd.CombinedOutput() + if err != nil { + logger.Error("Failed to check Docker Swarm state", zap.Error(err)) + return nil, "", fmt.Errorf("failed to check Docker Swarm state: %w", err) + } + + if !strings.Contains(strings.TrimSpace(string(initOutput)), "active") { + logger.Error("Docker Swarm not initialized", zap.String("output", string(initOutput))) + return nil, "", fmt.Errorf("docker Swarm not initialized: %s", string(initOutput)) + } + + // Read the Compose file as is, without replacing environment variables + cleanedCompose := cleanComposeFile(deployment.ComposeFile) + + // Run `docker stack deploy` + composeArgs := []string{"stack", "deploy", "-c", "-", agentDeployment.ProjectName} + cmd := exec.CommandContext(ctx, "docker", composeArgs...) + cmd.Stdin = bytes.NewReader(cleanedCompose) + cmd.Env = append(os.Environ(), agentauth.DockerConfigEnv(deployment.AgentDeployment)...) 
+ // Add environment variables to the process + cmd.Env = os.Environ() + + // If an env file is provided, load its values into the command environment + if deployment.EnvFile != nil { + envVars, err := parseEnvFile(deployment.EnvFile) + if err != nil { + logger.Error("Failed to parse env file", zap.Error(err)) + return nil, "", fmt.Errorf("failed to parse env file: %w", err) + } + for key, value := range envVars { + cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", key, value)) + } + } + + // Execute the command and capture output + cmdOut, err := cmd.CombinedOutput() + statusStr := string(cmdOut) + + logger.Debug("docker stack deploy returned", zap.String("output", statusStr)) + + if err != nil { + logger.Error("Docker stack deploy failed", zap.String("output", statusStr)) + return nil, "", errors.New(statusStr) + } + + return agentDeployment, statusStr, nil +} + +func UninstallDockerSwarm(ctx context.Context, deployment AgentDeployment) error { + + cmd := exec.CommandContext(ctx, "docker", "stack", "rm", deployment.ProjectName) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to remove Docker Swarm stack: %w: %v", err, string(out)) + } + + // Optional: Prune unused networks created by Swarm + pruneCmd := exec.CommandContext(ctx, "docker", "network", "prune", "-f") + pruneOut, pruneErr := pruneCmd.CombinedOutput() + if pruneErr != nil { + logger.Warn("Failed to prune networks", zap.String("output", string(pruneOut)), zap.Error(pruneErr)) + } + + return nil +} diff --git a/cmd/agent/docker-swarm/main.go b/cmd/agent/docker-swarm/main.go new file mode 100644 index 00000000..c643639e --- /dev/null +++ b/cmd/agent/docker-swarm/main.go @@ -0,0 +1,109 @@ +package main + +import ( + "context" + "os" + "os/signal" + "syscall" + "time" + + "github.com/glasskube/distr/internal/agentauth" + "github.com/glasskube/distr/internal/agentclient" + "github.com/glasskube/distr/internal/util" + "go.uber.org/multierr" + "go.uber.org/zap" +) + +var ( + 
interval = 5 * time.Second + logger = util.Require(zap.NewDevelopment()) + client = util.Require(agentclient.NewFromEnv(logger)) + agentVersionID = os.Getenv("DISTR_AGENT_VERSION_ID") +) + +func init() { + if intervalStr, ok := os.LookupEnv("DISTR_INTERVAL"); ok { + interval = util.Require(time.ParseDuration(intervalStr)) + } + if agentVersionID == "" { + logger.Warn("DISTR_AGENT_VERSION_ID is not set. self updates will be disabled") + } +} + +func main() { + ctx, cancel := context.WithCancel(context.Background()) + go func() { + sigint := make(chan os.Signal, 1) + signal.Notify(sigint, syscall.SIGTERM, syscall.SIGINT) + <-sigint + logger.Info("received termination signal") + cancel() + }() + tick := time.Tick(interval) +loop: + for ctx.Err() == nil { + select { + case <-tick: + case <-ctx.Done(): + break loop + } + + if resource, err := client.DockerResource(ctx); err != nil { + logger.Error("failed to get resource", zap.Error(err)) + } else { + if agentVersionID != "" { + if agentVersionID != resource.Version.ID.String() { + logger.Info("agent version has changed. starting self-update") + if err := RunAgentSelfUpdate(ctx); err != nil { + logger.Error("self update failed", zap.Error(err)) + // TODO: Support status without revision ID? 
+ if resource.Deployment != nil { + if err := client.Status(ctx, resource.Deployment.RevisionID, "", err); err != nil { + logger.Error("failed to send status", zap.Error(err)) + } + } + } else { + logger.Info("self-update has been applied") + continue + } + } else { + logger.Debug("agent version is up to date") + } + } + + if deployments, err := GetExistingDeployments(); err != nil { + logger.Error("could not get existing deployments", zap.Error(err)) + } else { + for _, deployment := range deployments { + if resource.Deployment == nil || resource.Deployment.ID != deployment.ID { + logger.Info("uninstalling old deployment", zap.String("id", deployment.ID.String())) + if err := UninstallDockerSwarm(ctx, deployment); err != nil { + logger.Error("could not uninstall deployment", zap.Error(err)) + } else if err := DeleteDeployment(deployment); err != nil { + logger.Error("could not delete deployment", zap.Error(err)) + } + } + } + } + + if resource.Deployment == nil { + logger.Info("no deployment in resource response") + continue + } + + var agentDeployment *AgentDeployment + var status string + _, err = agentauth.EnsureAuth(ctx, resource.Deployment.AgentDeployment) + if err != nil { + logger.Error("docker auth error", zap.Error(err)) + } else if agentDeployment, status, err = ApplyComposeFileSwarm(ctx, *resource.Deployment); err == nil { + multierr.AppendInto(&err, SaveDeployment(*agentDeployment)) + } + + if statusErr := client.Status(ctx, resource.Deployment.RevisionID, status, err); statusErr != nil { + logger.Error("failed to send status", zap.Error(statusErr)) + } + } + } + logger.Info("shutting down") +} diff --git a/cmd/agent/docker-swarm/self_update.go b/cmd/agent/docker-swarm/self_update.go new file mode 100644 index 00000000..2d39f297 --- /dev/null +++ b/cmd/agent/docker-swarm/self_update.go @@ -0,0 +1,102 @@ +package main + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "path" + "strings" + + "gopkg.in/yaml.v3" +) + +func 
// agentServiceFromManifest returns the services.agent object of a decoded
// compose manifest.
func agentServiceFromManifest(manifest map[string]any) (map[string]any, error) {
	svcs, ok := manifest["services"].(map[string]any)
	if !ok {
		return nil, errors.New("services is not an object")
	}
	svc, ok := svcs["agent"].(map[string]any)
	if !ok {
		return nil, errors.New("service \"agent\" is not an object")
	}
	return svc, nil
}

// setEnvListEntry sets key=value in a list-style compose environment
// ("KEY=VALUE" strings). An existing entry for key is replaced in place,
// otherwise the assignment is appended. The possibly re-allocated list is
// returned.
func setEnvListEntry(entries []any, key, value string) []any {
	assignment := key + "=" + value
	for i, entry := range entries {
		text, ok := entry.(string)
		if !ok {
			continue
		}
		if name, _, _ := strings.Cut(text, "="); name == key {
			entries[i] = assignment
			return entries
		}
	}
	return append(entries, assignment)
}

// PatchAgentManifest sets DISTR_TARGET_SECRET in the environment of the
// "agent" service to the value of the agent's own DISTR_TARGET_SECRET
// environment variable. The manifest is modified in place.
//
// Both the mapping form and the list form of the service's `environment` are
// supported, and a missing `environment` is created. An error is returned if
// `services` or `services.agent` is not an object, or if `environment` has any
// other type.
func PatchAgentManifest(manifest map[string]any) error {
	const secretKey = "DISTR_TARGET_SECRET"

	svc, err := agentServiceFromManifest(manifest)
	if err != nil {
		return err
	}

	secret := os.Getenv(secretKey)
	switch env := svc["environment"].(type) {
	case map[string]any:
		env[secretKey] = secret
	case []any:
		// Store the result back: appending may have re-allocated the list.
		svc["environment"] = setEnvListEntry(env, secretKey, secret)
	case nil:
		// Key absent or explicitly null.
		svc["environment"] = map[string]any{secretKey: secret}
	default:
		return errors.New("env is not an object")
	}
	return nil
}

// GetAgentImageFromManifest returns the image of the "agent" service of a
// decoded compose manifest, or an error if the manifest does not have the
// expected structure.
func GetAgentImageFromManifest(manifest map[string]any) (string, error) {
	svc, err := agentServiceFromManifest(manifest)
	if err != nil {
		return "", err
	}
	image, ok := svc["image"].(string)
	if !ok {
		return "", errors.New("image is not a string")
	}
	return image, nil
}
| base64 -d | docker compose ...", but I kept getting + // "filename too long" errors with that approach. + // It is therefore necessary to write the docker-compose.yaml data to a file instead. + // Because of how DinD works, this file, which is also mounted in the new container must be + // either on the host filesystem or in a shared volume. + file, err := os.Create(path.Join(ScratchDir(), "distr-update.yaml")) + if err != nil { + return err + } + if err := yaml.NewEncoder(file).Encode(manifest); err != nil { + file.Close() + return err + } + file.Close() + + // The self-update container uses the same image as the new agent. + // This should save some time and disk space on the host, but it means that we have to be + // careful about migrating to a different base image for the agent. + imageName, err := GetAgentImageFromManifest(manifest) + if err != nil { + return err + } + + cmd := exec.CommandContext(ctx, + "docker", "run", "--detach", "--rm", + "--entrypoint", "/usr/local/bin/docker-entrypoint.sh", + "--env", "HOST_DOCKER_CONFIG_DIR="+os.Getenv("HOST_DOCKER_CONFIG_DIR"), + // TODO: Not sure if it's correct to assume this will always be the correct container name, + // but AFAIK there is no reliable way to get the name of a container from the "inside" + "--volumes-from", "distr-agent-1", + imageName, + "docker", "compose", "-f", file.Name(), "up", "-d", + ) + out, err := cmd.CombinedOutput() + logger.Sugar().Infof("self-update output: %v", strings.TrimSpace(string(out))) + return err +} diff --git a/cmd/agent/docker-swarm/util.go b/cmd/agent/docker-swarm/util.go new file mode 100644 index 00000000..a9d4695d --- /dev/null +++ b/cmd/agent/docker-swarm/util.go @@ -0,0 +1,8 @@ +package main + +import "gopkg.in/yaml.v3" + +func DecodeComposeFile(manifest []byte) (result map[string]any, err error) { + err = yaml.Unmarshal(manifest, &result) + return +} diff --git a/internal/resources/embedded/agent/agent-swarm/v1/docker-compose.yaml 
b/internal/resources/embedded/agent/agent-swarm/v1/docker-compose.yaml new file mode 100644 index 00000000..08095c48 --- /dev/null +++ b/internal/resources/embedded/agent/agent-swarm/v1/docker-compose.yaml @@ -0,0 +1,23 @@ +name: distr +services: + agent: + network_mode: host + restart: unless-stopped + image: 'ghcr.io/glasskube/distr/docker-swarm-agent:{{ .agentVersion }}' + environment: + DISTR_TARGET_ID: '{{ .targetId }}' + DISTR_TARGET_SECRET: '{{ .targetSecret }}' + DISTR_LOGIN_ENDPOINT: '{{ .loginEndpoint }}' + DISTR_MANIFEST_ENDPOINT: '{{ .manifestEndpoint }}' + DISTR_RESOURCE_ENDPOINT: '{{ .resourcesEndpoint }}' + DISTR_STATUS_ENDPOINT: '{{ .statusEndpoint }}' + DISTR_INTERVAL: '{{ .agentInterval }}' + DISTR_AGENT_VERSION_ID: '{{ .agentVersionId }}' + DISTR_AGENT_SCRATCH_DIR: /scratch + HOST_DOCKER_CONFIG_DIR: ${HOST_DOCKER_CONFIG_DIR-${HOME}/.docker} + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - scratch:/scratch + - ${HOST_DOCKER_CONFIG_DIR-${HOME}/.docker}:/root/.docker:ro +volumes: + scratch: