portainer
diff --git a/‎Gopkg.lock‎
Lines changed: 4 additions & 3 deletions b/‎Gopkg.lock‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎Gopkg.toml‎
Lines changed: 1 addition & 1 deletion b/‎Gopkg.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 105 additions & 4 deletions b/‎README.md‎
Lines changed: 105 additions & 4 deletions
diff --git a/‎agent.go‎
Lines changed: 2 additions & 2 deletions b/‎agent.go‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎cmd/agent/main.go‎
Lines changed: 20 additions & 23 deletions b/‎cmd/agent/main.go‎
Lines changed: 20 additions & 23 deletions
diff --git a/‎dev.sh‎
Lines changed: 3 additions & 5 deletions b/‎dev.sh‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎docker/docker.go‎
Lines changed: 38 additions & 2 deletions b/‎docker/docker.go‎
Lines changed: 38 additions & 2 deletions
@@ -76,7 +76,7 @@
 
 [[constraint]]
   name = "github.com/portainer/libhttp"
-  version = "=1.0.1"
+  version = "=1.1.0"
 
 [[constraint]]
   name = "github.com/portainer/libcrypto"
 
@@ -10,6 +10,10 @@ Containers, networks, volumes and images are node specific resources, not cluste
 
 The agent aims to allow previously node-specific resources to be cluster-aware, all while keeping the Docker API request format. As mentioned above, this means that you only need to execute one Docker API request to retrieve all these resources from every node inside the cluster, bringing a better Docker user experience when managing Swarm clusters.
 
+## Security
+
+Here at Portainer, we believe in [responsible disclosure](https://en.wikipedia.org/wiki/Responsible_disclosure) of security issues. If you have found a security issue, please report it to <security@portainer.io>.
+
 ## Technical details
 
 The Portainer agent is basically a cluster of Docker API proxies. Deployed inside a Swarm cluster on each node, it allows the
@@ -22,6 +26,11 @@ At startup, the agent will communicate with the Docker node it is deployed on vi
 This implementation is using *serf* to form a cluster over a network, each agent requires an address where it will advertise its
 ability to be part of a cluster and a join address where it will be able to reach other agents.
 
+The agent retrieves the IP address it can use to create
+a cluster by inspecting the Docker networks associated with the agent container. If multiple networks are available, it will pick the first available network and retrieve the IP address inside this network.
+
+Note: Be careful not to deploy the agent inside the Swarm ingress network (this happens when ports are exposed without `mode=host`). If the agent picks its IP address from the ingress network, it might be unable to create a cluster correctly.
+
 ### Proxy
 
 The agent works as a proxy to the Docker API on which it is deployed as well as a proxy to the other agents inside the cluster.
@@ -72,6 +81,10 @@ The agent also exposes the following endpoints:
 * `/browse/put` (*POST*): Upload a file under a specific path on the filesystem
 * `/host/info` (*GET*): Get information about the underlying host system
 * `/ping` (*GET*): Returns a 204. Public endpoint that does not require any form of authentication
+* `/key` (*GET*): Returns the Edge key associated with the agent (**only available when the agent is started in Edge mode**)
+* `/key` (*POST*): Sets the Edge key on this agent (**only available when the agent is started in Edge mode**)
+* `/websocket/attach` (*GET*): Websocket attach endpoint (for container console usage)
+* `/websocket/exec` (*GET*): Websocket exec endpoint (for container console usage)
 
 Note: The `/browse/*` endpoints can be used to manage a filesystem. By default, it allows manipulation of files in Docker volumes (available under `/var/run/docker/volumes` when bind-mounted in the agent container) but can also manipulate files anywhere on the filesystem. To enable global
 filesystem manipulation support for these endpoints, the `CAP_HOST_MANAGEMENT` environment variable must be set to `1`.
@@ -80,7 +93,87 @@ filesystem manipulation support for these endpoints, the `CAP_HOST_MANAGEMENT` e
 
 The agent API version is exposed via the `Portainer-Agent-API-Version` in each response of the agent.
 
-## Security
+## Using the agent in Edge mode
+
+The following information is only relevant for an Agent that was started in Edge mode.
+
+### Purpose
+
+The Edge mode is mainly used when your remote environment is not in the same network as your Portainer instance. When started in Edge mode, the agent will reach out to the Portainer instance
+and will take care of creating a reverse tunnel allowing the Portainer instance to query it. It uses a token (Edge key) that contains the required information to connect to a specific Portainer instance.
+
+### Startup
+
+To start an agent in Edge mode, the `EDGE=1` environment variable must be set.
+
+Upon startup, the agent will try to retrieve an existing Edge key in the following order:
+
+* from the environment variables via the `EDGE_KEY` environment variable
+* from the filesystem (see the Edge key section below for more information about key persistence on disk)
+* from the cluster (if joining an existing Edge agent cluster)
+
+If no Edge key was retrieved, the agent will start an HTTP server where it will expose a UI to associate an Edge key. After associating a key via the UI, the UI server will shut down.
+
+For security reasons, the Edge server UI will shut down after 15 minutes if no key has been specified. The agent will require a restart in order
+to access the Edge UI again.
+
+### Edge key
+
+The Edge key is used by the agent to connect to a specific Portainer instance. It is encoded using base64 and contains the following information:
+
+* Portainer instance API URL
+* Portainer instance tunnel server address
+* Portainer instance tunnel server fingerprint
+* Endpoint identifier
+
+This information is represented in the following format before encoding (single string using the `|` character as a separator):
+
+```
+portainer_instance_url|tunnel_server_addr|tunnel_server_fingerprint|endpoint_ID
+```
+
+The Edge key associated to an agent will be persisted on disk after association under `/data/agent_edge_key`.
+
+### Polling
+
+After associating an Edge key to an agent, the agent will start polling the associated Portainer instance.
+
+It will use the Portainer instance API URL and the endpoint identifier available in the Edge key to build the poll request URL: `http(s)://API_URL/api/endpoints/ENDPOINT_ID/status`
+
+The response of the poll request contains the following information:
+
+* Tunnel status
+* Poll frequency
+* Tunnel port
+* Encrypted credentials
+* Schedules
+
+The tunnel status property can take one of the following values: `IDLE`, `REQUIRED`, `ACTIVE`. When this property is set to `REQUIRED`, the agent will
+create a reverse tunnel to the Portainer instance using the port specified in the response as well as the credentials.
+
+Each poll request sent to the Portainer instance contains the `X-PortainerAgent-EdgeID` header (with the value set to the Edge ID associated to the agent). This is used by the Portainer instance to associate an Edge ID to an endpoint so that an agent won't be able to poll information and join an Edge cluster by re-using an existing key without knowing the Edge ID.
+
+To allow for pre-staged environments, this Edge ID is associated to an endpoint by Portainer after receiving the first poll request from an agent.
+
+### Reverse tunnel
+
+The reverse tunnel is established by the agent. The permissions associated with the credentials are set on the Portainer instance; the credentials are valid for a management session and can only be used
+to create a reverse tunnel on a specific port (the one that is specified in the poll response).
+
+The agent will monitor the usage of the tunnel. The tunnel will be closed in any of the following cases:
+
+1. The status of the tunnel specified in the poll response is equal to `IDLE`
+2. If no activity has been registered on the tunnel (no requests executed against the agent API) after a specific amount of time (can be configured via `EDGE_INACTIVITY_TIMEOUT`, defaults to 5 minutes)
+
+### API server
+
+When deployed in Edge mode, the agent API is not exposed over HTTPS anymore (see the "Using the agent (non Edge)" section below) because we're using SSH to set up an encrypted tunnel. In order to avoid potential security issues with agent deployments exposing the API port on their host, the agent won't expose the API server under 0.0.0.0. Instead, it will expose the API server on the same IP address that is used to advertise the cluster (usually, the container IP in the overlay network).
+
+This means that only a container deployed in the same overlay network as the agent will be able to query it.  
+
+## Using the agent (non Edge)
+
+The following information is only relevant for an Agent that was not started in Edge mode.
 
 ### Encryption
 
@@ -132,17 +225,25 @@ This mode will allow multiple instances of Portainer to connect to a single agen
 
 Note: Due to the fact that the agent will now decode and parse the public key associated to each request, this mode might be less performant than the default mode.
 
-
 ## Deployment options
 
 The behavior of the agent can be tuned via a set of mandatory and optional options available as environment variables:
 
 * AGENT_CLUSTER_ADDR (*optional*): address (in the IP:PORT format) of an existing agent to join the agent cluster. When deploying the agent as a Docker Swarm service,
 we can leverage the internal Docker DNS to automatically join existing agents or form a cluster by using `tasks.<AGENT_SERVICE_NAME>:<AGENT_PORT>` as the address. When not set, the agent uses `tasks.<AGENT_SERVICE_NAME>`, with the service name retrieved from Docker.
-* AGENT_PORT (*optional*): port on which the agent web server will listen (default to `9001`).
-* CAP_HOST_MANAGEMENT (*optional*): enable advanced filesystem management features. Disabled by default, set to `1` to enable it.
+* AGENT_HOST (*optional*): address on which the agent API will be exposed (default to `0.0.0.0`)
+* AGENT_PORT (*optional*): port on which the agent API will be exposed (default to `9001`)
+* CAP_HOST_MANAGEMENT (*optional*): enable advanced filesystem management features. Disabled by default, set to `1` to enable it
 * AGENT_SECRET (*optional*): shared secret used in the signature verification process
 * LOG_LEVEL (*optional*): defines the log output verbosity (default to `INFO`)
+* EDGE (*optional*): enable Edge mode. Disabled by default, set to `1` to enable it
+* EDGE_KEY (*optional*): specify an Edge key to use at startup
+* EDGE_ID (*mandatory when EDGE=1*): a unique identifier associated to this agent cluster
+* EDGE_SERVER_HOST (*optional*): address on which the Edge UI will be exposed (default to `0.0.0.0`)
+* EDGE_SERVER_PORT (*optional*): port on which the Edge UI will be exposed (default to `80`)
+* EDGE_INACTIVITY_TIMEOUT (*optional*): timeout used by the agent to close the reverse tunnel after inactivity (default to `5m`)
+* EDGE_INSECURE_POLL (*optional*): enable this option if you need the agent to poll an HTTPS Portainer instance with self-signed certificates. Disabled by default, set to `1` to enable it
+
 
 For more information about deployment scenarios, see: https://portainer.readthedocs.io/en/stable/agent.html
 
 
@@ -14,7 +14,6 @@ type (
 		EdgeServerAddr        string
 		EdgeServerPort        string
 		EdgeInactivityTimeout string
-		EdgePollFrequency     string
 		EdgeInsecurePoll      bool
 		LogLevel              string
 	}
@@ -98,6 +97,7 @@ type (
 	InfoService interface {
 		GetInformationFromDockerEngine() (map[string]string, error)
 		GetContainerIpFromDockerEngine(containerName string) (string, error)
+		GetServiceNameFromDockerEngine(containerName string) (string, error)
 	}
 
 	// TLSService is used to create TLS certificates to use enable HTTPS.
@@ -138,7 +138,7 @@ type (
 
 const (
 	// Version represents the version of the agent.
-	Version = "1.4.0"
+	Version = "1.5.0"
 	// APIVersion represents the version of the agent's API.
 	APIVersion = "2"
 	// DefaultAgentAddr is the default address used by the Agent API server.
 
@@ -41,46 +41,57 @@ func main() {
 		log.Println("[INFO] [main] [message: Agent running on a Swarm cluster node. Running in cluster mode]")
 	}
 
-	if options.ClusterAddress == "" && clusterMode {
-		log.Fatalf("[ERROR] [main,configuration] [message: AGENT_CLUSTER_ADDR environment variable is required when deploying the agent inside a Swarm cluster]")
+	containerName, err := os.GetHostName()
+	if err != nil {
+		log.Fatalf("[ERROR] [main,os] [message: Unable to retrieve container name] [error: %s]", err)
 	}
 
-	advertiseAddr, err := retrieveAdvertiseAddress(&infoService)
+	advertiseAddr, err := infoService.GetContainerIpFromDockerEngine(containerName)
 	if err != nil {
-		log.Fatalf("[ERROR] [main,docker,os] [message: Unable to retrieve local agent IP address] [error: %s]", err)
+		log.Fatalf("[ERROR] [main,docker] [message: Unable to retrieve local agent IP address] [error: %s]", err)
 	}
 
 	var clusterService agent.ClusterService
 	if clusterMode {
 		clusterService = cluster.NewClusterService(agentTags)
 
+		clusterAddr := options.ClusterAddress
+		if clusterAddr == "" {
+			serviceName, err := infoService.GetServiceNameFromDockerEngine(containerName)
+			if err != nil {
+				log.Fatalf("[ERROR] [main,docker] [message: Unable to agent service name from Docker] [error: %s]", err)
+			}
+
+			clusterAddr = fmt.Sprintf("tasks.%s", serviceName)
+		}
+
 		// TODO: Workaround. looks like the Docker DNS cannot find any info on tasks.<service_name>
 		// sometimes... Waiting a bit before starting the discovery (at least 3 seconds) seems to solve the problem.
 		time.Sleep(3 * time.Second)
 
-		joinAddr, err := net.LookupIPAddresses(options.ClusterAddress)
+		joinAddr, err := net.LookupIPAddresses(clusterAddr)
 		if err != nil {
-			log.Fatalf("[ERROR] [main,net] [host: %s] [message: Unable to retrieve a list of IP associated to the host] [error: %s]", options.ClusterAddress, err)
+			log.Fatalf("[ERROR] [main,net] [host: %s] [message: Unable to retrieve a list of IP associated to the host] [error: %s]", clusterAddr, err)
 		}
 
 		err = clusterService.Create(advertiseAddr, joinAddr)
 		if err != nil {
 			log.Fatalf("[ERROR] [main,cluster] [message: Unable to create cluster] [error: %s]", err)
 		}
 
+		log.Printf("[DEBUG] [main,configuration] [agent_port: %s] [cluster_address: %s] [advertise_address: %s]", options.AgentServerPort, clusterAddr, advertiseAddr)
+
 		defer clusterService.Leave()
 	}
 
-	log.Printf("[DEBUG] [main,configuration] [agent_port: %s] [cluster_address: %s] [advertise_address: %s]", options.AgentServerPort, options.ClusterAddress, advertiseAddr)
-
 	var tunnelOperator agent.TunnelOperator
 	if options.EdgeMode {
 		apiServerAddr := fmt.Sprintf("%s:%s", advertiseAddr, options.AgentServerPort)
 
 		operatorConfig := &tunnel.OperatorConfig{
 			APIServerAddr:     apiServerAddr,
 			EdgeID:            options.EdgeID,
-			PollFrequency:     options.EdgePollFrequency,
+			PollFrequency:     agent.DefaultEdgePollInterval,
 			InactivityTimeout: options.EdgeInactivityTimeout,
 			InsecurePoll:      options.EdgeInsecurePoll,
 		}
@@ -281,17 +292,3 @@ func retrieveInformationFromDockerEnvironment(infoService agent.InfoService) (ma
 
 	return agentTags, nil
 }
-
-func retrieveAdvertiseAddress(infoService agent.InfoService) (string, error) {
-	containerName, err := os.GetHostName()
-	if err != nil {
-		return "", err
-	}
-
-	advertiseAddr, err := infoService.GetContainerIpFromDockerEngine(containerName)
-	if err != nil {
-		return "", err
-	}
-
-	return advertiseAddr, nil
-}
@@ -2,7 +2,7 @@
 
 LOG_LEVEL=DEBUG
 CAP_HOST_MANAGEMENT=1 #Enabled by default. Change this to anything else to disable this feature
-EDGE=1
+EDGE=0
 TMP="/tmp"
 GIT_COMMIT_HASH=`git rev-parse --short HEAD`
 GIT_BRANCH_NAME=`git rev-parse --abbrev-ref HEAD`
@@ -84,21 +84,19 @@ function deploy_swarm() {
 
   echo "Deployment..."
 
-  docker -H "${DOCKER_MANAGER}:2375" network create --driver overlay --attachable portainer-agent-dev-net
+  docker -H "${DOCKER_MANAGER}:2375" network create --driver overlay portainer-agent-dev-net
   docker -H "${DOCKER_MANAGER}:2375" service create --name portainer-agent-dev \
   --network portainer-agent-dev-net \
   -e LOG_LEVEL="${LOG_LEVEL}" \
   -e CAP_HOST_MANAGEMENT=${CAP_HOST_MANAGEMENT} \
   -e EDGE=${EDGE} \
   -e EDGE_ID=${EDGE_ID} \
-  -e AGENT_CLUSTER_ADDR=tasks.portainer-agent-dev \
   --mode global \
   --mount type=bind,src=//var/run/docker.sock,dst=/var/run/docker.sock \
   --mount type=bind,src=//var/lib/docker/volumes,dst=/var/lib/docker/volumes \
   --mount type=bind,src=//,dst=/host \
-  --publish mode=host,target=9001,published=9001 \
+  --publish target=9001,published=9001 \
   --publish mode=host,published=80,target=80 \
-  --restart-condition none \
   "${IMAGE_NAME}"
 
 #  --mount type=volume,src=portainer_agent_data,dst=/data \
 
@@ -5,10 +5,15 @@ import (
 	"errors"
 	"log"
 
+	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/client"
 	"github.com/portainer/agent"
 )
 
+const (
+	serviceNameLabel = "com.docker.swarm.service.name" // container label read to obtain the Swarm service name
+)
+
 // InfoService is a service used to retrieve information from a Docker environment.
 type InfoService struct{}
 
@@ -58,12 +63,43 @@ func (service *InfoService) GetContainerIpFromDockerEngine(containerName string)
 		return "", err
 	}
 
-	for _, network := range containerInspect.NetworkSettings.Networks {
+	if len(containerInspect.NetworkSettings.Networks) > 1 {
+		log.Printf("[WARN] [docker] [network_count: %d] [message: Agent container running in more than a single Docker network. This might cause communication issues]", len(containerInspect.NetworkSettings.Networks))
+	}
+
+	for networkName, network := range containerInspect.NetworkSettings.Networks {
+		networkInspect, err := cli.NetworkInspect(context.Background(), network.NetworkID, types.NetworkInspectOptions{})
+		if err != nil {
+			return "", err
+		}
+
+		if networkInspect.Ingress || networkInspect.Scope != "swarm" {
+			log.Printf("[DEBUG] [docker] [network_name: %s] [scope: %s] [ingress: %t] [message: Skipping invalid container network]", networkInspect.Name, networkInspect.Scope, networkInspect.Ingress)
+			continue
+		}
+
 		if network.IPAddress != "" {
-			log.Printf("[DEBUG] [docker] [network_count: %d] [ip_address: %s] [message: Retrieving IP address from container networks]", len(containerInspect.NetworkSettings.Networks), network.IPAddress)
+			log.Printf("[DEBUG] [docker] [ip_address: %s] [network_name: %s] [message: Retrieving IP address from container network]", network.IPAddress, networkName)
 			return network.IPAddress, nil
 		}
 	}
 
 	return "", errors.New("unable to retrieve the address on which the agent can advertise. Check your network settings")
 }
+
+// GetServiceNameFromDockerEngine returns the name of the Swarm service the agent is part of.
+// It inspects the container identified by containerName and reads the name from its serviceNameLabel label. An error is returned when the Docker client cannot be created or the inspect call fails; a missing label yields an empty name with a nil error.
+func (service *InfoService) GetServiceNameFromDockerEngine(containerName string) (string, error) {
+	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithVersion(agent.SupportedDockerAPIVersion))
+	if err != nil {
+		return "", err
+	}
+	defer cli.Close()
+
+	containerInspect, err := cli.ContainerInspect(context.Background(), containerName)
+	if err != nil {
+		return "", err
+	}
+
+	return containerInspect.Config.Labels[serviceNameLabel], nil
+}