Monitoring updates to match replat

trangevi · trangevi · commit df10e4d8f3fb · 2026-03-12T13:57:07.000-07:00
Signed-off-by: trangevi <trangevi@microsoft.com>
diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/monitor.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/monitor.go
@@ -8,6 +8,9 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
+	"os"
+	"strconv"
 
 	"github.com/azure/azure-dev/cli/azd/pkg/azdext"
 	"github.com/spf13/cobra"
@@ -18,6 +21,7 @@ type monitorFlags struct {
 	projectName string
 	name        string
 	version     string
+	sessionID   string
 	follow      bool
 	tail        int
 	logType     string
@@ -34,23 +38,24 @@ func newMonitorCommand() *cobra.Command {
 
 	cmd := &cobra.Command{
 		Use:   "monitor",
-		Short: "Monitor logs from a hosted agent container.",
-		Long: `Monitor logs from a hosted agent container.
+		Short: "Monitor logs from a hosted agent.",
+		Long: `Monitor logs from a hosted agent.
 
-Streams console output (stdout/stderr) or system events from an agent container.
+Streams console output (stdout/stderr) or system events from an agent session or container.
+Use --session to stream logs for a specific session, or omit it to use the container logstream.
 Use --follow to stream logs in real-time, or omit it to fetch recent logs and exit.
 This is useful for troubleshooting agent startup issues or monitoring agent behavior.`,
-		Example: `  # Fetch the last 50 lines of console logs
-  azd ai agent monitor --name my-agent --version 1
+		Example: `  # Stream session logs
+  azd ai agent monitor --name my-agent --version 1 --session <session-id>
 
-  # Stream console logs in real-time
-  azd ai agent monitor --name my-agent --version 1 --follow
+  # Stream session logs in real-time
+  azd ai agent monitor --name my-agent --version 1 --session <session-id> --follow
 
-  # Fetch system event logs
-  azd ai agent monitor --name my-agent --version 1 --type system
+  # Fetch container console logs (legacy)
+  azd ai agent monitor --name my-agent --version 1
 
-  # Fetch last 100 lines with explicit account
-  azd ai agent monitor --name my-agent --version 1 --tail 100 --account-name myAccount --project-name myProject`,
+  # Fetch system event logs from container (legacy)
+  azd ai agent monitor --name my-agent --version 1 --type system`,
 		RunE: func(cmd *cobra.Command, args []string) error {
 			if err := validateMonitorFlags(flags); err != nil {
 				return err
@@ -64,6 +69,14 @@ This is useful for troubleshooting agent startup issues or monitoring agent beha
 				return err
 			}
 
+			// When vnext is enabled, resolve session ID for session-based logstream.
+			if flags.sessionID == "" {
+				sessionID, vnext := resolveMonitorSession(ctx, flags.name)
+				if vnext {
+					flags.sessionID = sessionID
+				}
+			}
+
 			action := &MonitorAction{
 				AgentContext: agentContext,
 				flags:        flags,
@@ -77,6 +90,7 @@ This is useful for troubleshooting agent startup issues or monitoring agent beha
 	cmd.Flags().StringVarP(&flags.projectName, "project-name", "p", "", "AI Foundry project name")
 	cmd.Flags().StringVarP(&flags.name, "name", "n", "", "Name of the hosted agent (required)")
 	cmd.Flags().StringVarP(&flags.version, "version", "v", "", "Version of the hosted agent (required)")
+	cmd.Flags().StringVarP(&flags.sessionID, "session", "s", "", "Session ID to stream logs for")
 	cmd.Flags().BoolVarP(&flags.follow, "follow", "f", false, "Stream logs in real-time")
 	cmd.Flags().IntVarP(&flags.tail, "tail", "l", 50, "Number of trailing log lines to fetch (1-300)")
 	cmd.Flags().StringVarP(&flags.logType, "type", "t", "console", "Type of logs: 'console' (stdout/stderr) or 'system' (container events)")
@@ -94,15 +108,28 @@ func (a *MonitorAction) Run(ctx context.Context) error {
 		return err
 	}
 
-	body, err := agentClient.GetAgentContainerLogStream(
-		ctx,
-		a.Name,
-		a.Version,
-		DefaultAgentAPIVersion,
-		a.flags.logType,
-		a.flags.tail,
-		a.flags.follow,
-	)
+	var body io.ReadCloser
+	if a.flags.sessionID != "" {
+		fmt.Fprintf(os.Stderr, "Streaming session logs for %s (session: %s)...\n", a.Name, a.flags.sessionID)
+		body, err = agentClient.GetAgentSessionLogStream(
+			ctx,
+			a.Name,
+			a.Version,
+			a.flags.sessionID,
+			"2025-11-15-preview",
+			a.flags.follow,
+		)
+	} else {
+		body, err = agentClient.GetAgentContainerLogStream(
+			ctx,
+			a.Name,
+			a.Version,
+			DefaultAgentAPIVersion,
+			a.flags.logType,
+			a.flags.tail,
+			a.flags.follow,
+		)
+	}
 	if err != nil {
 		// Suppress context deadline/cancellation errors (expected in non-follow timeout and Ctrl+C)
 		if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
@@ -141,3 +168,42 @@ func validateMonitorFlags(flags *monitorFlags) error {
 
 	return nil
 }
+
+// resolveMonitorSession reports whether vnext is enabled and, when it is, looks up the session ID recorded
+// for agentName in the local agent config (.foundry-agent.json). It returns ("", false) if the azd client cannot
+// be created or vnext is off, and ("", true) if vnext is on but the config path or session entry is missing.
+func resolveMonitorSession(ctx context.Context, agentName string) (string, bool) {
+	azdClient, err := azdext.NewAzdClient()
+	if err != nil {
+		return "", false // without an azd client vnext cannot be checked; report it as disabled
+	}
+	defer azdClient.Close()
+
+	// Read the vnext flag: the azd environment value wins; the process environment is the fallback.
+	vnextValue := ""
+	azdEnv, err := loadAzdEnvironment(ctx, azdClient)
+	if err == nil {
+		vnextValue = azdEnv["enableHostedAgentVNext"]
+	}
+	if vnextValue == "" {
+		vnextValue = os.Getenv("enableHostedAgentVNext")
+	}
+	enabled, err := strconv.ParseBool(vnextValue)
+	if err != nil || !enabled { // an empty or unparsable value counts as disabled
+		return "", false
+	}
+
+	// Look up the session stored for this agent; every return below reports vnext as enabled.
+	configPath, err := resolveConfigPath(ctx, azdClient)
+	if err != nil {
+		return "", true // vnext is on, but no config path means there is no session to report
+	}
+	agentCtx := loadLocalContext(configPath)
+	if agentCtx.Sessions != nil {
+		if sid, ok := agentCtx.Sessions[agentName]; ok {
+			return sid, true
+		}
+	}
+
+	return "", true // vnext is on, but no session is recorded for agentName
+}
diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_api/operations.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/agent_api/operations.go
@@ -802,6 +802,7 @@ func (c *AgentClient) GetAgentContainerLogStream(
 	query.Set("api-version", apiVersion)
 	query.Set("kind", kind)
 	query.Set("tail", strconv.Itoa(tail))
+	query.Set("follow", strconv.FormatBool(follow))
 	u.RawQuery = query.Encode()
 
 	requestURL := u.String()
@@ -845,11 +846,12 @@ func (c *AgentClient) GetAgentContainerLogStream(
 	}
 
 	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
 		_ = resp.Body.Close()
 		if cancel != nil {
 			cancel()
 		}
-		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+		return nil, fmt.Errorf("unexpected status code: %d — %s", resp.StatusCode, string(body))
 	}
 
 	// Wrap the body to cancel the context timeout when closed.
@@ -871,6 +873,76 @@ func (r *cancelOnCloseReader) Close() error {
 	return r.ReadCloser.Close()
 }
 
+// GetAgentSessionLogStream opens the session-based logstream endpoint (used for vnext agent configurations) and
+// returns the response body for the caller to read and close. Any non-200 status is returned as an error.
+func (c *AgentClient) GetAgentSessionLogStream(
+	ctx context.Context,
+	agentName, agentVersion, sessionID, apiVersion string,
+	follow bool,
+) (io.ReadCloser, error) {
+	u, err := url.Parse(c.endpoint)
+	if err != nil {
+		return nil, fmt.Errorf("invalid endpoint URL: %w", err)
+	}
+
+	u.Path += fmt.Sprintf("/agents/%s/versions/%s/sessions/%s:logstream", agentName, agentVersion, sessionID)
+
+	query := u.Query()
+	query.Set("api-version", apiVersion)
+	query.Set("follow", strconv.FormatBool(follow))
+	u.RawQuery = query.Encode()
+
+	requestURL := u.String()
+	token, err := c.credential.GetToken(ctx, policy.TokenRequestOptions{
+		Scopes: []string{"https://ai.azure.com/.default"},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to get auth token: %w", err)
+	}
+
+	requestCtx := ctx
+	var cancel context.CancelFunc
+	if !follow { // one-shot fetch: cap the request at 5 seconds; follow mode is bound only by ctx
+		requestCtx, cancel = context.WithTimeout(ctx, 5*time.Second)
+	}
+
+	req, err := http.NewRequestWithContext(requestCtx, http.MethodGet, requestURL, nil)
+	if err != nil {
+		if cancel != nil {
+			cancel()
+		}
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Authorization", "Bearer "+token.Token)
+	req.Header.Set("User-Agent", fmt.Sprintf("azd-ext-azure-ai-agents/%s", version.Version))
+
+	httpClient := &http.Client{}
+	//nolint:gosec // request URL is built from trusted SDK endpoint + path components
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		if cancel != nil {
+			cancel()
+		}
+		return nil, fmt.Errorf("HTTP request failed: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) // best-effort error detail, at most 1024 bytes
+		_ = resp.Body.Close()
+		if cancel != nil {
+			cancel()
+		}
+		return nil, fmt.Errorf("unexpected status code: %d — %s", resp.StatusCode, string(body))
+	}
+
+	if cancel != nil { // non-follow: pass cancel along so the wrapper can release the timeout context on Close
+		return &cancelOnCloseReader{ReadCloser: resp.Body, cancel: cancel}, nil
+	}
+
+	return resp.Body, nil
+}
+
 // GetAgentContainerOperation retrieves the status of a container operation
 func (c *AgentClient) GetAgentContainerOperation(ctx context.Context, agentName, operationID, apiVersion string) (*AgentContainerOperationObject, error) {
 	url := fmt.Sprintf("%s/agents/%s/operations/%s?api-version=%s", c.endpoint, agentName, operationID, apiVersion)