diff --git a/.gitignore b/.gitignore
index 27e2f7832c..18d44ceb8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@
 *.so
 *.dylib
 
+# Avoid checking in keys
+*.pem
+
 # Test binary, built with `go test -c`
 *.test
diff --git a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml
index af4b4e91ea..f61e8bfb44 100644
--- a/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml
+++ b/deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml
@@ -214,6 +214,15 @@ spec:
             fieldRef:
               apiVersion: v1
               fieldPath: status.hostIP
+        livenessProbe:
+          httpGet:
+            path: /metrics
+            port: {{ .Values.retinaPort }}
+          initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }}
+          periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }}
+          timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }}
+          failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }}
+          successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
        securityContext:
          capabilities:
            add:
diff --git a/test/e2e/README.md b/test/e2e/README.md
index 06a4581b36..b16ef854fb 100644
--- a/test/e2e/README.md
+++ b/test/e2e/README.md
@@ -17,3 +17,19 @@ For reference, see the `test-all` recipe in the root [Makefile](../../Makefile).
 
 For sample test, please check out:
 [the Retina E2E.](./scenarios/retina/drop/scenario.go)
+
+## Sample VSCode `settings.json` for running with an existing cluster
+
+```json
+"go.testFlags": [
+    "-v",
+    "-timeout=40m",
+    "-tags=e2e",
+    "-args",
+    "-create-infra=false",
+    "-delete-infra=false",
+    "-image-namespace=retistrynamespace",
+    "-image-registry=yourregistry",
+    "-image-tag=yourtesttag",
+],
+```
diff --git a/test/e2e/framework/kubernetes/create-kapinger-deployment.go b/test/e2e/framework/kubernetes/create-kapinger-deployment.go
index 60d082b7c3..087ae25451 100644
--- a/test/e2e/framework/kubernetes/create-kapinger-deployment.go
+++ b/test/e2e/framework/kubernetes/create-kapinger-deployment.go
@@ -107,6 +107,9 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
 				},
 				Spec: v1.PodSpec{
+					NodeSelector: map[string]string{
+						"kubernetes.io/os": "linux",
+					},
 					Affinity: &v1.Affinity{
 						PodAntiAffinity: &v1.PodAntiAffinity{
 							// prefer an even spread across the cluster to avoid scheduling on the same node
diff --git a/test/e2e/framework/kubernetes/exec-pod.go b/test/e2e/framework/kubernetes/exec-pod.go
index 2991a90aa2..03728ddbda 100644
--- a/test/e2e/framework/kubernetes/exec-pod.go
+++ b/test/e2e/framework/kubernetes/exec-pod.go
@@ -1,6 +1,7 @@
 package kubernetes
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"log"
@@ -9,6 +10,7 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/client-go/tools/remotecommand"
 	"k8s.io/kubectl/pkg/scheme"
@@ -27,7 +29,17 @@ func (e *ExecInPod) Run() error {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	err := ExecPod(ctx, e.KubeConfigFilePath, e.PodNamespace, e.PodName, e.Command)
+	config, err := clientcmd.BuildConfigFromFlags("", e.KubeConfigFilePath)
+	if err != nil {
+		return fmt.Errorf("error building kubeconfig: %w", err)
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating Kubernetes client: %w", err)
+	}
+
+	_, err = ExecPod(ctx, clientset, config, e.PodNamespace, e.PodName, e.Command)
 	if err != nil {
 		return fmt.Errorf("error executing command [%s]: %w", e.Command, err)
 	}
@@ -43,17 +55,8 @@ func (e *ExecInPod) Stop() error {
 	return nil
 }
 
-func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, command string) error {
-	config, err := clientcmd.BuildConfigFromFlags("", kubeConfigFilePath)
-	if err != nil {
-		return fmt.Errorf("error building kubeconfig: %w", err)
-	}
-
-	clientset, err := kubernetes.NewForConfig(config)
-	if err != nil {
-		return fmt.Errorf("error creating Kubernetes client: %w", err)
-	}
-
+func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) ([]byte, error) {
+	log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
 	req := clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
 		Namespace(namespace).SubResource(ExecSubResources)
 	option := &v1.PodExecOptions{
@@ -69,20 +72,21 @@ func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, comman
 		scheme.ParameterCodec,
 	)
 
+	var buf bytes.Buffer
 	exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
 	if err != nil {
-		return fmt.Errorf("error creating executor: %w", err)
+		return buf.Bytes(), fmt.Errorf("error creating executor: %w", err)
 	}
 
-	log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
 	err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
 		Stdin:  os.Stdin,
-		Stdout: os.Stdout,
-		Stderr: os.Stderr,
+		Stdout: &buf,
+		Stderr: &buf,
 	})
 	if err != nil {
-		return fmt.Errorf("error executing command: %w", err)
+		return buf.Bytes(), fmt.Errorf("error executing command: %w", err)
 	}
 
-	return nil
+	res := buf.Bytes()
+	return res, nil
 }
diff --git a/test/e2e/framework/kubernetes/get-logs.go b/test/e2e/framework/kubernetes/get-logs.go
index 7cd728af85..a57680ce84 100644
--- a/test/e2e/framework/kubernetes/get-logs.go
+++ b/test/e2e/framework/kubernetes/get-logs.go
@@ -12,9 +12,16 @@ import (
 	"k8s.io/client-go/tools/clientcmd"
 )
 
-func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
+type GetPodLogs struct {
+	KubeConfigFilePath string
+	Namespace          string
+	LabelSelector      string
+}
+
+func (p *GetPodLogs) Run() error {
+	fmt.Printf("printing pod logs for namespace: %s, labelselector: %s\n", p.Namespace, p.LabelSelector)
 	// Load the kubeconfig file to get the configuration to access the cluster
-	config, err := clientcmd.BuildConfigFromFlags("", kubeconfigpath)
+	config, err := clientcmd.BuildConfigFromFlags("", p.KubeConfigFilePath)
 	if err != nil {
 		log.Printf("error building kubeconfig: %s\n", err)
 	}
@@ -25,8 +32,14 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
 		log.Printf("error creating clientset: %s\n", err)
 	}
 
+	PrintPodLogs(context.Background(), clientset, p.Namespace, p.LabelSelector)
+
+	return nil
+}
+
+func PrintPodLogs(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) {
 	// List all the pods in the namespace
-	pods, err := clientset.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
+	pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: labelSelector,
 	})
 	if err != nil {
@@ -55,5 +68,6 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
 
 		// Print the logs
 		log.Println(string(buf))
+		fmt.Printf("#######################################################\n")
 	}
 }
diff --git a/test/e2e/framework/kubernetes/install-retina-helm.go b/test/e2e/framework/kubernetes/install-retina-helm.go
index 82f10745de..e5507b7ef9 100644
--- a/test/e2e/framework/kubernetes/install-retina-helm.go
+++ b/test/e2e/framework/kubernetes/install-retina-helm.go
@@ -1,6 +1,7 @@
 package kubernetes
 
 import (
+	"context"
 	"fmt"
 	"log"
 	"os"
@@ -11,10 +12,12 @@ import (
 	"helm.sh/helm/v3/pkg/action"
 	"helm.sh/helm/v3/pkg/chart/loader"
 	"helm.sh/helm/v3/pkg/cli"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
 )
 
 const (
-	createTimeout = 240 * time.Second // windpws is slow
+	createTimeout = 20 * time.Minute // windows is slow
 	deleteTimeout = 60 * time.Second
 )
 
@@ -32,6 +35,8 @@ type InstallHelmChart struct {
 }
 
 func (i *InstallHelmChart) Run() error {
+	ctx, cancel := context.WithTimeout(context.Background(), createTimeout)
+	defer cancel()
 	settings := cli.New()
 	settings.KubeConfig = i.KubeConfigFilePath
 	actionConfig := new(action.Configuration)
@@ -97,7 +102,7 @@ func (i *InstallHelmChart) Run() error {
 	client.WaitForJobs = true
 
 	// install the chart here
-	rel, err := client.Run(chart, chart.Values)
+	rel, err := client.RunWithContext(ctx, chart, chart.Values)
 	if err != nil {
 		return fmt.Errorf("failed to install chart: %w", err)
 	}
@@ -106,6 +111,23 @@ func (i *InstallHelmChart) Run() error {
 
 	// this will confirm the values set during installation
 	log.Printf("chart values: %v\n", rel.Config)
 
+	// ensure all pods are running, since helm doesn't care about windows
+	config, err := clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath)
+	if err != nil {
+		return fmt.Errorf("error building kubeconfig: %w", err)
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating Kubernetes client: %w", err)
+	}
+
+	labelSelector := "k8s-app=retina"
+	err = WaitForPodReady(ctx, clientset, "kube-system", labelSelector)
+	if err != nil {
+		return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
+	}
+
 	return nil
 }
diff --git a/test/e2e/framework/kubernetes/no-crashes.go b/test/e2e/framework/kubernetes/no-crashes.go
new file mode 100644
index 0000000000..a5d5ec03b7
--- /dev/null
+++ b/test/e2e/framework/kubernetes/no-crashes.go
@@ -0,0 +1,43 @@
+package kubernetes
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+)
+
+var ErrPodCrashed = fmt.Errorf("pod has crashed")
+
+type EnsureStableCluster struct {
+	LabelSelector      string
+	PodNamespace       string
+	KubeConfigFilePath string
+}
+
+func (n *EnsureStableCluster) Run() error {
+	config, err := clientcmd.BuildConfigFromFlags("", n.KubeConfigFilePath)
+	if err != nil {
+		return fmt.Errorf("error building kubeconfig: %w", err)
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating Kubernetes client: %w", err)
+	}
+
+	err = WaitForPodReady(context.TODO(), clientset, n.PodNamespace, n.LabelSelector)
+	if err != nil {
+		return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
+	}
+	return nil
+}
+
+func (n *EnsureStableCluster) Prevalidate() error {
+	return nil
+}
+
+func (n *EnsureStableCluster) Stop() error {
+	return nil
+}
diff --git a/test/e2e/framework/kubernetes/port-forward.go b/test/e2e/framework/kubernetes/port-forward.go
index 5237109867..531fc3953b 100644
--- a/test/e2e/framework/kubernetes/port-forward.go
+++ b/test/e2e/framework/kubernetes/port-forward.go
@@ -11,6 +11,7 @@ import (
 	"time"
 
 	retry "github.com/microsoft/retina/test/retry"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/clientcmd"
@@ -120,7 +121,7 @@ func (p *PortForward) Run() error {
 }
 
 func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kubernetes.Clientset) (string, error) {
-	targetPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
+	targetPodsAll, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: p.LabelSelector,
 		FieldSelector: "status.phase=Running",
 	})
@@ -128,6 +129,15 @@ func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kuber
 		return "", fmt.Errorf("could not list pods in %q with label %q: %w", p.Namespace, p.LabelSelector, errAffinity)
 	}
 
+	// omit windows pods because we can't port-forward to them
+	targetPodsLinux := make([]v1.Pod, 0)
+	for i := range targetPodsAll.Items {
+		if targetPodsAll.Items[i].Spec.NodeSelector["kubernetes.io/os"] != "windows" {
+			targetPodsLinux = append(targetPodsLinux, targetPodsAll.Items[i])
+		}
+	}
+
+	// get all pods with optional label affinity
 	affinityPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: p.OptionalLabelAffinity,
 		FieldSelector: "status.phase=Running",
 	})
@@ -143,10 +153,10 @@ func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kuber
 	}
 
 	// if a pod is found on the same node as an affinity pod, use it
-	for i := range targetPods.Items {
-		if affinityNodes[targetPods.Items[i].Spec.NodeName] {
+	for i := range targetPodsLinux {
+		if affinityNodes[targetPodsLinux[i].Spec.NodeName] {
 			// found a pod with the specified label, on a node with the optional label affinity
-			return targetPods.Items[i].Name, nil
+			return targetPodsLinux[i].Name, nil
 		}
 	}
diff --git a/test/e2e/framework/kubernetes/wait-pod-ready.go b/test/e2e/framework/kubernetes/wait-pod-ready.go
index 208fe140d1..4ec81ae91a 100644
--- a/test/e2e/framework/kubernetes/wait-pod-ready.go
+++ b/test/e2e/framework/kubernetes/wait-pod-ready.go
@@ -15,12 +15,18 @@ import (
 const (
 	RetryTimeoutPodsReady  = 5 * time.Minute
 	RetryIntervalPodsReady = 5 * time.Second
+
+	printInterval = 5 // print to stdout every 5 iterations
 )
 
 func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
 	podReadyMap := make(map[string]bool)
 
+	printIterator := 0
 	conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
+		defer func() {
+			printIterator++
+		}()
 		var podList *corev1.PodList
 		podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
 		if err != nil {
@@ -40,11 +46,21 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
 				return false, fmt.Errorf("error getting Pod: %w", err)
 			}
 
+			for istatus := range pod.Status.ContainerStatuses {
+				status := &pod.Status.ContainerStatuses[istatus]
+				if status.RestartCount > 0 {
+					return false, fmt.Errorf("pod %s has %d restarts: status: %+v: %w", pod.Name, status.RestartCount, status, ErrPodCrashed)
+				}
+			}
+
 			// Check the Pod phase
 			if pod.Status.Phase != corev1.PodRunning {
-				log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
+				if printIterator%printInterval == 0 {
+					log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
+				}
 				return false, nil
 			}
+
 			if !podReadyMap[pod.Name] {
 				log.Printf("pod \"%s\" is in Running state\n", pod.Name)
 				podReadyMap[pod.Name] = true
@@ -56,6 +72,7 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
 
 	err := wait.PollUntilContextCancel(ctx, RetryIntervalPodsReady, true, conditionFunc)
 	if err != nil {
+		PrintPodLogs(ctx, clientset, namespace, labelSelector)
 		return fmt.Errorf("error waiting for pods in namespace \"%s\" with label \"%s\" to be in Running state: %w", namespace, labelSelector, err)
 	}
 	return nil
diff --git a/test/e2e/framework/prometheus/prometheus.go b/test/e2e/framework/prometheus/prometheus.go
index 4cdaab1614..0014e2d368 100644
--- a/test/e2e/framework/prometheus/prometheus.go
+++ b/test/e2e/framework/prometheus/prometheus.go
@@ -7,6 +7,7 @@ import (
 	"log"
 	"net/http"
 	"reflect"
+	"strings"
 	"time"
 
 	"github.com/microsoft/retina/test/retry"
@@ -34,7 +35,7 @@ func CheckMetric(promAddress, metricName string, validMetric map[string]string)
 		var err error
 
 		// obtain a full dump of all metrics on the endpoint
-		metrics, err = getAllPrometheusMetrics(promAddress)
+		metrics, err = getAllPrometheusMetricsFromURL(promAddress)
 		if err != nil {
 			return fmt.Errorf("could not start port forward within %ds: %w ", defaultTimeout, err)
 		}
@@ -57,6 +58,21 @@ func CheckMetric(promAddress, metricName string, validMetric map[string]string)
 	return nil
 }
 
+func CheckMetricFromBuffer(prometheusMetricData []byte, metricName string, validMetric map[string]string) error {
+	metrics, err := getAllPrometheusMetricsFromBuffer(prometheusMetricData)
+	if err != nil {
+		return fmt.Errorf("failed to parse prometheus metrics: %w", err)
+	}
+
+	err = verifyValidMetricPresent(metricName, metrics, validMetric)
+	if err != nil {
+		log.Printf("failed to find metric matching %s: %+v\n", metricName, validMetric)
+		return ErrNoMetricFound
+	}
+
+	return nil
+}
+
 func verifyValidMetricPresent(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error {
 	for _, metric := range data {
 		if metric.GetName() == metricName {
@@ -77,7 +93,7 @@ func verifyValidMetricPresent(metricName string, data map[string]*promclient.Met
 	return fmt.Errorf("failed to find metric matching: %+v: %w", validMetric, ErrNoMetricFound)
 }
 
-func getAllPrometheusMetrics(url string) (map[string]*promclient.MetricFamily, error) {
+func getAllPrometheusMetricsFromURL(url string) (map[string]*promclient.MetricFamily, error) {
 	client := http.Client{}
 	resp, err := client.Get(url) //nolint
 	if err != nil {
@@ -89,7 +105,7 @@ func getAllPrometheusMetrics(url string) (map[string]*promclient.MetricFamily, e
 		return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic
 	}
 
-	metrics, err := parseReaderPrometheusMetrics(resp.Body)
+	metrics, err := ParseReaderPrometheusMetrics(resp.Body)
 	if err != nil {
 		return nil, err
 	}
@@ -97,7 +113,25 @@ func getAllPrometheusMetrics(url string) (map[string]*promclient.MetricFamily, e
 	return metrics, nil
 }
 
-func parseReaderPrometheusMetrics(input io.Reader) (map[string]*promclient.MetricFamily, error) {
+func getAllPrometheusMetricsFromBuffer(buf []byte) (map[string]*promclient.MetricFamily, error) {
+	var parser expfmt.TextParser
+	reader := strings.NewReader(string(buf))
+	return parser.TextToMetricFamilies(reader) //nolint
+}
+
+func ParseReaderPrometheusMetrics(input io.Reader) (map[string]*promclient.MetricFamily, error) {
 	var parser expfmt.TextParser
 	return parser.TextToMetricFamilies(input) //nolint
 }
+
+// When capturing prometheus output via curl and exec, there's a lot
+// of garbage at the front
+func stripExecGarbage(s string) string {
+	index := strings.Index(s, "#")
+	if index == -1 {
+		// If there's no `#`, return the original string
+		return s
+	}
+	// Slice the string from the first `#` onward, dropping the leading garbage
+	return s[index:]
+}
diff --git a/test/e2e/jobs/jobs.go b/test/e2e/jobs/jobs.go
index eee000c415..8c7c98e04d 100644
--- a/test/e2e/jobs/jobs.go
+++ b/test/e2e/jobs/jobs.go
@@ -9,37 +9,49 @@ import (
 	"github.com/microsoft/retina/test/e2e/scenarios/drop"
 	"github.com/microsoft/retina/test/e2e/scenarios/latency"
 	tcp "github.com/microsoft/retina/test/e2e/scenarios/tcp"
+	"github.com/microsoft/retina/test/e2e/scenarios/windows"
 )
 
-func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string) *types.Job {
+func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string, createInfra bool) *types.Job {
 	job := types.NewJob("Create e2e test infrastructure")
 
-	job.AddStep(&azure.CreateResourceGroup{
-		SubscriptionID:    subID,
-		ResourceGroupName: clusterName,
-		Location:          location,
-	}, nil)
-
-	job.AddStep(&azure.CreateVNet{
-		VnetName:         "testvnet",
-		VnetAddressSpace: "10.0.0.0/9",
-	}, nil)
-
-	job.AddStep(&azure.CreateSubnet{
-		SubnetName:         "testsubnet",
-		SubnetAddressSpace: "10.0.0.0/12",
-	}, nil)
-
-	job.AddStep(&azure.CreateNPMCluster{
-		ClusterName:  clusterName,
-		PodCidr:      "10.128.0.0/9",
-		DNSServiceIP: "192.168.0.10",
-		ServiceCidr:  "192.168.0.0/28",
-	}, nil)
-
-	job.AddStep(&azure.GetAKSKubeConfig{
-		KubeConfigFilePath: kubeConfigFilePath,
-	}, nil)
+	if createInfra {
+		job.AddStep(&azure.CreateResourceGroup{
+			SubscriptionID:    subID,
+			ResourceGroupName: clusterName,
+			Location:          location,
+		}, nil)
+
+		job.AddStep(&azure.CreateVNet{
+			VnetName:         "testvnet",
+			VnetAddressSpace: "10.0.0.0/9",
+		}, nil)
+
+		job.AddStep(&azure.CreateSubnet{
+			SubnetName:         "testsubnet",
+			SubnetAddressSpace: "10.0.0.0/12",
+		}, nil)
+
+		job.AddStep(&azure.CreateNPMCluster{
+			ClusterName:  clusterName,
+			PodCidr:      "10.128.0.0/9",
+			DNSServiceIP: "192.168.0.10",
+			ServiceCidr:  "192.168.0.0/28",
+		}, nil)
+
+		job.AddStep(&azure.GetAKSKubeConfig{
+			KubeConfigFilePath: kubeConfigFilePath,
+		}, nil)
+
+	} else {
+		job.AddStep(&azure.GetAKSKubeConfig{
+			KubeConfigFilePath: kubeConfigFilePath,
+			ClusterName:        clusterName,
+			SubscriptionID:     subID,
+			ResourceGroupName:  clusterName,
+			Location:           location,
+		}, nil)
+	}
 
 	job.AddStep(&generic.LoadFlags{
 		TagEnv: generic.DefaultTagEnv,
@@ -77,6 +89,8 @@ func InstallAndTestRetinaBasicMetrics(kubeConfigFilePath, chartPath string) *typ
 
 	job.AddScenario(tcp.ValidateTCPMetrics())
 
+	job.AddScenario(windows.ValidateWindowsBasicMetric())
+
 	dnsScenarios := []struct {
 		name string
 		req  *dns.RequestValidationParams
@@ -122,6 +136,11 @@ func InstallAndTestRetinaBasicMetrics(kubeConfigFilePath, chartPath string) *typ
 		job.AddScenario(dns.ValidateBasicDNSMetrics(scenario.name, scenario.req, scenario.resp))
 	}
 
+	job.AddStep(&kubernetes.EnsureStableCluster{
+		PodNamespace:  "kube-system",
+		LabelSelector: "k8s-app=retina",
+	}, nil)
+
 	return job
 }
 
@@ -184,5 +203,10 @@ func UpgradeAndTestRetinaAdvancedMetrics(kubeConfigFilePath, chartPath, valuesFi
 
 	job.AddScenario(latency.ValidateLatencyMetric())
 
+	job.AddStep(&kubernetes.EnsureStableCluster{
+		PodNamespace:  "kube-system",
+		LabelSelector: "k8s-app=retina",
+	}, nil)
+
 	return job
 }
diff --git a/test/e2e/retina_e2e_test.go b/test/e2e/retina_e2e_test.go
index f804eef495..e69dab3e61 100644
--- a/test/e2e/retina_e2e_test.go
+++ b/test/e2e/retina_e2e_test.go
@@ -2,6 +2,7 @@ package retina
 
 import (
 	"crypto/rand"
+	"flag"
 	"math/big"
 	"os"
 	"os/user"
@@ -16,14 +17,23 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-var locations = []string{"eastus2", "centralus", "southcentralus", "uksouth", "centralindia", "westus2"}
+var (
+	locations   = []string{"eastus2", "centralus", "southcentralus", "uksouth", "centralindia", "westus2"}
+	createInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
+	deleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
+)
 
 // TestE2ERetina tests all e2e scenarios for retina
 func TestE2ERetina(t *testing.T) {
 	curuser, err := user.Current()
 	require.NoError(t, err)
+	flag.Parse()
 
-	clusterName := curuser.Username + common.NetObsRGtag + strconv.FormatInt(time.Now().Unix(), 10)
+	clusterName := os.Getenv("CLUSTER_NAME")
+	if clusterName == "" {
+		clusterName = curuser.Username + common.NetObsRGtag + strconv.FormatInt(time.Now().Unix(), 10)
+		t.Logf("CLUSTER_NAME is not set, generating a random cluster name: %s", clusterName)
+	}
 
 	subID := os.Getenv("AZURE_SUBSCRIPTION_ID")
 	require.NotEmpty(t, subID)
@@ -49,7 +59,7 @@ func TestE2ERetina(t *testing.T) {
 	kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")
 
 	// CreateTestInfra
-	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, clusterName, location, kubeConfigFilePath))
+	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, clusterName, location, kubeConfigFilePath, *createInfra))
 	createTestInfra.Run()
 
 	// Hacky way to ensure that the test infra is deleted even if the test panics
@@ -57,7 +67,9 @@ func TestE2ERetina(t *testing.T) {
 		if r := recover(); r != nil {
 			t.Logf("Recovered in TestE2ERetina, %v", r)
 		}
-		_ = jobs.DeleteTestInfra(subID, clusterName, location).Run()
+		if *deleteInfra {
+			_ = jobs.DeleteTestInfra(subID, clusterName, location).Run()
+		}
 	}()
 
 	// Install and test Retina basic metrics
diff --git a/test/e2e/scenarios/windows/scenario.go b/test/e2e/scenarios/windows/scenario.go
new file mode 100644
index 0000000000..12d82f74be
--- /dev/null
+++ b/test/e2e/scenarios/windows/scenario.go
@@ -0,0 +1,19 @@
+package windows
+
+import (
+	"github.com/microsoft/retina/test/e2e/framework/types"
+)
+
+func ValidateWindowsBasicMetric() *types.Scenario {
+	name := "Windows Metrics"
+	steps := []*types.StepWrapper{
+		{
+			Step: &ValidateHNSMetric{
+				KubeConfigFilePath:       "./test.pem",
+				RetinaDaemonSetNamespace: "kube-system",
+				RetinaDaemonSetName:      "retina-agent-win",
+			},
+		},
+	}
+	return types.NewScenario(name, steps...)
+}
diff --git a/test/e2e/scenarios/windows/validate-hns-metrics.go b/test/e2e/scenarios/windows/validate-hns-metrics.go
new file mode 100644
index 0000000000..d4bcd0de1a
--- /dev/null
+++ b/test/e2e/scenarios/windows/validate-hns-metrics.go
@@ -0,0 +1,106 @@
+package windows
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/microsoft/retina/test/e2e/common"
+	k8s "github.com/microsoft/retina/test/e2e/framework/kubernetes"
+	prom "github.com/microsoft/retina/test/e2e/framework/prometheus"
+	"github.com/microsoft/retina/test/retry"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	kubernetes "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+)
+
+const (
+	defaultRetryDelay        = 5 * time.Second
+	defaultRetryAttempts     = 5
+	defaultHTTPClientTimeout = 2 * time.Second
+)
+
+var (
+	ErrorNoWindowsPod = errors.New("no windows retina pod found")
+	ErrNoMetricFound  = fmt.Errorf("no metric found")
+
+	hnsMetricName  = "networkobservability_windows_hns_stats"
+	defaultRetrier = retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay}
+)
+
+type ValidateHNSMetric struct {
+	KubeConfigFilePath       string
+	RetinaDaemonSetNamespace string
+	RetinaDaemonSetName      string
+}
+
+func (v *ValidateHNSMetric) Run() error {
+	config, err := clientcmd.BuildConfigFromFlags("", v.KubeConfigFilePath)
+	if err != nil {
+		return fmt.Errorf("error building kubeconfig: %w", err)
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating Kubernetes client: %w", err)
+	}
+
+	pods, err := clientset.CoreV1().Pods(v.RetinaDaemonSetNamespace).List(context.TODO(), metav1.ListOptions{
+		LabelSelector: "k8s-app=retina",
+	})
+	if err != nil {
+		return fmt.Errorf("error listing retina pods: %w", err)
+	}
+
+	var windowsRetinaPod *v1.Pod
+	for pod := range pods.Items {
+		if pods.Items[pod].Spec.NodeSelector["kubernetes.io/os"] == "windows" {
+			windowsRetinaPod = &pods.Items[pod]
+		}
+	}
+	if windowsRetinaPod == nil {
+		return ErrorNoWindowsPod
+	}
+
+	labels := map[string]string{
+		"direction": "win_packets_sent_count",
+	}
+
+	log.Printf("checking for metric %s with labels %+v\n", hnsMetricName, labels)
+
+	// wrap this in a retrier because windows is slow
+	var output []byte
+	err = defaultRetrier.Do(context.TODO(), func() error {
+		output, err = k8s.ExecPod(context.TODO(), clientset, config, windowsRetinaPod.Namespace, windowsRetinaPod.Name, fmt.Sprintf("curl -s http://localhost:%d/metrics", common.RetinaPort))
+		if err != nil {
+			return fmt.Errorf("error executing command in windows retina pod: %w", err)
+		}
+		if len(output) == 0 {
+			return ErrNoMetricFound
+		}
+
+		err = prom.CheckMetricFromBuffer(output, hnsMetricName, labels)
+		if err != nil {
+			return fmt.Errorf("failed to verify prometheus metrics: %w", err)
+		}
+
+		return nil
+	})
+	if err != nil {
+		return fmt.Errorf("failed to find metric %s with labels %+v: %w", hnsMetricName, labels, err)
+	}
+
+	log.Printf("found metric matching %+v: with labels %+v\n", hnsMetricName, labels)
+	return nil
+}
+
+func (v *ValidateHNSMetric) Prevalidate() error {
+	return nil
+}
+
+func (v *ValidateHNSMetric) Stop() error {
+	return nil
+}