diff --git a/tests/common/support/dscInitialization.go b/tests/common/support/dscInitialization.go
index f13eec555..6818faacd 100644
--- a/tests/common/support/dscInitialization.go
+++ b/tests/common/support/dscInitialization.go
@@ -47,6 +47,24 @@ func GetApplicationsNamespace(test Test) (string, error) {
 	return GetApplicationsNamespaceFromDSCI(test, DefaultDSCIName)
 }
 
+// GetRHOAIVersionFromDSCI returns status.release.version of the default DSCI, or "" (after logging why) if unavailable.
+func GetRHOAIVersionFromDSCI(test Test) string {
+	dsci, getErr := GetDSCI(test, DefaultDSCIName)
+	if getErr != nil {
+		test.T().Logf("Failed to get DSCI for version: %v", getErr)
+		return ""
+	}
+	release, present, readErr := unstructured.NestedString(dsci.Object, "status", "release", "version")
+	if readErr != nil {
+		test.T().Logf("Failed to read status.release.version from DSCI %s: %v", DefaultDSCIName, readErr)
+	} else if !present {
+		test.T().Logf("DSCI %s is missing status.release.version", DefaultDSCIName)
+	} else {
+		return release
+	}
+	return ""
+}
+
 func GetApplicationsNamespaceFromDSCI(test Test, dsciName string) (string, error) {
 	dsci, err := GetDSCI(test, dsciName)
 	if err != nil {
diff --git a/tests/trainer/README.md b/tests/trainer/README.md
index 2e67d7656..69354dcd5 100644
--- a/tests/trainer/README.md
+++ b/tests/trainer/README.md
@@ -63,6 +63,42 @@ go test ./tests/trainer/ -v
 go test ./tests/trainer -run TestCustomTrainingRuntimesAvailable -v
 ```
 
+## Upgrade Tests
+
+Upgrade tests validate that Trainer v2 resources survive an RHOAI upgrade. They run in two phases controlled by `TEST_TIER`:
+
+```bash
+# Pre-upgrade: create resources and store baselines
+TEST_TIER=Pre-Upgrade go test -v -timeout 10m ./tests/trainer/
+
+# ... perform RHOAI upgrade ...
+
+# Post-upgrade: verify resources survived and complete workloads
+TEST_TIER=Post-Upgrade go test -v -timeout 10m ./tests/trainer/
+```
+
+### Test Coverage
+
+| Test Pair | What it validates |
+|-----------|-------------------|
+| `TestSetupSleepTrainJob` / `TestVerifySleepTrainJob` | Running TrainJob survives upgrade with zero pod restarts |
+| `TestSetupTrainingRuntime` / `TestVerifyTrainingRuntime` | Custom namespace-scoped TrainingRuntime persists, spec unchanged |
+| `TestSetupCustomRuntimeUpgradeTrainJob` / `TestRunCustomRuntimeUpgradeTrainJob` | Custom ClusterTrainingRuntime + Kueue suspend/resume lifecycle |
+
+### Spec Integrity Checks
+
+Post-upgrade tests compare resource `metadata.generation` against pre-upgrade baselines stored in ConfigMaps. When generation changes (indicating a spec mutation), before/after specs are logged as JSON for analysis. The assertion is version-aware — an explicit allowlist in [`utils/utils_upgrade.go`](utils/utils_upgrade.go) defines upgrade paths where spec mutations are expected (e.g., API changes across minor versions). The RHOAI version is read from DSCI `status.release.version`.
+
+### Known Limitations
+
+- **RHOAIENG-48867**: Four Kueue suspend/resume tests are skipped because the Trainer controller fails to update the immutable JobSet `spec.replicatedJobs` field when built-in ClusterTrainingRuntime specs change during an upgrade. This affects only suspended jobs that reference default/versioned runtimes; running jobs and custom runtimes are not impacted.
+- The tests do not pin specific RHOAI versions — which upgrade path is actually exercised depends on the Jenkins pipeline's deployment configuration.
+
+### Maintenance
+
+- When Trainer API changes introduce spec mutations during upgrade, add the version pair to `specMutationExpectedPaths` in [`utils/utils_upgrade.go`](utils/utils_upgrade.go).
+- When RHOAIENG-48867 is fixed upstream, remove the `t.Skip` calls in `trainer_kueue_upgrade_training_test.go` to enable the default and specific runtime Kueue tests.
+
 ## GPU Requirements
 
 > **Note:** The TrainingHub SDK tests (`TestOsftTrainingHubMultiNodeMultiGPU`, `TestLoraTrainingHubMultiNodeMultiGPU`, `TestSftTrainingHubMultiNodeMultiGPU`) require **NVIDIA Ampere or newer GPUs** (e.g. A100, H100). The training runtime image (`odh-training-cuda128-torch29-py312-rhel9`, referenced as `DefaultTrainingHubRuntimeCUDA` in [`tests/trainer/utils/utils_runtimes.go`](utils/utils_runtimes.go)) ships with `flash_attn==2.8.3`, which requires compute capability >= 8.0. These tests will not work on pre-Ampere GPUs such as T4 or V100.
diff --git a/tests/trainer/trainer_kueue_upgrade_training_test.go b/tests/trainer/trainer_kueue_upgrade_training_test.go
index 6c2dfdbd3..1fc9a8b17 100644
--- a/tests/trainer/trainer_kueue_upgrade_training_test.go
+++ b/tests/trainer/trainer_kueue_upgrade_training_test.go
@@ -17,6 +17,8 @@ limitations under the License.
 package trainer
 
 import (
+	"encoding/json"
+	"fmt"
 	"strings"
 	"testing"
 
@@ -39,11 +41,15 @@ import (
 )
 
 var (
-	upgradeNamespaceName = "test-trainer-upgrade"
-	resourceFlavorName   = "rf-trainer-upgrade"
-	clusterQueueName     = "cq-trainer-upgrade"
-	localQueueName       = "lq-trainer-upgrade"
-	upgradeTrainJobName  = "trainjob-upgrade"
+	upgradeNamespaceName   = "test-trainer-upgrade"
+	resourceFlavorName     = "rf-trainer-upgrade"
+	clusterQueueName       = "cq-trainer-upgrade"
+	localQueueName         = "lq-trainer-upgrade"
+	upgradeTrainJobName    = "trainjob-upgrade"
+	upgradeConfigMapName   = "default-runtime-upgrade-baseline"
+	upgradeTrainJobGenKey  = "trainjob-generation"
+	upgradeTrainJobSpecKey = "trainjob-spec"
+	rhoaiVersionKey        = "rhoai-version"
 
 	// Specific runtime upgrade test variables
 	specificRuntimeNamespaceName  = "test-trainer-upgrade-specific"
@@ -53,6 +59,10 @@ var (
 	specificRuntimeTrainJobName   = "trainjob-upgrade-specific"
 	specificRuntimeConfigMapName  = "specific-runtime-upgrade"
 	specificRuntimeConfigMapKey   = "runtime-name"
+	specificRuntimeGenerationKey  = "runtime-generation"
+	specificRuntimeSpecKey        = "runtime-spec"
+	specificTrainJobGenerationKey = "trainjob-generation"
+	specificTrainJobSpecKey       = "trainjob-spec"
 
 	// Custom runtime upgrade test variables
 	customRuntimeNamespaceName  = "test-trainer-upgrade-custom-rt"
@@ -61,6 +71,11 @@ var (
 	customRuntimeLocalQueue     = "lq-trainer-upgrade-custom-rt"
 	customRuntimeTrainJobName   = "trainjob-upgrade-custom-rt"
 	customRuntimeCTRName        = "custom-upgrade-runtime"
+	customRuntimeConfigMapName  = "custom-runtime-upgrade-baseline"
+	customRuntimeGenerationKey  = "ctr-generation"
+	customRuntimeSpecKey        = "ctr-spec"
+	customTrainJobGenerationKey = "trainjob-generation"
+	customTrainJobSpecKey       = "trainjob-spec"
 )
 
 func TestSetupUpgradeTrainJob(t *testing.T) {
@@ -130,6 +145,13 @@ func TestSetupUpgradeTrainJob(t *testing.T) {
 	test.Eventually(TrainJob(test, trainJob.Namespace, upgradeTrainJobName), TestTimeoutShort).
 		Should(WithTransform(TrainJobConditionSuspended, Equal(metav1.ConditionTrue)))
 	test.T().Logf("TrainJob %s/%s is suspended, waiting for ClusterQueue to be enabled after upgrade", trainJob.Namespace, upgradeTrainJobName)
+
+	// Store TrainJob baseline for post-upgrade integrity check
+	trainJob, err = test.Client().Trainer().TrainerV1alpha1().TrainJobs(upgradeNamespaceName).Get(test.Ctx(), upgradeTrainJobName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+	data := map[string]string{}
+	addResourceBaseline(test, data, upgradeTrainJobGenKey, upgradeTrainJobSpecKey, trainJob.Generation, trainJob.Spec)
+	storeUpgradeBaseline(test, upgradeNamespaceName, upgradeConfigMapName, data)
 }
 
 func TestRunUpgradeTrainJob(t *testing.T) {
@@ -144,9 +166,20 @@ func TestRunUpgradeTrainJob(t *testing.T) {
 	defer test.Client().Kueue().KueueV1beta2().ClusterQueues().Delete(test.Ctx(), clusterQueueName, metav1.DeleteOptions{})
 	defer DeleteTestNamespace(test, namespace)
 
+	// Check TrainJob spec integrity
+	configMap, err := test.Client().Core().CoreV1().ConfigMaps(upgradeNamespaceName).Get(
+		test.Ctx(), upgradeConfigMapName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "Baseline ConfigMap should exist")
+
+	trainJob, err := test.Client().Trainer().TrainerV1alpha1().TrainJobs(upgradeNamespaceName).Get(test.Ctx(), upgradeTrainJobName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "TrainJob should exist after upgrade")
+
+	verifySpecIntegrity(test, "TrainJob", trainJob.Generation, trainJob.Spec,
+		configMap, upgradeTrainJobGenKey, upgradeTrainJobSpecKey)
+
 	// Enable ClusterQueue to process waiting TrainJob
 	clusterQueue := kueueacv1beta2.ClusterQueue(clusterQueueName).WithSpec(kueueacv1beta2.ClusterQueueSpec().WithStopPolicy(kueuev1beta2.None))
-	_, err := test.Client().Kueue().KueueV1beta2().ClusterQueues().Apply(test.Ctx(), clusterQueue, metav1.ApplyOptions{FieldManager: "application/apply-patch", Force: true})
+	_, err = test.Client().Kueue().KueueV1beta2().ClusterQueues().Apply(test.Ctx(), clusterQueue, metav1.ApplyOptions{FieldManager: "application/apply-patch", Force: true})
 	test.Expect(err).NotTo(HaveOccurred())
 	test.T().Logf("Enabled ClusterQueue %s by setting StopPolicy to None", clusterQueueName)
 
@@ -186,8 +219,11 @@ func TestSetupSpecificRuntimeUpgradeTrainJob(t *testing.T) {
 	CreateOrGetTestNamespaceWithName(test, specificRuntimeNamespaceName, WithKueueManaged())
 	test.T().Logf("Created Kueue-managed namespace: %s", specificRuntimeNamespaceName)
 
-	// Store the runtime name in ConfigMap for post-upgrade verification
-	storeSpecificRuntimeInConfigMap(test, specificRuntime)
+	// Store the runtime baseline in ConfigMap for post-upgrade verification
+	ctr, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), specificRuntime, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+	baselineData := map[string]string{specificRuntimeConfigMapKey: specificRuntime}
+	addResourceBaseline(test, baselineData, specificRuntimeGenerationKey, specificRuntimeSpecKey, ctr.Generation, ctr.Spec)
 
 	// Create Kueue resources with StopPolicy=Hold
 	resourceFlavor := kueueacv1beta2.ResourceFlavor(specificRuntimeResourceFlavor)
@@ -245,6 +281,12 @@ func TestSetupSpecificRuntimeUpgradeTrainJob(t *testing.T) {
 	test.Eventually(TrainJob(test, trainJob.Namespace, specificRuntimeTrainJobName), TestTimeoutShort).
 		Should(WithTransform(TrainJobConditionSuspended, Equal(metav1.ConditionTrue)))
 	test.T().Logf("TrainJob %s/%s using runtime %s is suspended, waiting for upgrade", trainJob.Namespace, specificRuntimeTrainJobName, specificRuntime)
+
+	// Store TrainJob baseline and persist ConfigMap
+	trainJob, err = test.Client().Trainer().TrainerV1alpha1().TrainJobs(specificRuntimeNamespaceName).Get(test.Ctx(), specificRuntimeTrainJobName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+	addResourceBaseline(test, baselineData, specificTrainJobGenerationKey, specificTrainJobSpecKey, trainJob.Generation, trainJob.Spec)
+	storeUpgradeBaseline(test, specificRuntimeNamespaceName, specificRuntimeConfigMapName, baselineData)
 }
 
 // TestRunSpecificRuntimeUpgradeTrainJob verifies that a TrainJob using a specific cluster training runtime
@@ -272,8 +314,13 @@ func TestRunSpecificRuntimeUpgradeTrainJob(t *testing.T) {
 		DeleteTestNamespace(test, namespace)
 	}()
 
-	// Verify the ClusterTrainingRuntime still exists
-	_, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), specificRuntime, metav1.GetOptions{})
+	// Load baselines from ConfigMap
+	configMap, err := test.Client().Core().CoreV1().ConfigMaps(specificRuntimeNamespaceName).Get(
+		test.Ctx(), specificRuntimeConfigMapName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "Baseline ConfigMap should exist")
+
+	// Check ClusterTrainingRuntime spec integrity
+	ctr, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), specificRuntime, metav1.GetOptions{})
 	if err != nil {
 		if errors.IsNotFound(err) {
 			test.T().Logf("ClusterTrainingRuntime %s was removed during upgrade (expected for versioned runtimes)", specificRuntime)
@@ -282,13 +329,18 @@ func TestRunSpecificRuntimeUpgradeTrainJob(t *testing.T) {
 		}
 	} else {
 		test.T().Logf("ClusterTrainingRuntime %s still exists after upgrade", specificRuntime)
+		verifySpecIntegrity(test, "ClusterTrainingRuntime", ctr.Generation, ctr.Spec,
+			configMap, specificRuntimeGenerationKey, specificRuntimeSpecKey)
 	}
 
-	// Verify TrainJob is still suspended
+	// Check TrainJob spec integrity
 	trainJob, err := test.Client().Trainer().TrainerV1alpha1().TrainJobs(specificRuntimeNamespaceName).Get(test.Ctx(), specificRuntimeTrainJobName, metav1.GetOptions{})
 	test.Expect(err).NotTo(HaveOccurred(), "TrainJob should exist after upgrade")
 	test.T().Logf("TrainJob %s/%s exists after upgrade with RuntimeRef: %s", trainJob.Namespace, trainJob.Name, trainJob.Spec.RuntimeRef.Name)
 
+	verifySpecIntegrity(test, "TrainJob", trainJob.Generation, trainJob.Spec,
+		configMap, specificTrainJobGenerationKey, specificTrainJobSpecKey)
+
 	// Enable ClusterQueue to process the TrainJob
 	clusterQueue := kueueacv1beta2.ClusterQueue(specificRuntimeClusterQueue).WithSpec(kueueacv1beta2.ClusterQueueSpec().WithStopPolicy(kueuev1beta2.None))
 	_, err = test.Client().Kueue().KueueV1beta2().ClusterQueues().Apply(test.Ctx(), clusterQueue, metav1.ApplyOptions{FieldManager: "application/apply-patch", Force: true})
@@ -369,8 +421,8 @@ func TestSetupCustomRuntimeUpgradeTrainJob(t *testing.T) {
 	test.Expect(err).NotTo(HaveOccurred())
 	test.T().Logf("Applied Kueue LocalQueue %s/%s successfully", appliedLocalQueue.Namespace, appliedLocalQueue.Name)
 
-	// Create TrainJob using the custom CTR
-	trainJob := createUpgradeTrainJob(test, customRuntimeNamespaceName, appliedLocalQueue.Name, customRuntimeTrainJobName, customRuntimeCTRName)
+	// Create TrainJob using the custom CTR with PodTemplateOverrides to exercise API surface coverage
+	trainJob := createCustomRuntimeUpgradeTrainJob(test, customRuntimeNamespaceName, appliedLocalQueue.Name)
 
 	// Verify Kueue Workload is Inadmissible
 	var workloadName string
@@ -389,6 +441,17 @@ func TestSetupCustomRuntimeUpgradeTrainJob(t *testing.T) {
 	test.Eventually(TrainJob(test, trainJob.Namespace, customRuntimeTrainJobName), TestTimeoutShort).
 		Should(WithTransform(TrainJobConditionSuspended, Equal(metav1.ConditionTrue)))
 	test.T().Logf("TrainJob %s/%s using custom runtime %s is suspended, waiting for upgrade", trainJob.Namespace, customRuntimeTrainJobName, customRuntimeCTRName)
+
+	// Store baselines for post-upgrade integrity check
+	ctr, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), customRuntimeCTRName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+	trainJob, err = test.Client().Trainer().TrainerV1alpha1().TrainJobs(customRuntimeNamespaceName).Get(test.Ctx(), customRuntimeTrainJobName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+
+	data := map[string]string{}
+	addResourceBaseline(test, data, customRuntimeGenerationKey, customRuntimeSpecKey, ctr.Generation, ctr.Spec)
+	addResourceBaseline(test, data, customTrainJobGenerationKey, customTrainJobSpecKey, trainJob.Generation, trainJob.Spec)
+	storeUpgradeBaseline(test, customRuntimeNamespaceName, customRuntimeConfigMapName, data)
 }
 
 func TestRunCustomRuntimeUpgradeTrainJob(t *testing.T) {
@@ -402,14 +465,30 @@ func TestRunCustomRuntimeUpgradeTrainJob(t *testing.T) {
 		_ = test.Client().Kueue().KueueV1beta2().ResourceFlavors().Delete(test.Ctx(), customRuntimeResourceFlavor, metav1.DeleteOptions{})
 		_ = test.Client().Kueue().KueueV1beta2().ClusterQueues().Delete(test.Ctx(), customRuntimeClusterQueue, metav1.DeleteOptions{})
 		_ = test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Delete(test.Ctx(), customRuntimeCTRName, metav1.DeleteOptions{})
+		_ = test.Client().Core().CoreV1().ConfigMaps(customRuntimeNamespaceName).Delete(test.Ctx(), customRuntimeConfigMapName, metav1.DeleteOptions{})
 		DeleteTestNamespace(test, namespace)
 	}()
 
-	// Verify custom CTR still exists after upgrade
-	_, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), customRuntimeCTRName, metav1.GetOptions{})
+	// Load baselines from ConfigMap
+	configMap, err := test.Client().Core().CoreV1().ConfigMaps(customRuntimeNamespaceName).Get(
+		test.Ctx(), customRuntimeConfigMapName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "Baseline ConfigMap should exist")
+
+	// Check custom CTR spec integrity
+	ctr, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().Get(test.Ctx(), customRuntimeCTRName, metav1.GetOptions{})
 	test.Expect(err).NotTo(HaveOccurred(), "Custom ClusterTrainingRuntime should exist after upgrade")
 	test.T().Logf("Custom ClusterTrainingRuntime %s is preserved after upgrade", customRuntimeCTRName)
 
+	verifySpecIntegrity(test, "Custom CTR", ctr.Generation, ctr.Spec,
+		configMap, customRuntimeGenerationKey, customRuntimeSpecKey)
+
+	// Check TrainJob spec integrity
+	trainJob, err := test.Client().Trainer().TrainerV1alpha1().TrainJobs(customRuntimeNamespaceName).Get(test.Ctx(), customRuntimeTrainJobName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "TrainJob should exist after upgrade")
+
+	verifySpecIntegrity(test, "TrainJob", trainJob.Generation, trainJob.Spec,
+		configMap, customTrainJobGenerationKey, customTrainJobSpecKey)
+
 	// Enable ClusterQueue to process the TrainJob
 	clusterQueue := kueueacv1beta2.ClusterQueue(customRuntimeClusterQueue).WithSpec(kueueacv1beta2.ClusterQueueSpec().WithStopPolicy(kueuev1beta2.None))
 	_, err = test.Client().Kueue().KueueV1beta2().ClusterQueues().Apply(test.Ctx(), clusterQueue, metav1.ApplyOptions{FieldManager: "application/apply-patch", Force: true})
@@ -435,6 +514,26 @@ func TestRunCustomRuntimeUpgradeTrainJob(t *testing.T) {
 
 // Helper functions
 
+// verifySpecIntegrity checks generation against the baseline stored under genKey in configMap (a missing entry fails
+// the test). On a mismatch it logs the stored and current specs and requires the RHOAI upgrade path to be allow-listed.
+func verifySpecIntegrity(test Test, resourceName string, generation int64, spec interface{}, configMap *corev1.ConfigMap, genKey, specKey string) {
+	test.Expect(configMap.Data).To(HaveKey(genKey), "Baseline ConfigMap is missing %s for %s", genKey, resourceName)
+	expectedGen, actualGen := configMap.Data[genKey], fmt.Sprintf("%d", generation)
+	if actualGen == expectedGen {
+		test.T().Logf("%s generation unchanged after upgrade: %s", resourceName, actualGen)
+		return
+	}
+	preVersion, postVersion := configMap.Data[rhoaiVersionKey], GetRHOAIVersionFromDSCI(test)
+	currentSpecJSON, _ := json.Marshal(spec) // diagnostic output only: a marshal failure just logs an empty spec
+	test.T().Logf("%s generation changed during upgrade (%s to %s)", resourceName, expectedGen, actualGen)
+	test.T().Logf("Pre-upgrade %s spec: %s", resourceName, configMap.Data[specKey])
+	test.T().Logf("Post-upgrade %s spec: %s", resourceName, currentSpecJSON)
+	test.Expect(preVersion).NotTo(BeEmpty(), "Pre-upgrade RHOAI version missing from baseline ConfigMap")
+	test.Expect(postVersion).NotTo(BeEmpty(), "Post-upgrade RHOAI version not available from DSCI")
+	test.Expect(trainerutils.IsSpecMutationExpected(preVersion, postVersion)).To(BeTrue(),
+		"Unexpected %s spec mutation for upgrade %s → %s", resourceName, preVersion, postVersion)
+}
+
 func createUpgradeTrainJob(test Test, namespace, localQueueName, jobName, runtimeName string) *trainerv1alpha1.TrainJob {
 	// Delete existing TrainJob if present
 	_, err := test.Client().Trainer().TrainerV1alpha1().TrainJobs(namespace).Get(test.Ctx(), jobName, metav1.GetOptions{})
@@ -474,6 +573,65 @@ func createUpgradeTrainJob(test Test, namespace, localQueueName, jobName, runtim
 	return trainJob
 }
 
+// createCustomRuntimeUpgradeTrainJob (re)creates the TrainJob customRuntimeTrainJobName in namespace.
+// The job is labelled for the given Kueue LocalQueue, references customRuntimeCTRName, and carries a
+// PodTemplateOverride for the "node" job (extra pod label plus a NoSchedule toleration).
+// It returns the TrainJob as returned by the API server.
+func createCustomRuntimeUpgradeTrainJob(test Test, namespace, localQueueName string) *trainerv1alpha1.TrainJob {
+	// Remove a TrainJob left over from an earlier run; any lookup error other than NotFound aborts the test.
+	_, getErr := test.Client().Trainer().TrainerV1alpha1().TrainJobs(namespace).Get(test.Ctx(), customRuntimeTrainJobName, metav1.GetOptions{})
+	switch {
+	case getErr == nil:
+		deleteErr := test.Client().Trainer().TrainerV1alpha1().TrainJobs(namespace).Delete(test.Ctx(), customRuntimeTrainJobName, metav1.DeleteOptions{})
+		test.Expect(deleteErr).NotTo(HaveOccurred())
+		test.Eventually(TrainJobs(test, namespace), TestTimeoutShort).Should(BeEmpty())
+	case !errors.IsNotFound(getErr):
+		test.T().Fatalf("Error retrieving TrainJob with name `%s`: %v", customRuntimeTrainJobName, getErr)
+	}
+
+	// Pod-level overrides targeting the "node" job.
+	nodeOverride := trainerv1alpha1.PodTemplateOverride{
+		TargetJobs: []trainerv1alpha1.PodTemplateOverrideTargetJob{{Name: "node"}},
+		Metadata:   &metav1.ObjectMeta{Labels: map[string]string{"upgrade-test": "custom-runtime"}},
+		Spec: &trainerv1alpha1.PodTemplateSpecOverride{
+			Tolerations: []corev1.Toleration{{
+				Key:      "upgrade-test",
+				Operator: corev1.TolerationOpExists,
+				Effect:   corev1.TaintEffectNoSchedule,
+			}},
+		},
+	}
+
+	// Short workload: print the PyTorch version, sleep five seconds, then report success.
+	trainerSpec := &trainerv1alpha1.Trainer{
+		Command: []string{
+			"python",
+			"-c",
+			"import torch; print(f'PyTorch version: {torch.__version__}'); import time; time.sleep(5); print('Training completed successfully')",
+		},
+	}
+
+	// The kueue.x-k8s.io/queue-name label carries the LocalQueue name passed in by the caller.
+	desired := &trainerv1alpha1.TrainJob{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   customRuntimeTrainJobName,
+			Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueueName},
+		},
+		Spec: trainerv1alpha1.TrainJobSpec{
+			RuntimeRef:           trainerv1alpha1.RuntimeRef{Name: customRuntimeCTRName},
+			Trainer:              trainerSpec,
+			PodTemplateOverrides: []trainerv1alpha1.PodTemplateOverride{nodeOverride},
+		},
+	}
+
+	// Submit the TrainJob; a creation error fails the test.
+	created, createErr := test.Client().Trainer().TrainerV1alpha1().TrainJobs(namespace).Create(test.Ctx(), desired, metav1.CreateOptions{})
+	test.Expect(createErr).NotTo(HaveOccurred())
+	test.T().Logf("Created TrainJob %s/%s with runtime %s and PodTemplateOverrides", created.Namespace, created.Name, customRuntimeCTRName)
+
+	return created
+}
+
 func findSpecificRuntime(test Test) string {
 	runtimes, err := test.Client().Trainer().TrainerV1alpha1().ClusterTrainingRuntimes().List(test.Ctx(), metav1.ListOptions{})
 	test.Expect(err).NotTo(HaveOccurred(), "Failed to list ClusterTrainingRuntimes")
@@ -495,24 +653,26 @@ func findSpecificRuntime(test Test) string {
 	return specificRuntimes[0]
 }
 
-// storeSpecificRuntimeInConfigMap stores the specific runtime name for post-upgrade verification
-func storeSpecificRuntimeInConfigMap(test Test, runtimeName string) {
+// addResourceBaseline records generation under genKey and the JSON-encoded spec under specKey in data.
+func addResourceBaseline(test Test, data map[string]string, genKey, specKey string, generation int64, spec interface{}) {
+	encodedSpec, marshalErr := json.Marshal(spec)
+	test.Expect(marshalErr).NotTo(HaveOccurred())
+	data[genKey], data[specKey] = fmt.Sprintf("%d", generation), string(encodedSpec)
+}
+
+func storeUpgradeBaseline(test Test, namespace, configMapName string, data map[string]string) {
+	data[rhoaiVersionKey] = GetRHOAIVersionFromDSCI(test) // "" when the DSCI lookup fails (the helper only logs)
 	configMap := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      specificRuntimeConfigMapName,
-			Namespace: specificRuntimeNamespaceName,
-		},
-		Data: map[string]string{
-			specificRuntimeConfigMapKey: runtimeName,
+			Name:      configMapName,
+			Namespace: namespace,
 		},
+		Data: data, // the caller's map, now also holding rhoaiVersionKey
 	}
-
-	// Delete existing ConfigMap if present
-	_ = test.Client().Core().CoreV1().ConfigMaps(specificRuntimeNamespaceName).Delete(test.Ctx(), specificRuntimeConfigMapName, metav1.DeleteOptions{})
-
-	_, err := test.Client().Core().CoreV1().ConfigMaps(specificRuntimeNamespaceName).Create(test.Ctx(), configMap, metav1.CreateOptions{})
-	test.Expect(err).NotTo(HaveOccurred(), "Failed to create ConfigMap for specific runtime")
-	test.T().Logf("Stored specific runtime name in ConfigMap %s/%s: %s", specificRuntimeNamespaceName, specificRuntimeConfigMapName, runtimeName)
+	_ = test.Client().Core().CoreV1().ConfigMaps(namespace).Delete(test.Ctx(), configMapName, metav1.DeleteOptions{}) // best-effort removal of an earlier baseline; error intentionally discarded
+	_, err := test.Client().Core().CoreV1().ConfigMaps(namespace).Create(test.Ctx(), configMap, metav1.CreateOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+	test.T().Logf("Stored upgrade baseline in ConfigMap %s/%s", namespace, configMapName)
 }
 
 // getSpecificRuntimeFromConfigMap retrieves the specific runtime name from ConfigMap
diff --git a/tests/trainer/trainer_trainingruntime_upgrade_test.go b/tests/trainer/trainer_trainingruntime_upgrade_test.go
index 952677466..a7f5086b5 100644
--- a/tests/trainer/trainer_trainingruntime_upgrade_test.go
+++ b/tests/trainer/trainer_trainingruntime_upgrade_test.go
@@ -35,6 +35,9 @@ import (
 var (
 	runtimeNamespaceName = "test-trainer-upgrade-runtime"
 	customRuntimeName    = "custom-sleep-runtime"
+	runtimeConfigMapName = "runtime-upgrade-baseline"
+	runtimeGenerationKey = "runtime-generation"
+	runtimeSpecKey       = "runtime-spec"
 )
 
 func TestSetupTrainingRuntime(t *testing.T) {
@@ -53,6 +56,11 @@ func TestSetupTrainingRuntime(t *testing.T) {
 	test.Expect(err).NotTo(HaveOccurred())
 	test.Expect(runtime.Name).To(Equal(customRuntimeName))
 	test.T().Logf("Custom TrainingRuntime %s/%s created successfully", runtimeNamespaceName, customRuntimeName)
+
+	// Store baseline for post-upgrade verification
+	data := map[string]string{}
+	addResourceBaseline(test, data, runtimeGenerationKey, runtimeSpecKey, runtime.Generation, runtime.Spec)
+	storeUpgradeBaseline(test, runtimeNamespaceName, runtimeConfigMapName, data)
 }
 
 func TestVerifyTrainingRuntime(t *testing.T) {
@@ -75,6 +83,18 @@ func TestVerifyTrainingRuntime(t *testing.T) {
 	test.Expect(runtimeNames).To(ContainElement(customRuntimeName),
 		"Custom TrainingRuntime should exist after upgrade. Found runtimes: %v", runtimeNames)
 	test.T().Logf("TrainingRuntime %s/%s is preserved after upgrade", runtimeNamespaceName, customRuntimeName)
+
+	// Check spec integrity across upgrade
+	runtime, err := test.Client().Trainer().TrainerV1alpha1().TrainingRuntimes(runtimeNamespaceName).Get(
+		test.Ctx(), customRuntimeName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred())
+
+	configMap, err := test.Client().Core().CoreV1().ConfigMaps(runtimeNamespaceName).Get(
+		test.Ctx(), runtimeConfigMapName, metav1.GetOptions{})
+	test.Expect(err).NotTo(HaveOccurred(), "Baseline ConfigMap should exist")
+
+	verifySpecIntegrity(test, "TrainingRuntime", runtime.Generation, runtime.Spec,
+		configMap, runtimeGenerationKey, runtimeSpecKey)
 }
 
 func createCustomTrainingRuntime(test Test, namespace string) *trainerv1alpha1.TrainingRuntime {
diff --git a/tests/trainer/utils/utils_upgrade.go b/tests/trainer/utils/utils_upgrade.go
new file mode 100644
index 000000000..e8c5b6e1e
--- /dev/null
+++ b/tests/trainer/utils/utils_upgrade.go
@@ -0,0 +1,50 @@
+/*
+Copyright 2026.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package trainer
+
+import "strings"
+
+// specMutationExpectedPaths lists {from, to} "major.minor" version pairs for which Trainer API changes
+// are known to mutate existing TrainJob/TrainingRuntime specs during upgrade. It is currently empty.
+var specMutationExpectedPaths = [][2]string{
+	// Disabled example entry — kubeflow/trainer#3309: PodTemplateOverrides → RuntimePatches
+	//	{"3.4", "3.5"},
+}
+
+// IsSpecMutationExpected reports whether the upgrade fromVersion→toVersion is listed in
+// specMutationExpectedPaths. Only major.minor is compared; a version without both parts yields false.
+func IsSpecMutationExpected(fromVersion, toVersion string) bool {
+	wanted := [2]string{majorMinor(fromVersion), majorMinor(toVersion)}
+	if wanted[0] == "" || wanted[1] == "" {
+		return false
+	}
+	for i := range specMutationExpectedPaths {
+		// Arrays of strings are comparable, so a single == checks both ends of the path.
+		if specMutationExpectedPaths[i] == wanted {
+			return true
+		}
+	}
+	return false
+}
+
+// majorMinor returns the first two dot-separated fields of version joined as "major.minor", or "" if version has no dot.
+func majorMinor(version string) string {
+	if fields := strings.SplitN(version, ".", 3); len(fields) >= 2 {
+		return fields[0] + "." + fields[1]
+	}
+	return ""
+}