@@ -24,10 +24,33 @@ func TestPostgres_singleNode(t *testing.T) {
2424 t .Skip ()
2525 }
2626
27- f .Fly (
28- "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1" ,
29- f .OrgSlug (), appName , f .PrimaryRegion (),
30- )
27+ // Retry pg create up to 3 times due to transient volume provisioning issues
28+ var pgCreateErr error
29+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
30+ result := f .FlyAllowExitFailure (
31+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1" ,
32+ f .OrgSlug (), appName , f .PrimaryRegion (),
33+ )
34+
35+ if result .ExitCode () == 0 {
36+ pgCreateErr = nil
37+ break
38+ }
39+
40+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
41+
42+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
43+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
44+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
45+ time .Sleep (5 * time .Second )
46+ } else if attempt < 3 {
47+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
48+ time .Sleep (2 * time .Second )
49+ }
50+ }
51+
52+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
53+
3154 f .Fly ("status -a %s" , appName )
3255 f .Fly ("config save -a %s" , appName )
3356 f .Fly ("config validate" )
@@ -44,10 +67,33 @@ func TestPostgres_autostart(t *testing.T) {
4467
4568 appName := f .CreateRandomAppName ()
4669
47- f .Fly (
48- "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1" ,
49- f .OrgSlug (), appName , f .PrimaryRegion (), postgresMachineSize ,
50- )
70+ // Retry pg create up to 3 times due to transient volume provisioning issues
71+ var pgCreateErr error
72+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
73+ result := f .FlyAllowExitFailure (
74+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1" ,
75+ f .OrgSlug (), appName , f .PrimaryRegion (), postgresMachineSize ,
76+ )
77+
78+ if result .ExitCode () == 0 {
79+ pgCreateErr = nil
80+ break
81+ }
82+
83+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
84+
85+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
86+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
87+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
88+ time .Sleep (5 * time .Second )
89+ } else if attempt < 3 {
90+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
91+ time .Sleep (2 * time .Second )
92+ }
93+ }
94+
95+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
96+
5197 machList := f .MachinesList (appName )
5298 require .Equal (t , 1 , len (machList ), "expected exactly 1 machine after launch" )
5399 firstMachine := machList [0 ]
@@ -58,7 +104,34 @@ func TestPostgres_autostart(t *testing.T) {
58104 }
59105
60106 appName = f .CreateRandomAppName ()
61- f .Fly ("pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1 --autostart" , f .OrgSlug (), appName , f .PrimaryRegion ())
107+
108+ // Retry second pg create
109+ pgCreateErr = nil
110+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
111+ result := f .FlyAllowExitFailure (
112+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1 --autostart" ,
113+ f .OrgSlug (), appName , f .PrimaryRegion (),
114+ )
115+
116+ if result .ExitCode () == 0 {
117+ pgCreateErr = nil
118+ break
119+ }
120+
121+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
122+
123+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
124+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
125+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
126+ time .Sleep (5 * time .Second )
127+ } else if attempt < 3 {
128+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
129+ time .Sleep (2 * time .Second )
130+ }
131+ }
132+
133+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
134+
62135 machList = f .MachinesList (appName )
63136 require .Equal (t , 1 , len (machList ), "expected exactly 1 machine after launch" )
64137 firstMachine = machList [0 ]
@@ -95,7 +168,33 @@ func TestPostgres_FlexFailover(t *testing.T) {
95168 return ""
96169 }
97170
98- f .Fly ("pg create --flex --org %s --name %s --region %s --initial-cluster-size 3 --vm-size shared-cpu-1x --volume-size 1" , f .OrgSlug (), appName , f .PrimaryRegion ())
171+ // Retry pg create up to 3 times due to transient volume provisioning issues
172+ var pgCreateErr error
173+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
174+ result := f .FlyAllowExitFailure (
175+ "pg create --flex --org %s --name %s --region %s --initial-cluster-size 3 --vm-size shared-cpu-1x --volume-size 1" ,
176+ f .OrgSlug (), appName , f .PrimaryRegion (),
177+ )
178+
179+ if result .ExitCode () == 0 {
180+ pgCreateErr = nil
181+ break
182+ }
183+
184+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
185+
186+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
187+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
188+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
189+ time .Sleep (5 * time .Second )
190+ } else if attempt < 3 {
191+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
192+ time .Sleep (2 * time .Second )
193+ }
194+ }
195+
196+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
197+
99198 machList := f .MachinesList (appName )
100199 leaderMachineID := findLeaderID (machList )
101200 if leaderMachineID == "" {
@@ -119,7 +218,37 @@ func TestPostgres_NoMachines(t *testing.T) {
119218
120219 appName := f .CreateRandomAppName ()
121220
122- f .Fly ("pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1" , f .OrgSlug (), appName , f .PrimaryRegion ())
221+ // Retry pg create up to 3 times due to transient volume provisioning issues
222+ var pgCreateErr error
223+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
224+ result := f .FlyAllowExitFailure (
225+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size shared-cpu-1x --volume-size 1" ,
226+ f .OrgSlug (), appName , f .PrimaryRegion (),
227+ )
228+
229+ if result .ExitCode () == 0 {
230+ // Success!
231+ pgCreateErr = nil
232+ break
233+ }
234+
235+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
236+
237+ // If this was a volume-related error and we have retries left, clean up and retry
238+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
239+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
240+ // Clean up the partially created app before retrying
241+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
242+ time .Sleep (5 * time .Second ) // Give the platform time to clean up
243+ } else if attempt < 3 {
244+ // Other error, still retry but don't clean up
245+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
246+ time .Sleep (2 * time .Second )
247+ }
248+ }
249+
250+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
251+
123252 machList := f .MachinesList (appName )
124253 require .Equal (t , 1 , len (machList ), "expected exactly 1 machine after launch" )
125254 firstMachine := machList [0 ]
@@ -213,14 +342,59 @@ func TestPostgres_ImportSuccess(t *testing.T) {
213342 firstAppName := f .CreateRandomAppName ()
214343 secondAppName := f .CreateRandomAppName ()
215344
216- f .Fly (
217- "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1 --password x" ,
218- f .OrgSlug (), firstAppName , f .PrimaryRegion (), postgresMachineSize ,
219- )
220- f .Fly (
221- "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1" ,
222- f .OrgSlug (), secondAppName , f .PrimaryRegion (), postgresMachineSize ,
223- )
345+ // Retry first pg create up to 3 times due to transient volume provisioning issues
346+ var pgCreateErr error
347+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
348+ result := f .FlyAllowExitFailure (
349+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1 --password x" ,
350+ f .OrgSlug (), firstAppName , f .PrimaryRegion (), postgresMachineSize ,
351+ )
352+
353+ if result .ExitCode () == 0 {
354+ pgCreateErr = nil
355+ break
356+ }
357+
358+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
359+
360+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
361+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
362+ f .FlyAllowExitFailure ("apps destroy %s --yes" , firstAppName )
363+ time .Sleep (5 * time .Second )
364+ } else if attempt < 3 {
365+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
366+ time .Sleep (2 * time .Second )
367+ }
368+ }
369+
370+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
371+
372+ // Retry second pg create
373+ pgCreateErr = nil
374+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
375+ result := f .FlyAllowExitFailure (
376+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1" ,
377+ f .OrgSlug (), secondAppName , f .PrimaryRegion (), postgresMachineSize ,
378+ )
379+
380+ if result .ExitCode () == 0 {
381+ pgCreateErr = nil
382+ break
383+ }
384+
385+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
386+
387+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
388+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
389+ f .FlyAllowExitFailure ("apps destroy %s --yes" , secondAppName )
390+ time .Sleep (5 * time .Second )
391+ } else if attempt < 3 {
392+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
393+ time .Sleep (2 * time .Second )
394+ }
395+ }
396+
397+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
224398 assert .EventuallyWithT (t , func (t * assert.CollectT ) {
225399 assertPostgresIsUp (t , f , firstAppName )
226400 }, 1 * time .Minute , 10 * time .Second )
@@ -265,10 +439,32 @@ func TestPostgres_ImportFailure(t *testing.T) {
265439
266440 appName := f .CreateRandomAppName ()
267441
268- f .Fly (
269- "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1 --password x" ,
270- f .OrgSlug (), appName , f .PrimaryRegion (), postgresMachineSize ,
271- )
442+ // Retry pg create up to 3 times due to transient volume provisioning issues
443+ var pgCreateErr error
444+ for attempt := 1 ; attempt <= 3 ; attempt ++ {
445+ result := f .FlyAllowExitFailure (
446+ "pg create --org %s --name %s --region %s --initial-cluster-size 1 --vm-size %s --volume-size 1 --password x" ,
447+ f .OrgSlug (), appName , f .PrimaryRegion (), postgresMachineSize ,
448+ )
449+
450+ if result .ExitCode () == 0 {
451+ pgCreateErr = nil
452+ break
453+ }
454+
455+ pgCreateErr = fmt .Errorf ("pg create failed (attempt %d/3): %s" , attempt , result .StdErrString ())
456+
457+ if strings .Contains (result .StdErrString (), "volume not found" ) && attempt < 3 {
458+ f .Logf ("Volume provisioning failed (attempt %d/3), retrying..." , attempt )
459+ f .FlyAllowExitFailure ("apps destroy %s --yes" , appName )
460+ time .Sleep (5 * time .Second )
461+ } else if attempt < 3 {
462+ f .Logf ("pg create failed (attempt %d/3): %v, retrying..." , attempt , result .StdErrString ())
463+ time .Sleep (2 * time .Second )
464+ }
465+ }
466+
467+ require .NoError (f , pgCreateErr , "pg create failed after 3 attempts" )
272468 assert .EventuallyWithT (t , func (t * assert.CollectT ) {
273469 assertPostgresIsUp (t , f , appName )
274470 }, 1 * time .Minute , 10 * time .Second )
0 commit comments