@@ -1569,55 +1569,70 @@ def configure(
15691569 # 3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically.
15701570 # ----------------------------------------------------------------------------------------------------------
15711571 max_gpus_per_node = self .get_value ("MAX_GPUS_PER_NODE" )
1572- if gpu_type and str (gpu_type ).lower () != "none" :
1573- expect (
1574- max_gpus_per_node ,
1575- f"GPUS are not defined for machine={ machine_name } and compiler={ compiler } " ,
1576- )
1577- expect (
1578- gpu_offload ,
1579- "Both gpu-type and gpu-offload must be defined if either is defined" ,
1580- )
1581- expect (
1582- compiler in ["nvhpc" , "cray" ],
1583- f"Only nvhpc and cray compilers are expected for a GPU run; the user given compiler is { compiler } , " ,
1584- )
1585- valid_gpu_type = self .get_value ("GPU_TYPE" ).split ("," )
1586- valid_gpu_type .remove ("none" )
1587- expect (
1588- gpu_type in valid_gpu_type ,
1589- f"Unsupported GPU type is given: { gpu_type } ; valid values are { valid_gpu_type } " ,
1590- )
1591- valid_gpu_offload = self .get_value ("GPU_OFFLOAD" ).split ("," )
1592- valid_gpu_offload .remove ("none" )
1593- expect (
1594- gpu_offload in valid_gpu_offload ,
1595- f"Unsupported GPU programming model is given: { gpu_offload } ; valid values are { valid_gpu_offload } " ,
1596- )
1597- self .gpu_enabled = True
1598- if ngpus_per_node >= 0 :
1599- self .set_value (
1600- "NGPUS_PER_NODE" ,
1601- max (1 , ngpus_per_node )
1602- if ngpus_per_node <= max_gpus_per_node
1603- else max_gpus_per_node ,
1604- )
1605- elif gpu_offload and str (gpu_offload ).lower () != "none" :
1606- expect (
1607- False ,
1608- "Both gpu-type and gpu-offload must be defined if either is defined" ,
1609- )
1610- elif ngpus_per_node != 0 :
1611- expect (
1612- False ,
1613- f"ngpus_per_node is expected to be 0 for a pure CPU run ; { ngpus_per_node } is provided instead ;" ,
1614- )
1615-
16161572 # Set these two GPU XML variables here to overwrite the default values
16171573 # Only set them for "cesm" model
16181574 if self ._cime_model == "cesm" :
1575+ if gpu_type and str (gpu_type ).lower () != "none" :
1576+ expect (
1577+ max_gpus_per_node ,
1578+ f"GPUS are not defined for machine={ machine_name } and compiler={ compiler } " ,
1579+ )
1580+ expect (
1581+ gpu_offload ,
1582+ "Both gpu-type and gpu-offload must be defined if either is defined" ,
1583+ )
1584+ expect (
1585+ compiler in ["nvhpc" , "cray" ],
1586+ f"Only nvhpc and cray compilers are expected for a GPU run; the user given compiler is { compiler } , " ,
1587+ )
1588+ valid_gpu_type = self .get_value ("GPU_TYPE" ).split ("," )
1589+ valid_gpu_type .remove ("none" )
1590+ expect (
1591+ gpu_type in valid_gpu_type ,
1592+ f"Unsupported GPU type is given: { gpu_type } ; valid values are { valid_gpu_type } " ,
1593+ )
1594+ valid_gpu_offload = self .get_value ("GPU_OFFLOAD" ).split ("," )
1595+ valid_gpu_offload .remove ("none" )
1596+ expect (
1597+ gpu_offload in valid_gpu_offload ,
1598+ f"Unsupported GPU programming model is given: { gpu_offload } ; valid values are { valid_gpu_offload } " ,
1599+ )
1600+ self .gpu_enabled = True
1601+ if ngpus_per_node >= 0 :
1602+ self .set_value (
1603+ "NGPUS_PER_NODE" ,
1604+ max (1 , ngpus_per_node )
1605+ if ngpus_per_node <= max_gpus_per_node
1606+ else max_gpus_per_node ,
1607+ )
1608+ elif gpu_offload and str (gpu_offload ).lower () != "none" :
1609+ expect (
1610+ False ,
1611+ "Both gpu-type and gpu-offload must be defined if either is defined" ,
1612+ )
1613+ elif ngpus_per_node != 0 :
1614+ expect (
1615+ False ,
1616+ f"ngpus_per_node is expected to be 0 for a pure CPU run ; { ngpus_per_node } is provided instead ;" ,
1617+ )
16191618 self .set_value ("GPU_TYPE" , str (gpu_type ).lower ())
16201619 self .set_value ("GPU_OFFLOAD" , str (gpu_offload ).lower ())
1620+ else :
1621+ # Assume it is SCREAM or E3SM
1622+ if "gpu" not in compiler .lower ():
1623+ expect (
1624+ ngpus_per_node == 0 ,
1625+ f"ngpus_per_node is expected to be 0 for a pure CPU run; { ngpus_per_node } is provided instead;" ,
1626+ )
1627+ else :
1628+ self .gpu_enabled = True
1629+ if ngpus_per_node >= 0 :
1630+ self .set_value (
1631+ "NGPUS_PER_NODE" ,
1632+ max (1 , ngpus_per_node )
1633+ if ngpus_per_node <= max_gpus_per_node
1634+ else max_gpus_per_node ,
1635+ )
16211636
16221637 self .initialize_derived_attributes ()
16231638
0 commit comments