Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add better storage workflow and Availability Zone support #206

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions bicep/amlfs.bicep
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
targetScope = 'resourceGroup'
import {tags_t} from './types.bicep'
import {tags_t, availabilityZone_t} from './types.bicep'

param location string
param tags tags_t
Expand All @@ -21,6 +21,7 @@ The step sizes are dependent on the SKU.
''')
param capacity int
param infrastructureOnly bool = false
param availabilityZone availabilityZone_t[] = []

resource fileSystem 'Microsoft.StorageCache/amlFileSystems@2024-03-01' = if (!infrastructureOnly){
name: '${name}-${uniqueString(resourceGroup().id,deployment().name)}'
Expand All @@ -29,7 +30,7 @@ resource fileSystem 'Microsoft.StorageCache/amlFileSystems@2024-03-01' = if (!in
sku: {
name: sku
}
zones: [ '1' ]
zones: availabilityZone
properties: {
storageCapacityTiB: capacity
filesystemSubnet: subnetId
Expand All @@ -45,4 +46,4 @@ output ipAddress string = infrastructureOnly ? '' : fileSystem.properties.client
// TODO we are fighting the chef cookbooks here by adding tcp:/lustrefs, as it simply prepends all paths
// with tcp:/lustrefs
output exportPath string = '' //what should our placeholder be for new amlfs??
output mountOptions string = ''
output mountOptions string = 'noatime,user_xattr'
4 changes: 3 additions & 1 deletion bicep/anf.bicep
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
targetScope = 'resourceGroup'
import {tags_t} from './types.bicep'
import {tags_t, availabilityZone_t} from './types.bicep'

param name string
param location string
param tags tags_t
param availabilityZone availabilityZone_t[] = []
param resourcePostfix string = uniqueString(resourceGroup().id)
param subnetId string
param serviceLevel string
Expand Down Expand Up @@ -32,6 +33,7 @@ resource anfVolume 'Microsoft.NetApp/netAppAccounts/capacityPools/volumes@2024-0
location: location
tags: tags
parent: anfPool
zones: length(availabilityZone) == 0 ? null : availabilityZone
properties: {
unixPermissions: '0755'
creationToken: '${name}-path'
Expand Down
27 changes: 16 additions & 11 deletions bicep/ccw.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -208,21 +208,24 @@ module mySQLccw './mysql.bicep' = if (create_database) {
}
}

// Deploys amlfs.bicep for each filesystem slot ('home' = sharedFilesystem, 'additional' = additionalFilesystem)
// whose type is 'aml-new'. The deployment name is suffixed with the slot key so the two instances do not collide.
module ccwAMLFS 'amlfs.bicep' = if (additionalFilesystem.type == 'aml-new') {
name: 'ccwAMLFS-additional'
module ccwAMLFS 'amlfs.bicep' = [
for amlfs in items({ home: sharedFilesystem, additional: additionalFilesystem }): if (amlfs.value.type == 'aml-new') {
name: 'ccwAMLFS-${amlfs.key}'
params: {
location: location
tags: getTags('Microsoft.StorageCache/amlFileSystems', tags)
name: 'ccw-lustre'
subnetId: subnets.?additional.id ?? ''
sku: additionalFilesystem.?lustreTier
capacity: additionalFilesystem.?lustreCapacityInTib
// NOTE(review): the single-module form used `subnets.?additional.id ?? ''`; this indexed access has no
// null-safe fallback. Confirm `subnets` always contains the slot key whenever that slot's type is 'aml-new',
// and that the expression is not evaluated for slots filtered out by the `if` — TODO confirm.
subnetId: subnets[amlfs.key].id
sku: amlfs.value.?lustreTier
capacity: amlfs.value.?lustreCapacityInTib
// `.?` yields null when the filesystem object omits availabilityZone.
// NOTE(review): verify that a null here falls back to the module's declared default rather than failing.
availabilityZone: amlfs.value.?availabilityZone
infrastructureOnly: infrastructureOnly
}
dependsOn: [
ccwNetwork
]
}
]

module ccwANFAccount 'anf-account.bicep' = if((sharedFilesystem.type == 'anf-new' || additionalFilesystem.type == 'anf-new') && !infrastructureOnly) {
name: 'ccwANFAccount'
Expand All @@ -242,6 +245,7 @@ module ccwANF 'anf.bicep' = [
serviceLevel: filer.value.anfServiceTier
sizeTiB: filer.value.anfCapacityInTiB
defaultMountOptions: anfDefaultMountOptions
availabilityZone: filer.value.?availabilityZone
infrastructureOnly: infrastructureOnly
}
dependsOn: [
Expand Down Expand Up @@ -282,24 +286,24 @@ output filerInfoFinal types.filerInfo_t = {
home: {
type: sharedFilesystem.type
nfsCapacityInGb: sharedFilesystem.?nfsCapacityInGb ?? -1
ipAddress: sharedFilesystem.type == 'anf-new' ? ccwANF[1].outputs.ipAddress : sharedFilesystem.?ipAddress ?? ''
exportPath: sharedFilesystem.type == 'anf-new' ? ccwANF[1].outputs.exportPath : sharedFilesystem.?exportPath ?? ''
ipAddress: sharedFilesystem.type == 'anf-new' ? ccwANF[1].outputs.ipAddress : (sharedFilesystem.type == 'aml-new' ? ccwAMLFS[1].outputs.ipAddress : sharedFilesystem.?ipAddress ?? '')
exportPath: sharedFilesystem.type == 'anf-new' ? ccwANF[1].outputs.exportPath : (sharedFilesystem.type == 'aml-new' ? ccwAMLFS[1].outputs.exportPath : sharedFilesystem.?exportPath ?? '')
mountOptions: sharedFilesystem.type == 'anf-new'
? ccwANF[1].outputs.mountOptions
: sharedFilesystem.?mountOptions ?? ''
: (sharedFilesystem.type == 'aml-new' ? ccwAMLFS[1].outputs.mountOptions : sharedFilesystem.?mountOptions ?? '')
mountPath: '/shared'
}
// Additional filer outputs. Every branch must be selected by the *additional* filesystem's own type:
// keying the 'aml-new' branch on sharedFilesystem.type would return empty values when only the additional
// filer is Lustre, and would dereference an undeployed ccwAMLFS[0] when only the home filer is Lustre.
// items() returns entries ordered by key, so ccwAMLFS[0] is the 'additional' slot and ccwAMLFS[1] is 'home'.
additional: {
type: additionalFilesystem.type
ipAddress: additionalFilesystem.type == 'anf-new'
? ccwANF[0].outputs.ipAddress
: additionalFilesystem.type == 'aml-new' ? ccwAMLFS.outputs.ipAddress : additionalFilesystem.?ipAddress ?? ''
: (additionalFilesystem.type == 'aml-new' ? ccwAMLFS[0].outputs.ipAddress : additionalFilesystem.?ipAddress ?? '')
exportPath: additionalFilesystem.type == 'anf-new'
? ccwANF[0].outputs.exportPath
:additionalFilesystem.?exportPath ?? ''
: (additionalFilesystem.type == 'aml-new' ? ccwAMLFS[0].outputs.exportPath : additionalFilesystem.?exportPath ?? '')
mountOptions: additionalFilesystem.type == 'anf-new'
? ccwANF[0].outputs.mountOptions
: additionalFilesystem.?mountOptions ?? ''
: (additionalFilesystem.type == 'aml-new' ? ccwAMLFS[0].outputs.mountOptions : additionalFilesystem.?mountOptions ?? '')
mountPath: additionalFilesystem.?mountPath ?? ''
}
}
Expand Down Expand Up @@ -350,6 +354,7 @@ output partitions types.partitions_t = {
maxNodes: htc.maxNodes
osImage: htc.osImage
useSpot: htc.?useSpot ?? false
availabilityZone: htc.availabilityZone
}
hpc: hpc
gpu: gpu
Expand Down
5 changes: 4 additions & 1 deletion bicep/files-to-load/create_cc_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,19 @@ def set_slurm_params(params, dbPassword, outputs):
params['MaxHTCExecuteNodeCount'] = int(outputs['partitions']['value']['htc']['maxNodes'])
params['HTCImageName'] = outputs['partitions']['value']['htc']['osImage']
params['HTCUseLowPrio'] = outputs['partitions']['value']['htc']['useSpot']

params['HTCAvailabilityZone'] = outputs['partitions']['value']['htc']['availabilityZone']

#HPC
params['HPCMachineType'] = outputs['partitions']['value']['hpc']['sku']
params['MaxHPCExecuteNodeCount'] = int(outputs['partitions']['value']['hpc']['maxNodes'])
params['HPCImageName'] = outputs['partitions']['value']['hpc']['osImage']
params['HPCAvailabilityZone'] = outputs['partitions']['value']['hpc']['availabilityZone']

#GPU
params['GPUMachineType'] = outputs['partitions']['value']['gpu']['sku']
params['MaxGPUExecuteNodeCount'] = int(outputs['partitions']['value']['gpu']['maxNodes'])
params['GPUImageName'] = outputs['partitions']['value']['gpu']['osImage']
params['GPUAvailabilityZone'] = outputs['partitions']['value']['gpu']['availabilityZone']

#scheduler node
#params['slurm'] #is this the slurm version??? no, so what is it?
Expand Down
7 changes: 4 additions & 3 deletions bicep/network-new.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ param additionalFilesystem types.additionalFilesystem_t
var filerTypes = [sharedFilesystem.type, additionalFilesystem.type]
var create_anf = contains(filerTypes, 'anf-new')
var create_anf_subnet = create_anf ? (sharedFilesystem.type == 'anf-new' ? network.?sharedFilerSubnet : network.?additionalFilerSubnet) : null
var create_lustre = additionalFilesystem.type == 'aml-new'
var create_lustre = contains(filerTypes, 'aml-new')
var create_lustre_subnet = create_lustre ? (sharedFilesystem.type == 'aml-new' ? network.?sharedFilerSubnet : network.?additionalFilerSubnet) : null
var deploy_bastion = network.?bastion ?? false
var create_database = false //update once MySQL capacity is available
param natGatewayId string
Expand Down Expand Up @@ -115,7 +116,7 @@ var vnet = {
} : {},
create_lustre ? {
lustre: {
name: network.?additionalFilerSubnet ?? 'ccw-lustre-subnet'
name: create_lustre_subnet ?? 'ccw-lustre-subnet'
cidr: subnet_cidr.lustre
nat_gateway : false
service_endpoints: []
Expand Down Expand Up @@ -396,7 +397,7 @@ var subnet_database = create_database ? rsc_output(subnetDatabase) : {}

var filerTypeHome = sharedFilesystem.type
var filerTypeAddl = additionalFilesystem.type
var output_home_subnet = filerTypeHome == 'anf-new'
var output_home_subnet = filerTypeHome == 'anf-new' || filerTypeHome == 'aml-new'
var output_addl_subnet = contains(['aml-new','anf-new'],filerTypeAddl)
var home_filer = output_home_subnet ? (filerTypeHome == 'anf-new' ? { home: subnet_netapp } : { home: subnet_lustre }) : {}
var addl_filer = output_addl_subnet ? (filerTypeAddl == 'anf-new' ? { additional: subnet_netapp } : { additional: subnet_lustre }) : {}
Expand Down
24 changes: 23 additions & 1 deletion bicep/types.bicep
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Allowed availability-zone identifiers, expressed as string literals.
// Exported so other templates can import it; consumers declare arrays of it (availabilityZone_t[])
// and pass them to a resource's `zones` property.
@export()
type availabilityZone_t = '1' | '2' | '3'

type shared_nfs_new_t = {
type: 'nfs-new'
nfsCapacityInGb: int
Expand All @@ -14,17 +17,32 @@ type shared_anf_new_t = {
type: 'anf-new'
anfServiceTier: string
anfCapacityInTiB: int
availabilityZone: availabilityZone_t[]?
}

// Shared-filesystem variant selected by type 'aml-new'.
// ccw.bicep deploys amlfs.bicep for this variant, forwarding lustreTier as `sku`,
// lustreCapacityInTib as `capacity`, and availabilityZone (optional; may be omitted) as `availabilityZone`.
type shared_aml_new_t = {
type: 'aml-new'
lustreTier: string
lustreCapacityInTib: int
availabilityZone: availabilityZone_t[]?
}

// Shared-filesystem variant selected by type 'aml-existing'.
// ipAddress is required; mountOptions is optional. No exportPath field is declared for this variant.
// NOTE(review): presumably describes an already-deployed Lustre file system that is mounted rather than created — confirm.
type shared_aml_existing_t = {
type: 'aml-existing'
ipAddress: string
mountOptions: string?
}

@discriminator('type')
@export()
type sharedFilesystem_t = shared_nfs_new_t | shared_nfs_existing_t | shared_anf_new_t
type sharedFilesystem_t = shared_nfs_new_t | shared_nfs_existing_t | shared_anf_new_t | shared_aml_new_t | shared_aml_existing_t

type additional_anf_new_t = {
type: 'anf-new'
anfServiceTier: string
anfCapacityInTiB: int
mountPath: string
availabilityZone: availabilityZone_t[]?
}

type additional_nfs_existing_t = {
Expand All @@ -40,6 +58,7 @@ type additional_aml_new_t = {
lustreTier: string
lustreCapacityInTib: int
mountPath: string
availabilityZone: availabilityZone_t[]?
}

type additional_aml_existing_t = {
Expand Down Expand Up @@ -176,6 +195,7 @@ type htc_t = {
osImage: string
maxNodes: int
useSpot: bool?
availabilityZone: availabilityZone_t[]
}

@export()
Expand All @@ -184,13 +204,15 @@ type htc_output_t = {
osImage: string
maxNodes: int
useSpot: bool
availabilityZone: availabilityZone_t[]
}

@export()
type hpc_t = {
sku: string
osImage: string
maxNodes: int
availabilityZone: availabilityZone_t[]
}

@export()
Expand Down
Loading
Loading