<#
.SYNOPSIS
    Provisions the Azure AI Search resources used by Health Plan Chat.

.DESCRIPTION
    Creates the 'plan-materials' index, which contains:
      - a vector search configuration (HNSW algorithm)
      - a semantic search configuration
      - fields for plan document content and embeddings

    The script also creates the matching data source, the skillset
    (including an embedding skill), and the indexer.

.PARAMETER SearchServiceName
    Name of the Azure AI Search service.

.PARAMETER ResourceGroupName
    Name of the resource group that contains the search service.

.PARAMETER StorageAccountName
    Name of the storage account that holds the plan materials.

.PARAMETER FoundryEndpoint
    Azure AI Foundry endpoint that hosts the embedding model.

.EXAMPLE
    ./setup-search-index.ps1 -SearchServiceName "srch-healthplanchat-demo-abc123" -ResourceGroupName "rg-healthplanchat-demo" -StorageAccountName "sthpcdemoabc123" -FoundryEndpoint "https://aif-healthplanchat-demo-abc123.cognitiveservices.azure.com"
#>

param(
    # All four values are required; PowerShell prompts for any that are omitted.
    [Parameter(Mandatory)]
    [string] $SearchServiceName,

    [Parameter(Mandatory)]
    [string] $ResourceGroupName,

    [Parameter(Mandatory)]
    [string] $StorageAccountName,

    [Parameter(Mandatory)]
    [string] $FoundryEndpoint
)

# Make cmdlet errors terminating so a failed REST call stops the script
# unless a try/catch below handles it explicitly.
$ErrorActionPreference = "Stop"

Write-Host "Setting up Azure AI Search index for Health Plan Chat..." -ForegroundColor Cyan

# Endpoint of the search service; every REST call in this script targets it.
$searchEndpoint = "https://$SearchServiceName.search.windows.net"

# Acquire a bearer token for the Search service from the current Azure CLI login.
# `az` is a native executable, so $ErrorActionPreference does not convert a
# non-zero exit code into a terminating error; check the result explicitly
# instead of sending "Bearer " with an empty token to every later request.
$token = az account get-access-token --resource https://search.azure.com --query accessToken -o tsv
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($token)) {
    throw "Failed to acquire an access token for https://search.azure.com. Run 'az login' and verify that your account can access the search service."
}

# Request headers shared by all REST calls. Authentication is token-based, so
# no 'api-key' header is sent: an empty key adds nothing and may be evaluated
# by the service as a (failing) key-based credential.
$headers = @{
    "Authorization" = "Bearer $token"
    "Content-Type"  = "application/json"
}

function Set-SearchResource {
    <#
    .SYNOPSIS
        Creates or updates one named Azure AI Search resource with a PUT request.

    .DESCRIPTION
        PUT on /{collection}/{name} is a create-or-update operation, so a single
        request covers both the "new" and the "already exists" case. Any HTTP
        failure surfaces as a terminating error from Invoke-RestMethod.
        Reads $searchEndpoint and $headers from the calling script scope.

    .PARAMETER Collection
        Resource collection segment of the URL: 'indexes', 'datasources' or 'skillsets'.

    .PARAMETER Name
        Name of the resource to create or update.

    .PARAMETER Body
        JSON definition of the resource.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [string]$Collection,

        [Parameter(Mandatory = $true)]
        [string]$Name,

        [Parameter(Mandatory = $true)]
        [string]$Body
    )

    # ${Name} is braced on purpose: '?' is a legal variable-name character, so
    # "$Name?api-version" would be parsed as a variable called 'Name?'.
    $uri = "$searchEndpoint/$Collection/${Name}?api-version=2024-05-01-preview"

    # Discard the response body so it is not written to the script's output.
    $null = Invoke-RestMethod -Uri $uri -Method Put -Headers $headers -Body $Body
}

# Index definition with vector search.
# NOTE(review): attributes of an existing field cannot be changed by an update.
# If an index named 'plan-materials' already exists with a different schema,
# delete it before running this script.
$indexDefinition = @{
    name = "plan-materials"
    fields = @(
        # Document key. The skillset's index projection writes one document per
        # chunk with a generated key, which requires the key field to be
        # searchable and to use the 'keyword' analyzer.
        @{ name = "id"; type = "Edm.String"; key = $true; searchable = $true; filterable = $true; analyzer = "keyword" }
        # Key of the source document each chunk was produced from; filled by the
        # index projection (parentKeyFieldName). Must be a separate field from the key.
        @{ name = "parent_id"; type = "Edm.String"; searchable = $false; filterable = $true }
        @{ name = "content"; type = "Edm.String"; searchable = $true; analyzer = "en.microsoft" }
        @{ name = "title"; type = "Edm.String"; searchable = $true; analyzer = "en.microsoft" }
        @{ name = "planId"; type = "Edm.String"; searchable = $false; filterable = $true; facetable = $true }
        @{ name = "planType"; type = "Edm.String"; searchable = $false; filterable = $true; facetable = $true }
        @{ name = "section"; type = "Edm.String"; searchable = $true; filterable = $true }
        @{ name = "metadata_storage_path"; type = "Edm.String"; searchable = $false; filterable = $false }
        # Embedding of 'content'; 1536 dimensions must match the embedding model output.
        @{ name = "contentVector"; type = "Collection(Edm.Single)"; searchable = $true; dimensions = 1536; vectorSearchProfile = "vector-profile" }
    )
    vectorSearch = @{
        algorithms = @(
            @{
                name = "hnsw-algorithm"
                kind = "hnsw"
                hnswParameters = @{
                    metric = "cosine"
                    m = 4
                    efConstruction = 400
                    efSearch = 500
                }
            }
        )
        profiles = @(
            @{
                name = "vector-profile"
                algorithm = "hnsw-algorithm"
            }
        )
    }
    semantic = @{
        configurations = @(
            @{
                name = "plan-semantic-config"
                prioritizedFields = @{
                    titleField = @{ fieldName = "title" }
                    # The REST schema names these 'prioritized*Fields'; the
                    # shorter 'contentFields'/'keywordsFields' names are not
                    # part of the REST payload.
                    prioritizedContentFields = @(
                        @{ fieldName = "content" }
                    )
                    # NOTE(review): 'planType' is declared non-searchable above;
                    # confirm the service accepts it as a semantic keyword field.
                    prioritizedKeywordsFields = @(
                        @{ fieldName = "section" }
                        @{ fieldName = "planType" }
                    )
                }
            }
        )
    }
} | ConvertTo-Json -Depth 10

Write-Host "Creating index 'plan-materials'..." -ForegroundColor Yellow
# A failure here is fatal: nothing else in the script is useful without the index.
Set-SearchResource -Collection "indexes" -Name "plan-materials" -Body $indexDefinition
Write-Host "Index created or updated successfully." -ForegroundColor Green

# Get storage connection string for data source
Write-Host "Getting storage account key..." -ForegroundColor Yellow
$storageKey = az storage account keys list --account-name $StorageAccountName --resource-group $ResourceGroupName --query "[0].value" -o tsv
# `az` is a native executable, so its failure does not raise a terminating
# error on its own; without this check an empty key would be embedded in the
# connection string and only fail later, inside the indexer.
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($storageKey)) {
    throw "Failed to retrieve an access key for storage account '$StorageAccountName' in resource group '$ResourceGroupName'."
}
$storageConnectionString = "DefaultEndpointsProtocol=https;AccountName=$StorageAccountName;AccountKey=$storageKey;EndpointSuffix=core.windows.net"

# Data source definition: the 'plan-materials' blob container of the storage account.
$dataSourceDefinition = @{
    name = "plan-materials-blob"
    type = "azureblob"
    credentials = @{
        connectionString = $storageConnectionString
    }
    container = @{
        name = "plan-materials"
    }
} | ConvertTo-Json -Depth 5

Write-Host "Creating data source 'plan-materials-blob'..." -ForegroundColor Yellow
# Fatal on failure, same as the index.
Set-SearchResource -Collection "datasources" -Name "plan-materials-blob" -Body $dataSourceDefinition
Write-Host "Data source created or updated successfully." -ForegroundColor Green

# Skillset definition with Azure OpenAI embedding
$skillsetDefinition = @{
    name = "plan-materials-skillset"
    description = "Skillset for plan materials with text splitting and embedding"
    skills = @(
        # Step 1: split each document's content into overlapping chunks.
        @{
            "@odata.type" = "#Microsoft.Skills.Text.SplitSkill"
            name = "split-skill"
            description = "Split content into chunks"
            context = "/document"
            inputs = @(
                @{ name = "text"; source = "/document/content" }
            )
            outputs = @(
                @{ name = "textItems"; targetName = "chunks" }
            )
            textSplitMode = "pages"
            maximumPageLength = 2000
            pageOverlapLength = 200
        }
        # Step 2: embed every chunk; the result lands at /document/chunks/*/vector.
        @{
            "@odata.type" = "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill"
            name = "embedding-skill"
            description = "Generate embeddings for content chunks"
            context = "/document/chunks/*"
            resourceUri = $FoundryEndpoint
            deploymentId = "text-embedding-3-small"
            modelName = "text-embedding-3-small"
            inputs = @(
                @{ name = "text"; source = "/document/chunks/*" }
            )
            outputs = @(
                @{ name = "embedding"; targetName = "vector" }
            )
        }
    )
    # Project every chunk into the index as its own search document.
    indexProjections = @{
        selectors = @(
            @{
                targetIndexName = "plan-materials"
                # Field that receives the parent document's key. It must not be
                # the index key field, because the key of a projected chunk is
                # generated by the service.
                parentKeyFieldName = "parent_id"
                sourceContext = "/document/chunks/*"
                mappings = @(
                    @{ name = "content"; source = "/document/chunks/*" }
                    @{ name = "contentVector"; source = "/document/chunks/*/vector" }
                    @{ name = "title"; source = "/document/metadata_storage_name" }
                    @{ name = "metadata_storage_path"; source = "/document/metadata_storage_path" }
                )
            }
        )
        parameters = @{
            # Index only the chunk documents, not the unsplit parent documents.
            projectionMode = "skipIndexingParentDocuments"
        }
    }
} | ConvertTo-Json -Depth 10

Write-Host "Creating skillset 'plan-materials-skillset'..." -ForegroundColor Yellow
# Best effort by design: a skillset failure is reported as a warning so the
# index and data source created above remain usable for manual configuration.
try {
    Set-SearchResource -Collection "skillsets" -Name "plan-materials-skillset" -Body $skillsetDefinition
    Write-Host "Skillset created successfully." -ForegroundColor Green
} catch {
    Write-Host "Warning: Skillset creation failed. Error: $($_.Exception.Message)" -ForegroundColor Yellow
    Write-Host "You may need to configure the skillset manually or check Foundry endpoint permissions." -ForegroundColor Yellow
}

# Indexer: reads from the blob data source, applies the skillset, and writes
# to the 'plan-materials' index on a recurring schedule.
$indexerSpec = @{
    name = "plan-materials-indexer"
    dataSourceName = "plan-materials-blob"
    targetIndexName = "plan-materials"
    skillsetName = "plan-materials-skillset"
    schedule = @{
        interval = "PT5M"  # ISO 8601 duration: run every 5 minutes
    }
    parameters = @{
        configuration = @{
            parsingMode = "json"
            dataToExtract = "contentAndMetadata"
        }
    }
    fieldMappings = @(
        @{ sourceFieldName = "metadata_storage_path"; targetFieldName = "metadata_storage_path" }
    )
    outputFieldMappings = @()
}
$indexerDefinition = $indexerSpec | ConvertTo-Json -Depth 5

# Arguments for the create-or-update request, passed by splatting.
$createIndexerArgs = @{
    Uri     = "$searchEndpoint/indexers/plan-materials-indexer?api-version=2024-05-01-preview"
    Method  = 'Put'
    Headers = $headers
    Body    = $indexerDefinition
}

Write-Host "Creating indexer 'plan-materials-indexer'..." -ForegroundColor Yellow
# Best effort: a failure is downgraded to a warning and the script continues.
try {
    $response = Invoke-RestMethod @createIndexerArgs
    Write-Host "Indexer created successfully." -ForegroundColor Green
} catch {
    Write-Host "Warning: Indexer creation failed. Error: $($_.Exception.Message)" -ForegroundColor Yellow
    Write-Host "The index was created but you may need to populate it manually." -ForegroundColor Yellow
}

# Request an immediate run instead of waiting for the next scheduled interval.
$runIndexerArgs = @{
    Uri     = "$searchEndpoint/indexers/plan-materials-indexer/run?api-version=2024-05-01-preview"
    Method  = 'Post'
    Headers = $headers
}

Write-Host "Running indexer..." -ForegroundColor Yellow
try {
    Invoke-RestMethod @runIndexerArgs
    Write-Host "Indexer started." -ForegroundColor Green
} catch {
    Write-Host "Warning: Could not start indexer. Error: $($_.Exception.Message)" -ForegroundColor Yellow
}

# Final summary of the resource names this script works with.
Write-Host ""
Write-Host "Search index setup complete!" -ForegroundColor Cyan
$summaryLines = @(
    "Index: plan-materials"
    "Data source: plan-materials-blob"
    "Skillset: plan-materials-skillset"
    "Indexer: plan-materials-indexer"
)
foreach ($summaryLine in $summaryLines) {
    Write-Host $summaryLine -ForegroundColor White
}
0 commit comments