From c57d877d656a571be7317def03ffcc6f7cdc1ffb Mon Sep 17 00:00:00 2001 From: Malory Rose Date: Wed, 2 Apr 2025 17:49:23 -0700 Subject: [PATCH 1/9] update post deployment scripts --- azure.yaml | 8 + infra/main.bicep | 32 ++-- infra/main.json | 222 +++------------------- infra/scripts/copy_kb_files.sh | 12 +- infra/scripts/run_create_index_scripts.sh | 43 +++-- 5 files changed, 79 insertions(+), 238 deletions(-) diff --git a/azure.yaml b/azure.yaml index 705bc041..bf0e143b 100644 --- a/azure.yaml +++ b/azure.yaml @@ -31,6 +31,10 @@ hooks: run: | Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan + Write-Host "Run the following command in the terminal to copy data files to storage account: " + Write-Host "$env:copykbfiles" -ForegroundColor Cyan + Write-Host "Run the following command in the terminal to run the scripts: " + Write-Host "$env:createindex" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -38,6 +42,10 @@ hooks: run: | echo "Web app URL: " echo $WEB_APP_URL + echo "Run the following command in the terminal to copy data files to storage account: " + echo $copykbfiles + echo "Run the following command in the terminal to run the scripts: " + echo $createindex shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/infra/main.bicep b/infra/main.bicep index aa34e4a5..34b38604 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -424,6 +424,8 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { } scope: resourceGroup(resourceGroup().name) } +output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' +output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' // //========== Deployment script to upload sample data ========== // @@ -452,21 +454,21 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { // dependsOn:[keyVault,uploadFiles] // } -//========== Deployment script to upload sample data ========== // -module uploadFiles 'deploy_post_deployment_scripts.bicep' = { - name : 'deploy_post_deployment_scripts' - params:{ - solutionName: solutionPrefix - solutionLocation: secondaryLocation - baseUrl: baseUrl - storageAccountName: storageAccount.outputs.storageName - containerName: storageAccount.outputs.storageContainer - managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id - managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId - keyVaultName:aifoundry.outputs.keyvaultName - logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName - } -} +// //========== Deployment script to upload sample data ========== // +// module uploadFiles 'deploy_post_deployment_scripts.bicep' = { +// name : 'deploy_post_deployment_scripts' +// params:{ +// solutionName: solutionPrefix +// solutionLocation: secondaryLocation +// baseUrl: baseUrl +// storageAccountName: storageAccount.outputs.storageName +// containerName: storageAccount.outputs.storageContainer +// managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id +// managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId +// keyVaultName:aifoundry.outputs.keyvaultName +// logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName +// } +// } 
// resource CosmosDB 'Microsoft.DocumentDB/databaseAccounts@2023-04-15' = { diff --git a/infra/main.json b/infra/main.json index 5e8eac8c..3c6b08dd 100644 --- a/infra/main.json +++ b/infra/main.json @@ -4,8 +4,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "745056846550767942" + "version": "0.33.93.31351", + "templateHash": "2424049160506179064" } }, "parameters": { @@ -150,8 +150,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "16854919160820907978" + "version": "0.33.93.31351", + "templateHash": "5719315788994459005" } }, "parameters": { @@ -243,8 +243,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "998804936838864872" + "version": "0.33.93.31351", + "templateHash": "16444845925569233096" } }, "parameters": { @@ -383,8 +383,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "15569997416548251984" + "version": "0.33.93.31351", + "templateHash": "627756007451802146" } }, "parameters": { @@ -981,8 +981,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "4051638923493896337" + "version": "0.33.93.31351", + "templateHash": "12863612675732383951" } }, "parameters": { @@ -1210,8 +1210,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "4077975888118623954" + "version": "0.33.93.31351", + "templateHash": "6455788440385967919" } }, "parameters": { @@ -1696,8 +1696,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "6611317231290784098" + "version": "0.33.93.31351", + "templateHash": "16935405490916431865" } }, "parameters": { @@ -1874,198 +1874,20 @@ "dependsOn": [ "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault')]" ] - }, - { - "type": "Microsoft.Resources/deployments", - "apiVersion": "2022-09-01", - "name": "deploy_post_deployment_scripts", - "properties": { - "expressionEvaluationOptions": { - "scope": "inner" - }, - "mode": "Incremental", - "parameters": { - "solutionName": { - "value": "[variables('solutionPrefix')]" - }, - "solutionLocation": { - "value": "[parameters('secondaryLocation')]" - }, - "baseUrl": { - "value": "[variables('baseUrl')]" - }, - "storageAccountName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value]" - }, - "containerName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageContainer.value]" - }, - "managedIdentityObjectId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.id]" - }, - "managedIdentityClientId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 
'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId]" - }, - "keyVaultName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.keyvaultName.value]" - }, - "logAnalyticsWorkspaceResourceName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.logAnalyticsWorkspaceResourceName.value]" - } - }, - "template": { - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "metadata": { - "_generator": { - "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "17578272684671627358" - } - }, - "parameters": { - "solutionName": { - "type": "string", - "metadata": { - "description": "Solution Name" - } - }, - "solutionLocation": { - "type": "string", - "metadata": { - "description": "Specifies the location for resources." - } - }, - "baseUrl": { - "type": "string" - }, - "managedIdentityObjectId": { - "type": "string" - }, - "managedIdentityClientId": { - "type": "string" - }, - "storageAccountName": { - "type": "string" - }, - "containerName": { - "type": "string" - }, - "containerAppName": { - "type": "string", - "defaultValue": "[format('ca-{0}', parameters('solutionName'))]" - }, - "environmentName": { - "type": "string", - "defaultValue": "[format('cae-{0}', parameters('solutionName'))]" - }, - "imageName": { - "type": "string", - "defaultValue": "python:3.11-alpine" - }, - "setupCopyKbFiles": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/copy_kb_files.sh', parameters('baseUrl'))]" - }, - "setupCreateIndexScriptsUrl": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/run_create_index_scripts.sh', parameters('baseUrl'))]" - }, - "keyVaultName": { - "type": "string" - }, - "logAnalyticsWorkspaceResourceName": { - "type": "string" - } - }, - "resources": [ - { - "type": "Microsoft.App/managedEnvironments", - "apiVersion": "2022-03-01", - "name": "[parameters('environmentName')]", - "location": "[parameters('solutionLocation')]", - "properties": { - "zoneRedundant": false, - "appLogsConfiguration": { - "destination": "log-analytics", - "logAnalyticsConfiguration": { - "customerId": "[reference(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').customerId]", - "sharedKey": "[listKeys(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').primarySharedKey]" - } - } - } - }, - { - "type": "Microsoft.App/containerApps", - "apiVersion": "2022-03-01", - "name": "[parameters('containerAppName')]", - "location": "[parameters('solutionLocation')]", - "identity": { - "type": "UserAssigned", - "userAssignedIdentities": { - "[format('{0}', parameters('managedIdentityObjectId'))]": {} - } - }, - "properties": { - "managedEnvironmentId": "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]", - "configuration": { - "ingress": null, - "activeRevisionsMode": "Single" - }, - "template": { - "scale": { - "minReplicas": 1, - "maxReplicas": 1 - }, - "containers": [ - { - "name": "[parameters('containerAppName')]", - "image": "[parameters('imageName')]", - 
"resources": { - "cpu": 2, - "memory": "4.0Gi" - }, - "command": [ - "/bin/sh", - "-c", - "[format('mkdir -p /scripts && apk add --no-cache curl bash jq py3-pip gcc musl-dev libffi-dev openssl-dev python3-dev && pip install --upgrade azure-cli && apk add --no-cache --virtual .build-deps build-base unixodbc-dev && curl -s -o msodbcsql18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/msodbcsql18_18.4.1.1-1_amd64.apk && curl -s -o mssql-tools18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted msodbcsql18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted mssql-tools18_18.4.1.1-1_amd64.apk && curl -s -o /scripts/copy_kb_files.sh {0} && chmod +x /scripts/copy_kb_files.sh && sh -x /scripts/copy_kb_files.sh {1} {2} {3} {4} && curl -s -o /scripts/run_create_index_scripts.sh {5} && chmod +x /scripts/run_create_index_scripts.sh && sh -x /scripts/run_create_index_scripts.sh {6} {7} {8} && apk add --no-cache ca-certificates less ncurses-terminfo-base krb5-libs libgcc libintl libssl3 libstdc++ tzdata userspace-rcu zlib icu-libs curl && apk -X https://dl-cdn.alpinelinux.org/alpine/edge/main add --no-cache lttng-ust openssh-client && echo \"Container app setup completed successfully.\"', parameters('setupCopyKbFiles'), parameters('storageAccountName'), parameters('containerName'), parameters('baseUrl'), parameters('managedIdentityClientId'), parameters('setupCreateIndexScriptsUrl'), parameters('baseUrl'), parameters('keyVaultName'), parameters('managedIdentityClientId'))]" - ], - "env": [ - { - "name": "STORAGE_ACCOUNT_NAME", - "value": "[parameters('storageAccountName')]" - }, - { - "name": "CONTAINER_NAME", - "value": "[parameters('containerName')]" - }, - { - "name": "APPSETTING_WEBSITE_SITE_NAME", - "value": "DUMMY" - } - ] - } - ] - } - }, - "dependsOn": [ - "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]" - ] - } - ] - } - }, - "dependsOn": [ - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account')]" - ] } ], "outputs": { "WEB_APP_URL": { "type": "string", "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_app_service'), '2022-09-01').outputs.webAppUrl.value]" + }, + "copykbfiles": { + "type": "string", + "value": "[format('./infra/scripts/copy_kb_files.sh {0} {1} {2}', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value, reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageContainer.value, 
reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId)]" + }, + "createindex": { + "type": "string", + "value": "[format('./infra/scripts/run_create_index_scripts.sh {0} {1}', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault'), '2022-09-01').outputs.keyvaultName.value, reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId)]" } } } \ No newline at end of file diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index ff3897ee..272ccc96 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -3,8 +3,8 @@ # Variables storageAccount="$1" fileSystem="$2" -baseUrl="$3" -managedIdentityClientId="$4" +# baseUrl="$3" +managedIdentityClientId="$3" zipFileName1="pdfdata.zip" extractedFolder1="pdf" @@ -15,15 +15,15 @@ zipUrl1=${baseUrl}"infra/data/pdfdata.zip" # zipUrl2=${baseUrl}"infra/data/audio_data.zip" # Create folders if they do not exist -mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" +# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" # mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder2" # Download the zip file -curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" +# curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" # curl --output /mnt/azscripts/azscriptinput/"$zipFileName2" "$zipUrl2" # Extract the zip file -unzip /mnt/azscripts/azscriptinput/"$zipFileName1" -d /mnt/azscripts/azscriptinput/"$extractedFolder1" +unzip infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1" # unzip /mnt/azscripts/azscriptinput/"$zipFileName2" -d /mnt/azscripts/azscriptinput/"$extractedFolder2" echo "Script Started" @@ -31,5 +31,5 @@ echo "Script Started" # Authenticate with Azure using managed identity az login --identity --client-id ${managedIdentityClientId} # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. 
-az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source /mnt/azscripts/azscriptinput/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite +az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite # az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite \ No newline at end of file diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 9c598bcf..7c80f1ec 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -2,19 +2,28 @@ echo "started the script" # Variables -baseUrl="$1" -keyvaultName="$2" -managedIdentityClientId="$3" -requirementFile="requirements.txt" -requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt" +# baseUrl="$1" +keyvaultName="$1" +managedIdentityClientId="$2" +# requirementFile="infra/scripts/index_scripts/requirements.txt" +# requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt" echo "Script Started" -# Download the create_index and create table python files -curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py" -curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py" +echo "Getting signed in user id" +signed_user_id=$(az ad signed-in-user show --query id -o tsv) +# # Download the create_index and create table python files +# curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py" +# curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py" +# Define the scope for the Key Vault (replace with your Key Vault resource ID) +echo "Getting key vault resource id" +key_vault_resource_id=$(az keyvault show --name $keyvaultName --query id --output tsv) + +# Assign the Key Vault Administrator role to the user +echo "Assigning the Key Vault Administrator role to the user." 
+az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id # RUN apt-get update # RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev @@ -24,17 +33,17 @@ curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_pro # pip install pyodbc # Download the requirement file -curl --output "$requirementFile" "$requirementFileUrl" +# curl --output "$requirementFile" "$requirementFileUrl" -echo "Download completed" +# echo "Download completed" #Replace key vault name -sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "01_create_search_index.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "01_create_search_index.py" -sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "02_process_data.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "02_process_data.py" +sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/01_create_search_index.py" +sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py" +sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/02_process_data.py" +sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py" -pip install -r requirements.txt +pip install -r infra/scripts/index_scripts/requirements.txt -python 01_create_search_index.py -python 02_process_data.py +python infra/scripts/index_scripts/01_create_search_index.py +python infra/scripts/index_scripts/02_process_data.py From 2f9bf7033e49b663c730acaa782f0337bdd920be Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Thu, 3 Apr 2025 16:10:02 +0530 Subject: [PATCH 2/9] refactor: update scripts and add process_sample_data.sh --- .gitignore | 4 ++- azure.yaml | 20 ++++++++------ infra/main.bicep | 8 ++++-- infra/scripts/copy_kb_files.sh | 22 ++++++++++++--- infra/scripts/process_sample_data.sh | 33 +++++++++++++++++++++++ infra/scripts/run_create_index_scripts.sh | 28 ++++++++++++++++--- 6 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 infra/scripts/process_sample_data.sh diff --git a/.gitignore b/.gitignore index ec2bcbd7..dd27693a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ __pycache__/ venv myenv -scriptsenv/ \ No newline at end of file +scriptsenv/ + +scriptenv \ No newline at end of file diff --git a/azure.yaml b/azure.yaml index bf0e143b..12911a45 100644 --- a/azure.yaml +++ b/azure.yaml @@ -31,10 +31,12 @@ hooks: run: | Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan - Write-Host "Run the following command in the terminal to copy data files to storage account: " - Write-Host "$env:copykbfiles" -ForegroundColor Cyan - Write-Host "Run the following command in the terminal to run the scripts: " - Write-Host "$env:createindex" -ForegroundColor Cyan + # Write-Host "Run the following command in the terminal to copy data files to storage account: " + # Write-Host "$env:copykbfiles" -ForegroundColor Cyan + # Write-Host "Run the following command in the terminal to run the scripts: " + # Write-Host "$env:createindex" -ForegroundColor Cyan + Write-Host "If you want to use the Sample Data, run the following command in the Bash terminal to process it:" + Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -42,10 +44,12 @@ hooks: run: | echo "Web app 
URL: " echo $WEB_APP_URL - echo "Run the following command in the terminal to copy data files to storage account: " - echo $copykbfiles - echo "Run the following command in the terminal to run the scripts: " - echo $createindex + # echo "Run the following command in the terminal to copy data files to storage account: " + # echo $copykbfiles + # echo "Run the following command in the terminal to run the scripts: " + # echo $createindex + Write-Host "If you want to use the Sample Data, run the following command in the Bash terminal to process it:" + Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME kv-$env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/infra/main.bicep b/infra/main.bicep index 34b38604..f2a5526d 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -424,8 +424,12 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { } scope: resourceGroup(resourceGroup().name) } -output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' -output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' +// output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' +// output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' + +output STORAGE_ACCOUNT_NAME string = storageAccount.outputs.storageName +output STORAGE_CONTAINER_NAME string = storageAccount.outputs.storageContainer +output KEY_VAULT_NAME string = kvault.outputs.keyvaultName // //========== Deployment script to upload sample data ========== // diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index 272ccc96..32c7bfb6 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -28,8 +28,24 @@ unzip infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1" echo "Script Started" -# Authenticate with Azure using managed identity -az login --identity --client-id ${managedIdentityClientId} +# Authenticate with Azure +if az account show &> /dev/null; then + echo "Already authenticated with Azure." +else + if [ -n "$managedIdentityClientId" ]; then + # Use managed identity if running in Azure + echo "Authenticating with Managed Identity..." + az login --identity --client-id ${managedIdentityClientId} + else + # Use Azure CLI login if running locally + echo "Authenticating with Azure CLI..." + az login + fi + echo "Not authenticated with Azure. Attempting to authenticate..." +fi + + # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. -az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite +echo "Uploading files to Azure Storage..." 
+az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite # az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite \ No newline at end of file diff --git a/infra/scripts/process_sample_data.sh b/infra/scripts/process_sample_data.sh new file mode 100644 index 00000000..38f0daa2 --- /dev/null +++ b/infra/scripts/process_sample_data.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Variables +storageAccount="$1" +fileSystem="$2" +keyvaultName="$3" +managedIdentityClientId="$4" + +# Check if all required arguments are provided +if [ -z "$storageAccount" ] || [ -z "$fileSystem" ] || [ -z "$keyvaultName" ]; then + echo "Usage: $0 [managedIdentityClientId]" + exit 1 +fi + +# Call copy_kb_files.sh +echo "Running copy_kb_files.sh" +bash infra/scripts/copy_kb_files.sh "$storageAccount" "$fileSystem" "$managedIdentityClientId" +if [ $? -ne 0 ]; then + echo "Error: copy_kb_files.sh failed." + exit 1 +fi +echo "copy_kb_files.sh completed successfully." + +# Call run_create_index_scripts.sh +echo "Running run_create_index_scripts.sh" +bash infra/scripts/run_create_index_scripts.sh "$keyvaultName" "$managedIdentityClientId" +if [ $? -ne 0 ]; then + echo "Error: run_create_index_scripts.sh failed." + exit 1 +fi +echo "run_create_index_scripts.sh completed successfully." + +echo "All scripts executed successfully." \ No newline at end of file diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 7c80f1ec..e5a6655b 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -23,7 +23,7 @@ key_vault_resource_id=$(az keyvault show --name $keyvaultName --query id --outpu # Assign the Key Vault Administrator role to the user echo "Assigning the Key Vault Administrator role to the user." -az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id +az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope /$key_vault_resource_id # RUN apt-get update # RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev @@ -39,11 +39,33 @@ az role assignment create --assignee $signed_user_id --role "Key Vault Administr #Replace key vault name sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/01_create_search_index.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py" sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/02_process_data.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py" +if [ -n "$managedIdentityClientId" ]; then + sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py" + sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py" +fi + +# create virtual environment +echo "Creating virtual environment" +# Check if the virtual environment already exists +if [ -d "infra/scripts/scriptenv" ]; then + echo "Virtual environment already exists. Skipping creation." 
+else + echo "Creating virtual environment" + python3 -m venv infra/scripts/scriptenv +fi +source infra/scripts/scriptenv/Scripts/activate + +# Install the requirements +echo "Installing requirements" pip install -r infra/scripts/index_scripts/requirements.txt +echo "Requirements installed" +# Run the scripts +echo "Running the scripts" +echo "Creating the search index" python infra/scripts/index_scripts/01_create_search_index.py +echo "Processing the data" python infra/scripts/index_scripts/02_process_data.py +echo "Scripts completed" From ad5fed243d4418a29b2fca7932481988b5330b73 Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Thu, 3 Apr 2025 18:48:57 +0530 Subject: [PATCH 3/9] minor fix --- azure.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/azure.yaml b/azure.yaml index 12911a45..84cc5530 100644 --- a/azure.yaml +++ b/azure.yaml @@ -35,7 +35,7 @@ hooks: # Write-Host "$env:copykbfiles" -ForegroundColor Cyan # Write-Host "Run the following command in the terminal to run the scripts: " # Write-Host "$env:createindex" -ForegroundColor Cyan - Write-Host "If you want to use the Sample Data, run the following command in the Bash terminal to process it:" + Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: pwsh continueOnError: false @@ -48,8 +48,9 @@ hooks: # echo $copykbfiles # echo "Run the following command in the terminal to run the scripts: " # echo $createindex - Write-Host "If you want to use the Sample Data, run the following command in the Bash terminal to process it:" - Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME kv-$env:KEY_VAULT_NAME" -ForegroundColor Cyan + echo "" + echo "If you want to use the Sample Data, run the following command in the terminal to process it:" + echo "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: sh continueOnError: false interactive: true \ No newline at end of file From 3828e03979f8d205124d3bf678a9b49aa9dce2e5 Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Thu, 3 Apr 2025 19:01:44 +0530 Subject: [PATCH 4/9] minor error handling fix for python scripts in index creation scripts --- infra/scripts/run_create_index_scripts.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index e5a6655b..b33e75f3 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -63,9 +63,18 @@ pip install -r infra/scripts/index_scripts/requirements.txt echo "Requirements installed" # Run the scripts -echo "Running the scripts" +echo "Running the pyhton scripts" echo "Creating the search index" python infra/scripts/index_scripts/01_create_search_index.py +if [ $? -ne 0 ]; then + echo "Error: 01_create_search_index.py failed." + exit 1 +fi + echo "Processing the data" python infra/scripts/index_scripts/02_process_data.py +if [ $? -ne 0 ]; then + echo "Error: 02_process_data.py failed." 
+ exit 1 +fi echo "Scripts completed" From 6e613f4b20398e4d9c18928e86630476f1106b01 Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Thu, 3 Apr 2025 14:02:01 +0000 Subject: [PATCH 5/9] correct echo message in index creation script --- azure.yaml | 4 ++-- infra/scripts/run_create_index_scripts.sh | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/azure.yaml b/azure.yaml index 84cc5530..dd4f1208 100644 --- a/azure.yaml +++ b/azure.yaml @@ -36,7 +36,7 @@ hooks: # Write-Host "Run the following command in the terminal to run the scripts: " # Write-Host "$env:createindex" -ForegroundColor Cyan Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" - Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan + Write-Host "bash ./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -50,7 +50,7 @@ hooks: # echo $createindex echo "" echo "If you want to use the Sample Data, run the following command in the terminal to process it:" - echo "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan + echo "bash ./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME" shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index b33e75f3..3940153b 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -47,7 +47,6 @@ fi # create virtual environment -echo "Creating virtual environment" # Check if the virtual environment already exists if [ -d "infra/scripts/scriptenv" ]; then echo "Virtual environment already exists. Skipping creation." @@ -55,7 +54,7 @@ else echo "Creating virtual environment" python3 -m venv infra/scripts/scriptenv fi -source infra/scripts/scriptenv/Scripts/activate +source infra/scripts/scriptenv/bin/activate # Install the requirements echo "Installing requirements" @@ -63,7 +62,7 @@ pip install -r infra/scripts/index_scripts/requirements.txt echo "Requirements installed" # Run the scripts -echo "Running the pyhton scripts" +echo "Running the python scripts" echo "Creating the search index" python infra/scripts/index_scripts/01_create_search_index.py if [ $? -ne 0 ]; then From 5347792de53f3ebe3ce95b51a48b61b504733f6e Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Thu, 3 Apr 2025 19:56:40 +0530 Subject: [PATCH 6/9] feat: add role assignment for signed-in user in copy_kb_files.sh --- infra/scripts/copy_kb_files.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index 32c7bfb6..dd78c87a 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -44,6 +44,14 @@ else echo "Not authenticated with Azure. Attempting to authenticate..." 
fi +echo "Getting signed in user id" +signed_user_id=$(az ad signed-in-user show --query id -o tsv) + +echo "Getting storage account resource id" +storage_account_resource_id=$(az storage account show --name $storageAccount --query id --output tsv) + +# add Storage Blob Data Contributor role to the user +az role assignment create --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope /$storage_account_resource_id # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. echo "Uploading files to Azure Storage..." From 97ca74e9efe749834f1505d7a70e9157cc612f40 Mon Sep 17 00:00:00 2001 From: Malory Rose Date: Thu, 3 Apr 2025 11:08:25 -0700 Subject: [PATCH 7/9] update reame --- README.md | 7 ++++++- azure.yaml | 4 ---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c53fd90e..b1cbf130 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,12 @@ To change the azd parameters from the default values, follow the steps [here](./ * This deployment will take *7-10 minutes* to provision the resources in your account and set up the solution with sample data. * If you get an error or timeout with deployment, changing the location can help, as there may be availability constraints for the resources. -5. Once the deployment has completed successfully, open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. +5. Once the deployment has completed successfully and you would like to use the sample data, run the bash command printed in the terminal. The bash command will look like the following: + ```shell + ./infra/scripts/process_sample_data.sh + ``` + +6. Open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. 6. You can now delete the resources by running `azd down`, if you are done trying out the application. 
diff --git a/azure.yaml b/azure.yaml index dd4f1208..ec742a89 100644 --- a/azure.yaml +++ b/azure.yaml @@ -31,10 +31,6 @@ hooks: run: | Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan - # Write-Host "Run the following command in the terminal to copy data files to storage account: " - # Write-Host "$env:copykbfiles" -ForegroundColor Cyan - # Write-Host "Run the following command in the terminal to run the scripts: " - # Write-Host "$env:createindex" -ForegroundColor Cyan Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" Write-Host "bash ./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: pwsh From d4749b0dd0a26285955de4f776a0de0f9bcd70c8 Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Fri, 4 Apr 2025 12:10:41 +0000 Subject: [PATCH 8/9] add role checks for Storage Blob Data Contributor and Key Vault Administrator in scripts and eliminated unnecessary output --- azure.yaml | 4 ---- infra/scripts/copy_kb_files.sh | 21 +++++++++++++++++---- infra/scripts/run_create_index_scripts.sh | 20 ++++++++++++++++---- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/azure.yaml b/azure.yaml index ec742a89..d8d0047b 100644 --- a/azure.yaml +++ b/azure.yaml @@ -40,10 +40,6 @@ hooks: run: | echo "Web app URL: " echo $WEB_APP_URL - # echo "Run the following command in the terminal to copy data files to storage account: " - # echo $copykbfiles - # echo "Run the following command in the terminal to run the scripts: " - # echo $createindex echo "" echo "If you want to use the Sample Data, run the following command in the terminal to process it:" echo "bash ./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME" diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index dd78c87a..43ad34cf 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -50,10 +50,23 @@ signed_user_id=$(az ad signed-in-user show --query id -o tsv) echo "Getting storage account resource id" storage_account_resource_id=$(az storage account show --name $storageAccount --query id --output tsv) -# add Storage Blob Data Contributor role to the user -az role assignment create --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope /$storage_account_resource_id +#check if user has the Storage Blob Data Contributor role, add it if not +echo "Checking if user has the Storage Blob Data Contributor role" +role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --query "[].roleDefinitionId" -o tsv) +if [ -z "$role_assignment" ]; then + echo "User does not have the Storage Blob Data Contributor role. Assigning the role." + MSYS_NO_PATHCONV=1 az role assignment create --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --output none + if [ $? -eq 0 ]; then + echo "Role assignment completed successfully." + else + echo "Error: Role assignment failed." + exit 1 + fi +else + echo "User already has the Storage Blob Data Contributor role." +fi # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. -echo "Uploading files to Azure Storage..." 
-az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite +echo "Uploading files to Azure Storage" +az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite --output none # az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite \ No newline at end of file diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 3940153b..66ef7e7f 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -21,9 +21,21 @@ signed_user_id=$(az ad signed-in-user show --query id -o tsv) echo "Getting key vault resource id" key_vault_resource_id=$(az keyvault show --name $keyvaultName --query id --output tsv) -# Assign the Key Vault Administrator role to the user -echo "Assigning the Key Vault Administrator role to the user." -az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope /$key_vault_resource_id +# Check if the user has the Key Vault Administrator role +echo "Checking if user has the Key Vault Administrator role" +role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id --query "[].roleDefinitionId" -o tsv) +if [ -z "$role_assignment" ]; then + echo "User does not have the Key Vault Administrator role. Assigning the role." + MSYS_NO_PATHCONV=1 az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id --output none + if [ $? -eq 0 ]; then + echo "Key Vault Administrator role assigned successfully." + else + echo "Failed to assign Key Vault Administrator role." + exit 1 + fi +else + echo "User already has the Key Vault Administrator role." +fi # RUN apt-get update # RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev @@ -58,7 +70,7 @@ source infra/scripts/scriptenv/bin/activate # Install the requirements echo "Installing requirements" -pip install -r infra/scripts/index_scripts/requirements.txt +pip install --quiet -r infra/scripts/index_scripts/requirements.txt echo "Requirements installed" # Run the scripts From 98686205fb9ce85ed7846e118771970b0b04d8d2 Mon Sep 17 00:00:00 2001 From: Harsh-Microsoft Date: Fri, 4 Apr 2025 19:17:19 +0530 Subject: [PATCH 9/9] handled virtual environment activation for different OS --- infra/scripts/run_create_index_scripts.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 66ef7e7f..cee79f40 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -66,7 +66,12 @@ else echo "Creating virtual environment" python3 -m venv infra/scripts/scriptenv fi -source infra/scripts/scriptenv/bin/activate + +# handling virtual environment activation for different OS +activate_env_output=$(source infra/scripts/scriptenv/bin/activate 2>&1) +if [ -n "$activate_env_output" ]; then + source infra/scripts/scriptenv/Scripts/activate +fi # Install the requirements echo "Installing requirements"
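
Taken together, the series replaces the Bicep-driven post-deployment container script with a locally run `infra/scripts/process_sample_data.sh`, fed by the new `STORAGE_ACCOUNT_NAME`, `STORAGE_CONTAINER_NAME`, and `KEY_VAULT_NAME` outputs. A minimal usage sketch follows; the values below are placeholders (the exact command with real values is printed by the azd postdeploy hook, and `azd env get-values` should also list the outputs), and the optional fourth argument is only needed when running under a managed identity instead of an interactive `az login`:

```shell
# Placeholder values; substitute the deployment outputs printed by the azd postdeploy hook.
STORAGE_ACCOUNT_NAME="<storage-account-name>"
STORAGE_CONTAINER_NAME="<container-name>"
KEY_VAULT_NAME="<key-vault-name>"

# Uploads the extracted pdf sample data to the storage container, then runs the
# index scripts (01_create_search_index.py, 02_process_data.py) against the Key Vault.
bash ./infra/scripts/process_sample_data.sh "$STORAGE_ACCOUNT_NAME" "$STORAGE_CONTAINER_NAME" "$KEY_VAULT_NAME"
```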