diff --git a/.gitignore b/.gitignore index ec2bcbd7..dd27693a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ __pycache__/ venv myenv -scriptsenv/ \ No newline at end of file +scriptsenv/ + +scriptenv \ No newline at end of file diff --git a/README.md b/README.md index c53fd90e..b1cbf130 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,12 @@ To change the azd parameters from the default values, follow the steps [here](./ * This deployment will take *7-10 minutes* to provision the resources in your account and set up the solution with sample data. * If you get an error or timeout with deployment, changing the location can help, as there may be availability constraints for the resources. -5. Once the deployment has completed successfully, open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. +5. Once the deployment has completed successfully, if you would like to use the sample data, run the bash command printed in the terminal. The command will look like the following, with your deployment's storage account, storage container, and Key Vault names filled in: + ```shell + bash ./infra/scripts/process_sample_data.sh STORAGE_ACCOUNT_NAME STORAGE_CONTAINER_NAME KEY_VAULT_NAME + ``` + +6. Open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. 6. You can now delete the resources by running `azd down`, if you are done trying out the application. 
diff --git a/azure.yaml b/azure.yaml index 705bc041..d8d0047b 100644 --- a/azure.yaml +++ b/azure.yaml @@ -31,6 +31,8 @@ hooks: run: | Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan + Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" + Write-Host "bash ./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -38,6 +40,9 @@ hooks: run: | echo "Web app URL: " echo $WEB_APP_URL + echo "" + echo "If you want to use the Sample Data, run the following command in the terminal to process it:" + echo "bash ./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME" shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/infra/main.bicep b/infra/main.bicep index aa34e4a5..f2a5526d 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -424,6 +424,12 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { } scope: resourceGroup(resourceGroup().name) } +// output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' +// output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' + +output STORAGE_ACCOUNT_NAME string = storageAccount.outputs.storageName +output STORAGE_CONTAINER_NAME string = storageAccount.outputs.storageContainer +output KEY_VAULT_NAME string = kvault.outputs.keyvaultName // //========== Deployment script to upload sample data ========== // @@ -452,21 +458,21 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { // dependsOn:[keyVault,uploadFiles] // } -//========== Deployment script to upload sample data 
========== // -module uploadFiles 'deploy_post_deployment_scripts.bicep' = { - name : 'deploy_post_deployment_scripts' - params:{ - solutionName: solutionPrefix - solutionLocation: secondaryLocation - baseUrl: baseUrl - storageAccountName: storageAccount.outputs.storageName - containerName: storageAccount.outputs.storageContainer - managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id - managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId - keyVaultName:aifoundry.outputs.keyvaultName - logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName - } -} +// //========== Deployment script to upload sample data ========== // +// module uploadFiles 'deploy_post_deployment_scripts.bicep' = { +// name : 'deploy_post_deployment_scripts' +// params:{ +// solutionName: solutionPrefix +// solutionLocation: secondaryLocation +// baseUrl: baseUrl +// storageAccountName: storageAccount.outputs.storageName +// containerName: storageAccount.outputs.storageContainer +// managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id +// managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId +// keyVaultName:aifoundry.outputs.keyvaultName +// logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName +// } +// } // resource CosmosDB 'Microsoft.DocumentDB/databaseAccounts@2023-04-15' = { diff --git a/infra/main.json b/infra/main.json index 5e8eac8c..3c6b08dd 100644 --- a/infra/main.json +++ b/infra/main.json @@ -4,8 +4,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "745056846550767942" + "version": "0.33.93.31351", + "templateHash": "2424049160506179064" } }, "parameters": { @@ -150,8 +150,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "16854919160820907978" + "version": "0.33.93.31351", + "templateHash": "5719315788994459005" 
} }, "parameters": { @@ -243,8 +243,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "998804936838864872" + "version": "0.33.93.31351", + "templateHash": "16444845925569233096" } }, "parameters": { @@ -383,8 +383,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "15569997416548251984" + "version": "0.33.93.31351", + "templateHash": "627756007451802146" } }, "parameters": { @@ -981,8 +981,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "4051638923493896337" + "version": "0.33.93.31351", + "templateHash": "12863612675732383951" } }, "parameters": { @@ -1210,8 +1210,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "4077975888118623954" + "version": "0.33.93.31351", + "templateHash": "6455788440385967919" } }, "parameters": { @@ -1696,8 +1696,8 @@ "metadata": { "_generator": { "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "6611317231290784098" + "version": "0.33.93.31351", + "templateHash": "16935405490916431865" } }, "parameters": { @@ -1874,198 +1874,20 @@ "dependsOn": [ "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault')]" ] - }, - { - "type": "Microsoft.Resources/deployments", - "apiVersion": "2022-09-01", - "name": "deploy_post_deployment_scripts", - "properties": { - "expressionEvaluationOptions": { - "scope": "inner" - }, - "mode": "Incremental", - "parameters": { - "solutionName": { - "value": "[variables('solutionPrefix')]" - }, - "solutionLocation": { - "value": "[parameters('secondaryLocation')]" - }, - "baseUrl": { - "value": "[variables('baseUrl')]" - }, - "storageAccountName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, 
resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value]" - }, - "containerName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageContainer.value]" - }, - "managedIdentityObjectId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.id]" - }, - "managedIdentityClientId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId]" - }, - "keyVaultName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.keyvaultName.value]" - }, - "logAnalyticsWorkspaceResourceName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.logAnalyticsWorkspaceResourceName.value]" - } - }, - "template": { - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "metadata": { - "_generator": { - "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "17578272684671627358" - } - }, - "parameters": { - "solutionName": { - "type": "string", - "metadata": { - "description": "Solution Name" - } - }, - 
"solutionLocation": { - "type": "string", - "metadata": { - "description": "Specifies the location for resources." - } - }, - "baseUrl": { - "type": "string" - }, - "managedIdentityObjectId": { - "type": "string" - }, - "managedIdentityClientId": { - "type": "string" - }, - "storageAccountName": { - "type": "string" - }, - "containerName": { - "type": "string" - }, - "containerAppName": { - "type": "string", - "defaultValue": "[format('ca-{0}', parameters('solutionName'))]" - }, - "environmentName": { - "type": "string", - "defaultValue": "[format('cae-{0}', parameters('solutionName'))]" - }, - "imageName": { - "type": "string", - "defaultValue": "python:3.11-alpine" - }, - "setupCopyKbFiles": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/copy_kb_files.sh', parameters('baseUrl'))]" - }, - "setupCreateIndexScriptsUrl": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/run_create_index_scripts.sh', parameters('baseUrl'))]" - }, - "keyVaultName": { - "type": "string" - }, - "logAnalyticsWorkspaceResourceName": { - "type": "string" - } - }, - "resources": [ - { - "type": "Microsoft.App/managedEnvironments", - "apiVersion": "2022-03-01", - "name": "[parameters('environmentName')]", - "location": "[parameters('solutionLocation')]", - "properties": { - "zoneRedundant": false, - "appLogsConfiguration": { - "destination": "log-analytics", - "logAnalyticsConfiguration": { - "customerId": "[reference(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').customerId]", - "sharedKey": "[listKeys(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').primarySharedKey]" - } - } - } - }, - { - "type": "Microsoft.App/containerApps", - "apiVersion": "2022-03-01", - "name": "[parameters('containerAppName')]", - "location": "[parameters('solutionLocation')]", - "identity": { - "type": "UserAssigned", - 
"userAssignedIdentities": { - "[format('{0}', parameters('managedIdentityObjectId'))]": {} - } - }, - "properties": { - "managedEnvironmentId": "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]", - "configuration": { - "ingress": null, - "activeRevisionsMode": "Single" - }, - "template": { - "scale": { - "minReplicas": 1, - "maxReplicas": 1 - }, - "containers": [ - { - "name": "[parameters('containerAppName')]", - "image": "[parameters('imageName')]", - "resources": { - "cpu": 2, - "memory": "4.0Gi" - }, - "command": [ - "/bin/sh", - "-c", - "[format('mkdir -p /scripts && apk add --no-cache curl bash jq py3-pip gcc musl-dev libffi-dev openssl-dev python3-dev && pip install --upgrade azure-cli && apk add --no-cache --virtual .build-deps build-base unixodbc-dev && curl -s -o msodbcsql18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/msodbcsql18_18.4.1.1-1_amd64.apk && curl -s -o mssql-tools18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted msodbcsql18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted mssql-tools18_18.4.1.1-1_amd64.apk && curl -s -o /scripts/copy_kb_files.sh {0} && chmod +x /scripts/copy_kb_files.sh && sh -x /scripts/copy_kb_files.sh {1} {2} {3} {4} && curl -s -o /scripts/run_create_index_scripts.sh {5} && chmod +x /scripts/run_create_index_scripts.sh && sh -x /scripts/run_create_index_scripts.sh {6} {7} {8} && apk add --no-cache ca-certificates less ncurses-terminfo-base krb5-libs libgcc libintl libssl3 libstdc++ tzdata userspace-rcu zlib icu-libs curl && apk -X https://dl-cdn.alpinelinux.org/alpine/edge/main add --no-cache lttng-ust openssh-client && echo \"Container app setup completed successfully.\"', parameters('setupCopyKbFiles'), parameters('storageAccountName'), parameters('containerName'), parameters('baseUrl'), 
parameters('managedIdentityClientId'), parameters('setupCreateIndexScriptsUrl'), parameters('baseUrl'), parameters('keyVaultName'), parameters('managedIdentityClientId'))]" - ], - "env": [ - { - "name": "STORAGE_ACCOUNT_NAME", - "value": "[parameters('storageAccountName')]" - }, - { - "name": "CONTAINER_NAME", - "value": "[parameters('containerName')]" - }, - { - "name": "APPSETTING_WEBSITE_SITE_NAME", - "value": "DUMMY" - } - ] - } - ] - } - }, - "dependsOn": [ - "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]" - ] - } - ] - } - }, - "dependsOn": [ - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account')]" - ] } ], "outputs": { "WEB_APP_URL": { "type": "string", "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_app_service'), '2022-09-01').outputs.webAppUrl.value]" + }, + "copykbfiles": { + "type": "string", + "value": "[format('./infra/scripts/copy_kb_files.sh {0} {1} {2}', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value, reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), 
'2022-09-01').outputs.storageContainer.value, reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId)]" + }, + "createindex": { + "type": "string", + "value": "[format('./infra/scripts/run_create_index_scripts.sh {0} {1}', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault'), '2022-09-01').outputs.keyvaultName.value, reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId)]" } } } \ No newline at end of file diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index ff3897ee..43ad34cf 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -3,8 +3,8 @@ # Variables storageAccount="$1" fileSystem="$2" -baseUrl="$3" -managedIdentityClientId="$4" +# baseUrl="$3" +managedIdentityClientId="$3" zipFileName1="pdfdata.zip" extractedFolder1="pdf" @@ -15,21 +15,58 @@ zipUrl1=${baseUrl}"infra/data/pdfdata.zip" # zipUrl2=${baseUrl}"infra/data/audio_data.zip" # Create folders if they do not exist -mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" +# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" # mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder2" # Download the zip file -curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" +# curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" # curl --output /mnt/azscripts/azscriptinput/"$zipFileName2" "$zipUrl2" # Extract the zip file -unzip /mnt/azscripts/azscriptinput/"$zipFileName1" -d 
/mnt/azscripts/azscriptinput/"$extractedFolder1"
+unzip -o infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1"  # -o: overwrite on re-run instead of prompting
 # unzip /mnt/azscripts/azscriptinput/"$zipFileName2" -d /mnt/azscripts/azscriptinput/"$extractedFolder2"
 echo "Script Started"
-# Authenticate with Azure using managed identity
-az login --identity --client-id ${managedIdentityClientId}
+# Authenticate with Azure (skipped when the CLI already has an active session)
+if az account show &> /dev/null; then
+    echo "Already authenticated with Azure."
+else
+    echo "Not authenticated with Azure. Attempting to authenticate..."
+    if [ -n "$managedIdentityClientId" ]; then
+        # Use managed identity if running in Azure
+        echo "Authenticating with Managed Identity..."
+        az login --identity --client-id "${managedIdentityClientId}"
+    else
+        # Use Azure CLI login if running locally
+        echo "Authenticating with Azure CLI..."
+        az login
+    fi
+fi
+
+echo "Getting signed in user id"
+signed_user_id=$(az ad signed-in-user show --query id -o tsv)  # NOTE(review): only resolves for user accounts; confirm behaviour after a managed-identity login
+
+echo "Getting storage account resource id"
+storage_account_resource_id=$(az storage account show --name "$storageAccount" --query id --output tsv)
+
+# Ensure the signed-in user holds Storage Blob Data Contributor on the account, assigning it if missing. MSYS_NO_PATHCONV=1 keeps Git Bash from rewriting the scope path.
+echo "Checking if user has the Storage Blob Data Contributor role"
+role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee "$signed_user_id" --role "Storage Blob Data Contributor" --scope "$storage_account_resource_id" --query "[].roleDefinitionId" -o tsv)
+if [ -z "$role_assignment" ]; then
+    echo "User does not have the Storage Blob Data Contributor role. Assigning the role."
+    MSYS_NO_PATHCONV=1 az role assignment create --assignee "$signed_user_id" --role "Storage Blob Data Contributor" --scope "$storage_account_resource_id" --output none
+    if [ $? -eq 0 ]; then
+        echo "Role assignment completed successfully."
+    else
+        echo "Error: Role assignment failed."
+        exit 1
+    fi
+else
+    echo "User already has the Storage Blob Data Contributor role."
+fi
+
 # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication.
-az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source /mnt/azscripts/azscriptinput/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite
+echo "Uploading files to Azure Storage"
+az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite --output none
 # az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite
\ No newline at end of file
diff --git a/infra/scripts/process_sample_data.sh b/infra/scripts/process_sample_data.sh
new file mode 100644
index 00000000..38f0daa2
--- /dev/null
+++ b/infra/scripts/process_sample_data.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Positional arguments: storage account name, storage container name, Key Vault name, and an optional managed identity client id
+storageAccount="$1"
+fileSystem="$2"
+keyvaultName="$3"
+managedIdentityClientId="$4"
+
+# The first three arguments are required; print the expected invocation and stop if any is missing
+if [ -z "$storageAccount" ] || [ -z "$fileSystem" ] || [ -z "$keyvaultName" ]; then
+    echo "Usage: $0 STORAGE_ACCOUNT_NAME STORAGE_CONTAINER_NAME KEY_VAULT_NAME [managedIdentityClientId]"
+    exit 1
+fi
+
+# Call copy_kb_files.sh (path is relative, so this must be run from the repository root)
+echo "Running copy_kb_files.sh"
+bash infra/scripts/copy_kb_files.sh "$storageAccount" "$fileSystem" "$managedIdentityClientId"
+if [ $? -ne 0 ]; then
+    echo "Error: copy_kb_files.sh failed."
+    exit 1
+fi
+echo "copy_kb_files.sh completed successfully."
+
+# Call run_create_index_scripts.sh (only reached when the copy step succeeded)
+echo "Running run_create_index_scripts.sh"
+bash infra/scripts/run_create_index_scripts.sh "$keyvaultName" "$managedIdentityClientId"
+if [ $? -ne 0 ]; then
+    echo "Error: run_create_index_scripts.sh failed."
+    exit 1
+fi
+echo "run_create_index_scripts.sh completed successfully."
+
+echo "All scripts executed successfully."
\ No newline at end of file
diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh
index 9c598bcf..cee79f40 100644
--- a/infra/scripts/run_create_index_scripts.sh
+++ b/infra/scripts/run_create_index_scripts.sh
@@ -2,19 +2,40 @@ echo "started the script"
 # Variables
-baseUrl="$1"
-keyvaultName="$2"
-managedIdentityClientId="$3"
-requirementFile="requirements.txt"
-requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt"
+# baseUrl="$1"
+keyvaultName="$1"
+managedIdentityClientId="$2"
+# requirementFile="infra/scripts/index_scripts/requirements.txt"
+# requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt"
 echo "Script Started"
-# Download the create_index and create table python files
-curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py"
-curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py"
+echo "Getting signed in user id"
+signed_user_id=$(az ad signed-in-user show --query id -o tsv)
+# # Download the create_index and create table python files
+# curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py"
+# curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py"
+# Look up the Key Vault's resource ID; it is used as the scope of the role-assignment queries below
+echo "Getting key vault resource id"
+key_vault_resource_id=$(az keyvault show --name "$keyvaultName" --query id --output tsv)
+
+# Ensure the signed-in user holds Key Vault Administrator on this vault, assigning it if missing. MSYS_NO_PATHCONV=1 keeps Git Bash from rewriting the scope path.
+echo "Checking if user has the Key Vault Administrator role"
+role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee "$signed_user_id" --role "Key Vault Administrator" --scope "$key_vault_resource_id" --query "[].roleDefinitionId" -o tsv)
+if [ -z "$role_assignment" ]; then
+    echo "User does not have the Key Vault Administrator role. Assigning the role."
+    MSYS_NO_PATHCONV=1 az role assignment create --assignee "$signed_user_id" --role "Key Vault Administrator" --scope "$key_vault_resource_id" --output none
+    if [ $? -eq 0 ]; then
+        echo "Key Vault Administrator role assigned successfully."
+    else
+        echo "Failed to assign Key Vault Administrator role."
+        exit 1
+    fi
+else
+    echo "User already has the Key Vault Administrator role."
+fi
 # RUN apt-get update
 # RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev
@@ -24,17 +45,52 @@ curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_pro
 # pip install pyodbc
 # Download the requirement file
-curl --output "$requirementFile" "$requirementFileUrl"
+# curl --output "$requirementFile" "$requirementFileUrl"
-echo "Download completed"
+# echo "Download completed"
 #Replace key vault name
-sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "01_create_search_index.py"
-sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "01_create_search_index.py"
-sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "02_process_data.py"
-sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "02_process_data.py"
+sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/01_create_search_index.py"  # NOTE(review): edits the checked-out script in place, so a later run with a different vault finds no placeholder left
+sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/02_process_data.py"
+if [ -n "$managedIdentityClientId" ]; then
+    sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py"
+    sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py"
+fi
+
+
+# create virtual environment
+# Reuse infra/scripts/scriptenv when it is already present
+if [ -d "infra/scripts/scriptenv" ]; then
+    echo "Virtual environment already exists. Skipping creation."
+else
+    echo "Creating virtual environment"
+    python3 -m venv infra/scripts/scriptenv
+fi
+
+# Activate the venv in the current shell: bin/ on Linux/macOS, Scripts/ on Windows (Git Bash).
+# Sourcing inside a command substitution would only activate a throwaway subshell, leaving pip/python outside the venv.
+venv_activate=infra/scripts/scriptenv/bin/activate
+[ -f "$venv_activate" ] || venv_activate=infra/scripts/scriptenv/Scripts/activate
+source "$venv_activate" || { echo "Error: failed to activate virtual environment."; exit 1; }
+
+# Install the requirements into the activated venv; stop here if installation fails
+echo "Installing requirements"
+pip install --quiet -r infra/scripts/index_scripts/requirements.txt || { echo "Error: failed to install requirements."; exit 1; }
+echo "Requirements installed"
-pip install -r requirements.txt
+# Run the scripts in order; the second is skipped when the first fails
+echo "Running the python scripts"
+echo "Creating the search index"
+python infra/scripts/index_scripts/01_create_search_index.py
+if [ $? -ne 0 ]; then
+    echo "Error: 01_create_search_index.py failed."
+    exit 1
+fi
-python 01_create_search_index.py
-python 02_process_data.py
+echo "Processing the data"
+python infra/scripts/index_scripts/02_process_data.py
+if [ $? -ne 0 ]; then
+    echo "Error: 02_process_data.py failed."
+    exit 1
+fi
+echo "Scripts completed"