diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e9d5a293c..f143c9dc4 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,21 @@ "image": "mcr.microsoft.com/devcontainers/python:3.11-bullseye", "forwardPorts": [50505], "features": { - "ghcr.io/azure/azure-dev/azd:latest": {} + "ghcr.io/devcontainers/features/node:1": { + "nodeGypDependencies": true, + "installYarnUsingApt": true, + "version": "lts", + "pnpmVersion": "latest", + "nvmVersion": "latest" + }, + "ghcr.io/devcontainers/features/azure-cli:1": { + "installBicep": true, + "version": "latest", + "bicepVersion": "latest" + }, + "ghcr.io/azure/azure-dev/azd:0": { + "version": "stable" + } }, "customizations": { "vscode": { @@ -16,7 +30,7 @@ ] } }, - "postStartCommand": "git pull origin main && python3 -m pip install -r infra/scripts/index_scripts/requirements.txt && curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && chmod +x ./scripts/quota_check_params.sh", + "postCreateCommand": "bash ./.devcontainer/setup_env.sh", "remoteUser": "vscode", "hostRequirements": { "memory": "4gb" diff --git a/.devcontainer/setup_env.sh b/.devcontainer/setup_env.sh new file mode 100644 index 000000000..91de5b222 --- /dev/null +++ b/.devcontainer/setup_env.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +git fetch +git pull + +# provide execute permission to quotacheck script +sudo chmod +x ./scripts/quota_check_params.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore index dd27693a3..0abb7a034 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ myenv scriptsenv/ -scriptenv \ No newline at end of file +scriptenv +pdf \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index be37e5c29..d184e8ac0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "type": "python", "request": "launch", "module": "quart", - "cwd": "${workspaceFolder}", + "cwd": "${workspaceFolder}/src", "env": { 
"QUART_APP": "app:app", "QUART_ENV": "development", diff --git a/README.md b/README.md index eb6076c60..3bc3215d0 100644 --- a/README.md +++ b/README.md @@ -193,7 +193,11 @@ To change the azd parameters from the default values, follow the steps [here](./ * This deployment will take *7-10 minutes* to provision the resources in your account and set up the solution with sample data. * If you get an error or timeout with deployment, changing the location can help, as there may be availability constraints for the resources. -5. Once the deployment has completed successfully and you would like to use the sample data, run the bash command printed in the terminal. The bash command will look like the following: +5. Once the deployment has completed successfully and you would like to use the sample data, run the bash command printed in the terminal. The bash command will look like the following: + ```shell + bash ./infra/scripts/process_sample_data.sh + ``` + If you don't have an azd environment, you need to pass the parameters along with the command. 
The command will then look like the following: ```shell bash ./infra/scripts/process_sample_data.sh <Storage-Account-name> <Storage-Account-container-name> <Key-Vault-name> <CosmosDB-Account-name> <Resource-Group-name> ``` diff --git a/azure.yaml b/azure.yaml index 64d978545..11595c029 100644 --- a/azure.yaml +++ b/azure.yaml @@ -32,7 +32,7 @@ hooks: Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" - Write-Host "bash ./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME $env:COSMOSDB_ACCOUNT_NAME $env:RESOURCE_GROUP_NAME" -ForegroundColor Cyan + Write-Host "bash ./infra/scripts/process_sample_data.sh" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -41,8 +41,8 @@ hooks: echo "Web app URL: " echo $WEB_APP_URL echo "" - echo "If you want to use the Sample Data, run the following command in the terminal to process it:" - echo "bash ./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME $COSMOSDB_ACCOUNT_NAME $RESOURCE_GROUP_NAME" + echo "If you want to use the Sample Data, run the following command in the bash terminal to process it:" + echo "bash ./infra/scripts/process_sample_data.sh" shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/docs/README_LOCAL.md b/docs/README_LOCAL.md index 58b44d505..40325afa5 100644 --- a/docs/README_LOCAL.md +++ b/docs/README_LOCAL.md @@ -1,7 +1,7 @@ ### Deploy from your local machine #### Local Setup: Basic Chat Experience -1. Copy `.env.sample` to a new file called `.env` and configure the settings as described in the [Environment variables](#environment-variables) section. +1. Copy `.env.sample` present in `src` folder to a new file called `.env` and configure the settings as described in the [Environment variables](#environment-variables) section. 
These variables are required: - `AZURE_OPENAI_RESOURCE` @@ -17,7 +17,7 @@ See the [documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#example-response-2) for more information on these parameters. -2. Start the app with `start.cmd`. This will build the frontend, install backend dependencies, and then start the app. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. +2. Start the app with `start.cmd` or `start.sh`. This will build the frontend, install backend dependencies, and then start the app. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. 3. You can see the local running app at http://127.0.0.1:50505. If you experience a port conflict and the app does not load, stop the application in the terminal (CTRL-C on Windows), edit the `start.cmd` file and change the port to a value not in use (i.e., 5000). @@ -50,7 +50,7 @@ NOTE: You may find you need to set: MacOS: `export NODE_OPTIONS="--max-old-space - `AZURE_SEARCH_STRICTNESS` - `AZURE_OPENAI_EMBEDDING_NAME` -3. Start the app with `start.cmd`. This will build the frontend, install backend dependencies, and then start the app. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. +3. Start the app with `start.cmd` or `start.sh`. This will build the frontend, install backend dependencies, and then start the app. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. 4. You can see the local running app at http://127.0.0.1:50505. If you experience a port conflict and the app does not load, stop the application in the terminal (CTRL-C on Windows), edit the `start.cmd` file and change the port to a value not in use (i.e., 5000). 
NOTE: You may find you need to set: MacOS: `export NODE_OPTIONS="--max-old-space-size=8192"` or Windows: `set NODE_OPTIONS=--max-old-space-size=8192` to avoid running out of memory when building the frontend. @@ -62,7 +62,7 @@ To enable chat history, you will need to set up CosmosDB resources. The ARM temp - `AZURE_COSMOSDB_CONVERSATIONS_CONTAINER` - `AZURE_COSMOSDB_ACCOUNT_KEY` -As above, start the app with `start.cmd`, then visit the local running app at http://127.0.0.1:50505. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. If you experience a port conflict and the app does not load, stop the application in the terminal (CTRL-C on Windows), edit the `start.cmd` file and change the port to a value not in use (i.e., 5000). +As above, start the app with `start.cmd` or `start.sh`, then visit the local running app at http://127.0.0.1:50505. Or, just run the backend in debug mode using the VSCode debug configuration in `.vscode/launch.json`. If you experience a port conflict and the app does not load, stop the application in the terminal (CTRL-C on Windows), edit the `start.cmd` file and change the port to a value not in use (i.e., 5000). #### Local Setup: Enable Message Feedback To enable message feedback, you will need to set up CosmosDB resources. Then specify these additional environment variable: @@ -75,10 +75,10 @@ To enable message feedback, you will need to set up CosmosDB resources. Then spe You can use the [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) to deploy the app from your local machine. Make sure you have version 2.48.1 or later. -If this is your first time deploying the app, you can use [az webapp up](https://learn.microsoft.com/en-us/cli/azure/webapp?view=azure-cli-latest#az-webapp-up). Run the following two commands from the root folder of the repo, updating the placeholder values to your desired app name, resource group, location, and subscription. 
You can also change the SKU if desired. +If this is your first time deploying the app, you can use [az webapp up](https://learn.microsoft.com/en-us/cli/azure/webapp?view=azure-cli-latest#az-webapp-up). Run the following two commands from the `src` folder of the repo, updating the placeholder values to your desired app name, resource group, location, and subscription. You can also change the SKU if desired. 1. `az webapp up --runtime PYTHON:3.11 --sku B1 --name --resource-group --location --subscription ` -1. `az webapp config set --startup-file "python3 -m gunicorn app:app" --name ` +1. `az webapp config set --startup-file "python3 -m gunicorn app:app" --name --resource-group ` If you've deployed the app previously, first run this command to update the appsettings to allow local code deployment: @@ -88,10 +88,10 @@ Check the runtime stack for your app by viewing the app service resource in the Check the SKU in the same way. Use the abbreviated SKU name in the argument below, e.g. for "Basic (B1)" the SKU is `B1`. -Then, use these commands to deploy your local code to the existing app: +Then, use these commands from `src` folder to deploy your local code to the existing app: 1. `az webapp up --runtime --sku --name --resource-group ` -1. `az webapp config set --startup-file "python3 -m gunicorn app:app" --name ` +1. `az webapp config set --startup-file "python3 -m gunicorn app:app" --name --resource-group ` Make sure that the app name and resource group match exactly for the app that was previously deployed. 
diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index 43ad34cf2..f8a17f005 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -6,26 +6,6 @@ fileSystem="$2" # baseUrl="$3" managedIdentityClientId="$3" -zipFileName1="pdfdata.zip" -extractedFolder1="pdf" -zipUrl1=${baseUrl}"infra/data/pdfdata.zip" - -# zipFileName2="audio_data.zip" -# extractedFolder2="audiodata" -# zipUrl2=${baseUrl}"infra/data/audio_data.zip" - -# Create folders if they do not exist -# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" -# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder2" - -# Download the zip file -# curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" -# curl --output /mnt/azscripts/azscriptinput/"$zipFileName2" "$zipUrl2" - -# Extract the zip file -unzip infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1" -# unzip /mnt/azscripts/azscriptinput/"$zipFileName2" -d /mnt/azscripts/azscriptinput/"$extractedFolder2" - echo "Script Started" # Authenticate with Azure @@ -58,6 +38,23 @@ if [ -z "$role_assignment" ]; then MSYS_NO_PATHCONV=1 az role assignment create --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --output none if [ $? -eq 0 ]; then echo "Role assignment completed successfully." + retries=3 + while [ $retries -gt 0 ]; do + # Check if the role assignment was successful + role_assignment_check=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --query "[].roleDefinitionId" -o tsv) + if [ -n "$role_assignment_check" ]; then + echo "Role assignment verified successfully." + break + else + echo "Role assignment not found, retrying..." + ((retries--)) + sleep 10 + fi + done + if [ $retries -eq 0 ]; then + echo "Error: Role assignment verification failed after multiple attempts. Try rerunning the script." 
+ exit 1 + fi else echo "Error: Role assignment failed." exit 1 @@ -66,6 +63,27 @@ else echo "User already has the Storage Blob Data Contributor role." fi +zipFileName1="pdfdata.zip" +extractedFolder1="pdf" +zipUrl1="infra/data/$zipFileName1" +# zipUrl1=${baseUrl}"infra/data/$zipFileName1" + +# zipFileName2="audio_data.zip" +# extractedFolder2="audiodata" +# zipUrl2=${baseUrl}"infra/data/audio_data.zip" + +# Create folders if they do not exist +# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" +# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder2" + +# Download the zip file +# curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" +# curl --output /mnt/azscripts/azscriptinput/"$zipFileName2" "$zipUrl2" + +# Extract the zip file +unzip -o $zipUrl1 -d infra/data/"$extractedFolder1" +# unzip /mnt/azscripts/azscriptinput/"$zipFileName2" -d /mnt/azscripts/azscriptinput/"$extractedFolder2" + # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. 
echo "Uploading files to Azure Storage" az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite --output none diff --git a/infra/scripts/process_sample_data.sh b/infra/scripts/process_sample_data.sh index 30529053e..432e25832 100644 --- a/infra/scripts/process_sample_data.sh +++ b/infra/scripts/process_sample_data.sh @@ -8,9 +8,31 @@ cosmosDbAccountName="$4" resourceGroupName="$5" managedIdentityClientId="$6" +# get parameters from azd env, if not provided +if [ -z "$resourceGroupName" ]; then + resourceGroupName=$(azd env get-value RESOURCE_GROUP_NAME) +fi + +if [ -z "$cosmosDbAccountName" ]; then + cosmosDbAccountName=$(azd env get-value COSMOSDB_ACCOUNT_NAME) +fi + +if [ -z "$storageAccount" ]; then + storageAccount=$(azd env get-value STORAGE_ACCOUNT_NAME) +fi + +if [ -z "$fileSystem" ]; then + fileSystem=$(azd env get-value STORAGE_CONTAINER_NAME) +fi + +if [ -z "$keyvaultName" ]; then + keyvaultName=$(azd env get-value KEY_VAULT_NAME) +fi + + # Check if all required arguments are provided if [ -z "$storageAccount" ] || [ -z "$fileSystem" ] || [ -z "$keyvaultName" ] || [ -z "$cosmosDbAccountName" ] || [ -z "$resourceGroupName" ]; then - echo "Usage: $0 [managedIdentityClientId]" + echo "Usage: $0 <storageAccountName> <storageContainerName> <keyvaultName> <cosmosDbAccountName> <resourceGroupName>" exit 1 fi diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 2987212ce..9942e6ac1 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -72,6 +72,15 @@ if [ -n "$managedIdentityClientId" ]; then sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py" fi +# Determine the correct Python command +if command -v python3 &> /dev/null; then + PYTHON_CMD="python3" +elif command -v python &> /dev/null; then + PYTHON_CMD="python" +else + echo "Python is not installed on this system. 
Or it is not added in the PATH." + exit 1 +fi # create virtual environment # Check if the virtual environment already exists @@ -79,13 +88,18 @@ if [ -d "infra/scripts/scriptenv" ]; then echo "Virtual environment already exists. Skipping creation." else echo "Creating virtual environment" - python3 -m venv infra/scripts/scriptenv + $PYTHON_CMD -m venv infra/scripts/scriptenv fi -# handling virtual environment activation for different OS -activate_env_output=$(source infra/scripts/scriptenv/bin/activate 2>&1) -if [ -n "$activate_env_output" ]; then - source infra/scripts/scriptenv/Scripts/activate +# Activate the virtual environment +if [ -f "infra/scripts/scriptenv/bin/activate" ]; then + echo "Activating virtual environment (Linux/macOS)" + source "infra/scripts/scriptenv/bin/activate" +elif [ -f "infra/scripts/scriptenv/Scripts/activate" ]; then + echo "Activating virtual environment (Windows)" + source "infra/scripts/scriptenv/Scripts/activate" +else + echo "Error activating virtual environment. Requirements may be installed globally." fi # Install the requirements @@ -93,19 +107,36 @@ echo "Installing requirements" pip install --quiet -r infra/scripts/index_scripts/requirements.txt echo "Requirements installed" +error_flag=false # Run the scripts echo "Running the python scripts" echo "Creating the search index" python infra/scripts/index_scripts/01_create_search_index.py if [ $? -ne 0 ]; then echo "Error: 01_create_search_index.py failed." - exit 1 + error_flag=true fi -echo "Processing the data" -python infra/scripts/index_scripts/02_process_data.py -if [ $? -ne 0 ]; then - echo "Error: 02_process_data.py failed." +if [ "$error_flag" = false ]; then + echo "Processing the data" + python infra/scripts/index_scripts/02_process_data.py + if [ $? -ne 0 ]; then + echo "Error: 02_process_data.py failed." 
+ error_flag=true + fi +fi + +# revert the key vault name and managed identity client id in the python files +sed -i "s/${keyvaultName}/kv_to-be-replaced/g" "infra/scripts/index_scripts/01_create_search_index.py" +sed -i "s/${keyvaultName}/kv_to-be-replaced/g" "infra/scripts/index_scripts/02_process_data.py" +if [ -n "$managedIdentityClientId" ]; then + sed -i "s/${managedIdentityClientId}/mici_to-be-replaced/g" "infra/scripts/index_scripts/01_create_search_index.py" + sed -i "s/${managedIdentityClientId}/mici_to-be-replaced/g" "infra/scripts/index_scripts/02_process_data.py" +fi + +if [ "$error_flag" = true ]; then + echo "Error: One or more scripts failed during the script execution." exit 1 fi + echo "Scripts completed" diff --git a/src/start.sh b/src/start.sh index 8e1644b74..6da710044 100644 --- a/src/start.sh +++ b/src/start.sh @@ -2,6 +2,16 @@ export NODE_OPTIONS=--max_old_space_size=8192 +# Restoring backend python packages +echo "" +echo "Restoring backend python packages" +echo "" +python3 -m pip install -r requirements.txt +if [ $? -ne 0 ]; then + echo "Failed to restore backend python packages" + exit 1 # explicit failure code: $? here would be the status of the echo above (0) +fi + echo "" echo "Restoring frontend npm packages" echo "" @@ -22,12 +32,10 @@ if [ $? -ne 0 ]; then fi cd .. -. ./scripts/loadenv.sh - echo "" echo "Starting backend" echo "" -./.venv/bin/python -m quart run --port=50505 --host=127.0.0.1 --reload +python3 -m quart run --port=50505 --host=127.0.0.1 --reload if [ $? -ne 0 ]; then echo "Failed to start backend" exit $?