From 6bbc2cecda9e6132a675256ba55b96e23913b844 Mon Sep 17 00:00:00 2001 From: Pragya Chaudhary Date: Wed, 10 Jun 2026 14:30:05 +0530 Subject: [PATCH 1/3] Add caching to Google Sheets API calls to prevent rate limiting When updating multiple rows in Google Sheets, the find_sheet_id_by_name() method was being called repeatedly for the same spreadsheet and sheet, causing excessive API calls and hitting the Google Sheets API rate limit of 60 read requests per minute. This commit adds an in-memory cache to store sheet IDs, significantly reducing the number of API calls. The cache is session-scoped and automatically invalidated when new sheets are created. Also fixes a typo in error logging (logger.into -> logger.info). Co-authored-by: Cursor --- .../common/google_drive/google_drive_operations.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cloud_governance/common/google_drive/google_drive_operations.py b/cloud_governance/common/google_drive/google_drive_operations.py index 6fc329e2..f68794b4 100644 --- a/cloud_governance/common/google_drive/google_drive_operations.py +++ b/cloud_governance/common/google_drive/google_drive_operations.py @@ -22,6 +22,7 @@ class GoogleDriveOperations: def __init__(self): self.__environment_variables_dict = environment_variables.environment_variables_dict self.__service = None + self.__sheet_id_cache = {} if self.__environment_variables_dict.get('GOOGLE_APPLICATION_CREDENTIALS'): self.__creds, _ = google.auth.default() self.__service = build('sheets', 'v4', credentials=self.__creds, num_retries=self.RETRIES) @@ -46,6 +47,9 @@ def create_work_sheet(self, gsheet_id: str, sheet_name: str): if self.__service: self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id, body=create_worksheet_meta_data).execute() + cache_key = f"{gsheet_id}:{sheet_name}" + if cache_key in self.__sheet_id_cache: + del self.__sheet_id_cache[cache_key] logger.info(f'{sheet_name} worksheet created') else: logger.info(f'{sheet_name} Worksheet Already present') @@ -105,6 +109,10 @@ def find_sheet_id_by_name(self, sheet_name: str, spreadsheet_id: str): @param spreadsheet_id: @return: """ + cache_key = f"{spreadsheet_id}:{sheet_name}" + if cache_key in self.__sheet_id_cache: + return self.__sheet_id_cache[cache_key] + if self.__service: sheets_with_properties = self.__service.spreadsheets().get(spreadsheetId=spreadsheet_id, fields='sheets.properties').execute().get( @@ -112,7 +120,9 @@ def find_sheet_id_by_name(self, sheet_name: str, spreadsheet_id: str): for sheet in sheets_with_properties: if 'title' in sheet['properties'].keys(): if sheet['properties']['title'] == sheet_name: - return sheet['properties']['sheetId'] + sheet_id = sheet['properties']['sheetId'] + self.__sheet_id_cache[cache_key] = sheet_id + return sheet_id return '' @logger_time_stamp @@ -143,7 +153,7 @@ def delete_rows(self, spreadsheet_id: str, sheet_name: str, row_number: int): updating = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=update_data) updating.execute() except HttpError as error: - logger.into(f'An error occurred: {error}') + logger.info(f'An error occurred: {error}') @logger_time_stamp def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str): From db9b1415ef52a08dcef9cb2aea720a8b70cce26d Mon Sep 17 00:00:00 2001 From: Pragya Chaudhary Date: Wed, 10 Jun 2026 14:47:04 +0530 Subject: [PATCH 2/3] Add cache invalidation and retry logic for stale sheet IDs Addresses potential issue where cached sheet IDs can become stale if sheets are renamed/deleted/recreated by external processes during a job run. This would cause operations to fail with "sheet not found" errors with no recovery mechanism. Changes: - Added _evict_sheet_cache() method to remove stale cache entries - Added use_cache parameter to find_sheet_id_by_name() to bypass cache - Wrapped delete_rows(), update_row_in_gsheet(), and paste_csv_to_gsheet() with retry logic that: 1. Attempts operation with cached sheet ID 2. On "not found" errors, evicts cache and retries once with fresh lookup 3. Fails gracefully if second attempt also fails This defensive approach prevents complete job failures when sheet metadata changes during execution. Co-authored-by: Cursor --- .../google_drive/google_drive_operations.py | 148 +++++++++++------- 1 file changed, 95 insertions(+), 53 deletions(-) diff --git a/cloud_governance/common/google_drive/google_drive_operations.py b/cloud_governance/common/google_drive/google_drive_operations.py index f68794b4..abcb1e84 100644 --- a/cloud_governance/common/google_drive/google_drive_operations.py +++ b/cloud_governance/common/google_drive/google_drive_operations.py @@ -101,16 +101,28 @@ def append_values(self, spreadsheet_id, sheet_name: str, values: list, value_inp except HttpError as error: logger.info(f'An error occurred: {error}') + def _evict_sheet_cache(self, sheet_name: str, spreadsheet_id: str): + """ + Evict a sheet ID from cache (used when sheet is not found or has been modified) + @param sheet_name: + @param spreadsheet_id: + """ + cache_key = f"{spreadsheet_id}:{sheet_name}" + if cache_key in self.__sheet_id_cache: + del self.__sheet_id_cache[cache_key] + logger.info(f'Evicted stale cache entry for sheet: {sheet_name}') + @logger_time_stamp - def find_sheet_id_by_name(self, sheet_name: str, spreadsheet_id: str): + def find_sheet_id_by_name(self, sheet_name: str, spreadsheet_id: str, use_cache: bool = True): """ This method find the sheet id in the spreadsheet @param sheet_name: @param spreadsheet_id: + @param use_cache: If False, bypass cache and fetch fresh data @return: """ cache_key = f"{spreadsheet_id}:{sheet_name}" - if cache_key in self.__sheet_id_cache: + if use_cache and cache_key in self.__sheet_id_cache: return self.__sheet_id_cache[cache_key] if self.__service: @@ -135,25 +147,35 @@ def delete_rows(self, spreadsheet_id: str, sheet_name: str, row_number: int): @return: """ if self.__service: - try: - spreadsheet_data = [ - { - "deleteDimension": { - "range": { - "sheetId": self.find_sheet_id_by_name(sheet_name=sheet_name, - spreadsheet_id=spreadsheet_id), - "dimension": "ROWS", - "startIndex": row_number, - "endIndex": row_number + 1 + for attempt in range(2): + try: + use_cache = attempt == 0 + sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, + spreadsheet_id=spreadsheet_id, + use_cache=use_cache) + spreadsheet_data = [ + { + "deleteDimension": { + "range": { + "sheetId": sheet_id, + "dimension": "ROWS", + "startIndex": row_number, + "endIndex": row_number + 1 + } } } - } - ] - update_data = {"requests": spreadsheet_data} - updating = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=update_data) - updating.execute() - except HttpError as error: - logger.info(f'An error occurred: {error}') + ] + update_data = {"requests": spreadsheet_data} + updating = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=update_data) + updating.execute() + break + except HttpError as error: + if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()): + self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id) + logger.info(f'Retrying delete_rows after cache eviction for sheet: {sheet_name}') + continue + logger.info(f'An error occurred: {error}') + break @logger_time_stamp def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str): @@ -170,25 +192,35 @@ def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str): with open(csv_path, 'r') as csv_file: csv_contents = csv_file.read() if csv_contents: - sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id) - body = { - 'requests': [{ - 'pasteData': { - "coordinate": { - "sheetId": sheet_id, - "rowIndex": "0", - "columnIndex": "0", - }, - "data": csv_contents, - "type": 'PASTE_NORMAL', - "delimiter": ',', + for attempt in range(2): + try: + use_cache = attempt == 0 + sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id, + use_cache=use_cache) + body = { + 'requests': [{ + 'pasteData': { + "coordinate": { + "sheetId": sheet_id, + "rowIndex": "0", + "columnIndex": "0", + }, + "data": csv_contents, + "type": 'PASTE_NORMAL', + "delimiter": ',', + } + }] } - }] - } - request = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body) - response = request.execute() - logger.info(f'Pasted data into the {sheet_name}') - return response + request = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body) + response = request.execute() + logger.info(f'Pasted data into the {sheet_name}') + return response + except HttpError as error: + if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()): + self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id) + logger.info(f'Retrying paste_csv_to_gsheet after cache eviction for sheet: {sheet_name}') + continue + raise error @logger_time_stamp def update_row_in_gsheet(self, data: list, gsheet_id: str, row: int, sheet_name: str): @@ -197,19 +229,29 @@ def update_row_in_gsheet(self, data: list, gsheet_id: str, row: int, sheet_name: @return: """ if self.__service: - sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=gsheet_id) - requests_body = {'requests': [{ - 'updateCells': { - 'rows': [{"values": data}], - 'fields': '*', - 'start': { - "sheetId": sheet_id, - "rowIndex": row, - "columnIndex": '0' - } - } - }]} - request = self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id, body=requests_body) - response = request.execute() - logger.info(f'Updated the row in the worksheet {sheet_name}') - return response + for attempt in range(2): + try: + use_cache = attempt == 0 + sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=gsheet_id, + use_cache=use_cache) + requests_body = {'requests': [{ + 'updateCells': { + 'rows': [{"values": data}], + 'fields': '*', + 'start': { + "sheetId": sheet_id, + "rowIndex": row, + "columnIndex": '0' + } + } + }]} + request = self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id, body=requests_body) + response = request.execute() + logger.info(f'Updated the row in the worksheet {sheet_name}') + return response + except HttpError as error: + if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()): + self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=gsheet_id) + logger.info(f'Retrying update_row_in_gsheet after cache eviction for sheet: {sheet_name}') + continue + raise error From 4a864a97a82798b61d86c501a5253faa5916fcfb Mon Sep 17 00:00:00 2001 From: Pragya Chaudhary Date: Wed, 10 Jun 2026 15:13:34 +0530 Subject: [PATCH 3/3] Address coderabbit review comments --- .../common/google_drive/google_drive_operations.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cloud_governance/common/google_drive/google_drive_operations.py b/cloud_governance/common/google_drive/google_drive_operations.py index abcb1e84..9baa0708 100644 --- a/cloud_governance/common/google_drive/google_drive_operations.py +++ b/cloud_governance/common/google_drive/google_drive_operations.py @@ -174,8 +174,8 @@ def delete_rows(self, spreadsheet_id: str, sheet_name: str, row_number: int): self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id) logger.info(f'Retrying delete_rows after cache eviction for sheet: {sheet_name}') continue - logger.info(f'An error occurred: {error}') - break + logger.error(f'Failed to delete rows in sheet {sheet_name}: {error}') + raise @logger_time_stamp def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str): @@ -202,8 +202,8 @@ def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str): 'pasteData': { "coordinate": { "sheetId": sheet_id, - "rowIndex": "0", - "columnIndex": "0", + "rowIndex": 0, + "columnIndex": 0, }, "data": csv_contents, "type": 'PASTE_NORMAL', @@ -241,7 +241,7 @@ def update_row_in_gsheet(self, data: list, gsheet_id: str, row: int, sheet_name: 'start': { "sheetId": sheet_id, "rowIndex": row, - "columnIndex": '0' + "columnIndex": 0 } } }]}