Skip to content

Commit d775cb7

Browse files
Add caching to Google Sheets API calls to prevent rate limiting (#1002)
* Add caching to Google Sheets API calls to prevent rate limiting

  When updating multiple rows in Google Sheets, the find_sheet_id_by_name() method was being called repeatedly for the same spreadsheet and sheet, causing excessive API calls and hitting the Google Sheets API rate limit of 60 read requests per minute.

  This commit adds an in-memory cache to store sheet IDs, significantly reducing the number of API calls. The cache is session-scoped and automatically invalidated when new sheets are created.

  Also fixes a typo in error logging (logger.into -> logger.info).

  Co-authored-by: Cursor <cursoragent@cursor.com>

* Add cache invalidation and retry logic for stale sheet IDs

  Addresses a potential issue where cached sheet IDs can become stale if sheets are renamed/deleted/recreated by external processes during a job run. This would cause operations to fail with "sheet not found" errors with no recovery mechanism.

  Changes:
  - Added _evict_sheet_cache() method to remove stale cache entries
  - Added use_cache parameter to find_sheet_id_by_name() to bypass cache
  - Wrapped delete_rows(), update_row_in_gsheet(), and paste_csv_to_gsheet() with retry logic that:
    1. Attempts operation with cached sheet ID
    2. On "not found" errors, evicts cache and retries once with fresh lookup
    3. Fails gracefully if second attempt also fails

  This defensive approach prevents complete job failures when sheet metadata changes during execution.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* Address coderabbit review comments

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 4faca1f commit d775cb7

1 file changed

Lines changed: 105 additions & 53 deletions

File tree

cloud_governance/common/google_drive/google_drive_operations.py

Lines changed: 105 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ class GoogleDriveOperations:
2222
def __init__(self):
2323
self.__environment_variables_dict = environment_variables.environment_variables_dict
2424
self.__service = None
25+
self.__sheet_id_cache = {}
2526
if self.__environment_variables_dict.get('GOOGLE_APPLICATION_CREDENTIALS'):
2627
self.__creds, _ = google.auth.default()
2728
self.__service = build('sheets', 'v4', credentials=self.__creds, num_retries=self.RETRIES)
@@ -46,6 +47,9 @@ def create_work_sheet(self, gsheet_id: str, sheet_name: str):
4647
if self.__service:
4748
self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id,
4849
body=create_worksheet_meta_data).execute()
50+
cache_key = f"{gsheet_id}:{sheet_name}"
51+
if cache_key in self.__sheet_id_cache:
52+
del self.__sheet_id_cache[cache_key]
4953
logger.info(f'{sheet_name} worksheet created')
5054
else:
5155
logger.info(f'{sheet_name} Worksheet Already present')
@@ -97,22 +101,40 @@ def append_values(self, spreadsheet_id, sheet_name: str, values: list, value_inp
97101
except HttpError as error:
98102
logger.info(f'An error occurred: {error}')
99103

104+
def _evict_sheet_cache(self, sheet_name: str, spreadsheet_id: str):
    """
    Drop the cached sheet id for one worksheet, if an entry exists.
    An info message is logged only when an entry was actually removed;
    a missing entry is silently ignored.
    @param sheet_name: worksheet title that forms the second half of the cache key
    @param spreadsheet_id: spreadsheet id that forms the first half of the cache key
    """
    stale_key = f"{spreadsheet_id}:{sheet_name}"
    try:
        del self.__sheet_id_cache[stale_key]
    except KeyError:
        # Nothing was cached for this sheet, so there is nothing to report.
        return
    logger.info(f'Evicted stale cache entry for sheet: {sheet_name}')
114+
100115
@logger_time_stamp
def find_sheet_id_by_name(self, sheet_name: str, spreadsheet_id: str, use_cache: bool = True):
    """
    This method finds the sheet id of a worksheet in the spreadsheet.
    Successful lookups are stored in the in-memory cache under the key
    "<spreadsheet_id>:<sheet_name>" so repeated calls do not hit the API.
    @param sheet_name: title of the worksheet to look up
    @param spreadsheet_id: id of the spreadsheet that is searched
    @param use_cache: If False, bypass cache and fetch fresh data
    @return: the sheetId of the matching worksheet, or '' when no worksheet
             matches or no service client is configured
    """
    cache_key = f"{spreadsheet_id}:{sheet_name}"
    if use_cache and cache_key in self.__sheet_id_cache:
        return self.__sheet_id_cache[cache_key]

    if self.__service:
        response = self.__service.spreadsheets().get(spreadsheetId=spreadsheet_id,
                                                     fields='sheets.properties').execute()
        # 'sheets' may be absent from the response; treat that as "no worksheets"
        # instead of failing on iteration over None.
        for sheet in response.get('sheets') or []:
            properties = sheet.get('properties', {})
            if 'title' in properties and properties['title'] == sheet_name:
                sheet_id = properties['sheetId']
                self.__sheet_id_cache[cache_key] = sheet_id
                return sheet_id
        # A fresh lookup did not find the worksheet, so any id cached earlier
        # for it is stale and must not be served by later cached calls.
        self.__sheet_id_cache.pop(cache_key, None)
    return ''
117139

118140
@logger_time_stamp
@@ -125,25 +147,35 @@ def delete_rows(self, spreadsheet_id: str, sheet_name: str, row_number: int):
125147
@return:
126148
"""
127149
if self.__service:
128-
try:
129-
spreadsheet_data = [
130-
{
131-
"deleteDimension": {
132-
"range": {
133-
"sheetId": self.find_sheet_id_by_name(sheet_name=sheet_name,
134-
spreadsheet_id=spreadsheet_id),
135-
"dimension": "ROWS",
136-
"startIndex": row_number,
137-
"endIndex": row_number + 1
150+
for attempt in range(2):
151+
try:
152+
use_cache = attempt == 0
153+
sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name,
154+
spreadsheet_id=spreadsheet_id,
155+
use_cache=use_cache)
156+
spreadsheet_data = [
157+
{
158+
"deleteDimension": {
159+
"range": {
160+
"sheetId": sheet_id,
161+
"dimension": "ROWS",
162+
"startIndex": row_number,
163+
"endIndex": row_number + 1
164+
}
138165
}
139166
}
140-
}
141-
]
142-
update_data = {"requests": spreadsheet_data}
143-
updating = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=update_data)
144-
updating.execute()
145-
except HttpError as error:
146-
logger.into(f'An error occurred: {error}')
167+
]
168+
update_data = {"requests": spreadsheet_data}
169+
updating = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=update_data)
170+
updating.execute()
171+
break
172+
except HttpError as error:
173+
if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()):
174+
self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id)
175+
logger.info(f'Retrying delete_rows after cache eviction for sheet: {sheet_name}')
176+
continue
177+
logger.error(f'Failed to delete rows in sheet {sheet_name}: {error}')
178+
raise
147179

148180
@logger_time_stamp
149181
def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str):
@@ -160,25 +192,35 @@ def paste_csv_to_gsheet(self, csv_path, spreadsheet_id: str, sheet_name: str):
160192
with open(csv_path, 'r') as csv_file:
161193
csv_contents = csv_file.read()
162194
if csv_contents:
163-
sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id)
164-
body = {
165-
'requests': [{
166-
'pasteData': {
167-
"coordinate": {
168-
"sheetId": sheet_id,
169-
"rowIndex": "0",
170-
"columnIndex": "0",
171-
},
172-
"data": csv_contents,
173-
"type": 'PASTE_NORMAL',
174-
"delimiter": ',',
195+
for attempt in range(2):
196+
try:
197+
use_cache = attempt == 0
198+
sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id,
199+
use_cache=use_cache)
200+
body = {
201+
'requests': [{
202+
'pasteData': {
203+
"coordinate": {
204+
"sheetId": sheet_id,
205+
"rowIndex": 0,
206+
"columnIndex": 0,
207+
},
208+
"data": csv_contents,
209+
"type": 'PASTE_NORMAL',
210+
"delimiter": ',',
211+
}
212+
}]
175213
}
176-
}]
177-
}
178-
request = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body)
179-
response = request.execute()
180-
logger.info(f'Pasted data into the {sheet_name}')
181-
return response
214+
request = self.__service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body)
215+
response = request.execute()
216+
logger.info(f'Pasted data into the {sheet_name}')
217+
return response
218+
except HttpError as error:
219+
if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()):
220+
self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=spreadsheet_id)
221+
logger.info(f'Retrying paste_csv_to_gsheet after cache eviction for sheet: {sheet_name}')
222+
continue
223+
raise error
182224

183225
@logger_time_stamp
184226
def update_row_in_gsheet(self, data: list, gsheet_id: str, row: int, sheet_name: str):
@@ -187,19 +229,29 @@ def update_row_in_gsheet(self, data: list, gsheet_id: str, row: int, sheet_name:
187229
@return:
188230
"""
189231
if self.__service:
190-
sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=gsheet_id)
191-
requests_body = {'requests': [{
192-
'updateCells': {
193-
'rows': [{"values": data}],
194-
'fields': '*',
195-
'start': {
196-
"sheetId": sheet_id,
197-
"rowIndex": row,
198-
"columnIndex": '0'
199-
}
200-
}
201-
}]}
202-
request = self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id, body=requests_body)
203-
response = request.execute()
204-
logger.info(f'Updated the row in the worksheet {sheet_name}')
205-
return response
232+
for attempt in range(2):
233+
try:
234+
use_cache = attempt == 0
235+
sheet_id = self.find_sheet_id_by_name(sheet_name=sheet_name, spreadsheet_id=gsheet_id,
236+
use_cache=use_cache)
237+
requests_body = {'requests': [{
238+
'updateCells': {
239+
'rows': [{"values": data}],
240+
'fields': '*',
241+
'start': {
242+
"sheetId": sheet_id,
243+
"rowIndex": row,
244+
"columnIndex": 0
245+
}
246+
}
247+
}]}
248+
request = self.__service.spreadsheets().batchUpdate(spreadsheetId=gsheet_id, body=requests_body)
249+
response = request.execute()
250+
logger.info(f'Updated the row in the worksheet {sheet_name}')
251+
return response
252+
except HttpError as error:
253+
if attempt == 0 and ('Unable to parse range' in str(error) or 'not found' in str(error).lower()):
254+
self._evict_sheet_cache(sheet_name=sheet_name, spreadsheet_id=gsheet_id)
255+
logger.info(f'Retrying update_row_in_gsheet after cache eviction for sheet: {sheet_name}')
256+
continue
257+
raise error

0 commit comments

Comments
 (0)