diff --git a/docs/docs_ru/tutorials/google_spreadsheets_agent/google_sheets.ipynb b/docs/docs_ru/tutorials/google_spreadsheets_agent/google_sheets.ipynb new file mode 100644 index 0000000000..4bbe100134 --- /dev/null +++ b/docs/docs_ru/tutorials/google_spreadsheets_agent/google_sheets.ipynb @@ -0,0 +1,1833 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import gspread \n", + "import openai\n", + "import operator\n", + "import requests\n", + "import json\n", + "import numpy as np\n", + "\n", + "import os.path\n", + "import string\n", + "\n", + "from google.auth.transport.requests import Request\n", + "from google.oauth2.credentials import Credentials\n", + "from google_auth_oauthlib.flow import InstalledAppFlow\n", + "from googleapiclient.discovery import build\n", + "from googleapiclient.errors import HttpError\n", + "\n", + "from pydantic import BaseModel, Field, Extra\n", + "from typing import Type, TypedDict, Annotated, List\n", + "\n", + "from langchain.schema import HumanMessage, SystemMessage\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain.tools import BaseTool\n", + "\n", + "from langgraph.graph import StateGraph, END, START\n", + "from langgraph.checkpoint.memory import MemorySaver\n", + "from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage, ChatMessage, ToolMessage\n", + "from langchain_google_genai import ChatGoogleGenerativeAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# API\n", + "\n", + "В этом разделе размещены все функции и классы для операций с Google Spreadsheet листами через API\n", + "\n", + "Подробнее в [документации](https://developers.google.com/sheets/api/guides/concepts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
# API cell: A1-notation helpers and a thin client for the Google Sheets REST API.
# Reference: https://developers.google.com/sheets/api/guides/concepts


def column_to_index(column):
    """Convert a column label in A1 notation to a zero-based column index.

    Examples: "A" -> 0, "Z" -> 25, "AA" -> 26. Letters are matched
    case-insensitively.

    Raises:
        ValueError: if ``column`` is empty or contains a character outside A-Z.
    """
    if not column:
        raise ValueError("column label must not be empty")
    index = 0
    for char in column.upper():
        # Bijective base-26: every letter is a "digit" in the range 1..26.
        index = index * 26 + string.ascii_uppercase.index(char) + 1
    return index - 1


def index_to_column(index):
    """Convert a zero-based column index to its A1 label (0 -> "A", 26 -> "AA").

    A negative index yields an empty string.
    """
    column = ""
    while index >= 0:
        index, remainder = divmod(index, 26)
        column = string.ascii_uppercase[remainder] + column
        index -= 1
    return column


def _split_cell(cell):
    """Split an A1 cell reference such as "AB12" (or "$AB$12") into ("AB", 12)."""
    letters = "".join(filter(str.isalpha, cell))
    digits = "".join(filter(str.isdigit, cell))
    if not letters or not digits:
        raise ValueError(f"Expected a cell reference like 'A1', got {cell!r}")
    return letters, int(digits)


def parse_range(sheetId, cell_range):
    """Translate an A1 range into a GridRange dictionary for the Sheets API.

    Args:
        sheetId: Numeric id of the sheet the range belongs to.
        cell_range: Range such as "B2:D10". A single cell ("C3") is treated as
            a 1x1 range, and an optional "SheetName!" prefix is ignored because
            the sheet is identified by ``sheetId``.

    Returns:
        dict with ``sheetId`` and half-open, zero-based row/column bounds.

    Raises:
        ValueError: if the range is not made of one or two cell references.
    """
    a1 = cell_range.split("!")[-1]
    start_cell, _, end_cell = a1.partition(":")
    if ":" in end_cell:
        raise ValueError(f"Expected a range like 'A1:B2', got {cell_range!r}")

    start_col, start_row = _split_cell(start_cell)
    end_col, end_row = _split_cell(end_cell or start_cell)

    return {
        "sheetId": sheetId,
        "startRowIndex": start_row - 1,
        "endRowIndex": end_row,  # end bounds are exclusive
        "startColumnIndex": column_to_index(start_col),
        "endColumnIndex": column_to_index(end_col) + 1,
    }


class GooogleSheetsApi:
    """Small wrapper around the Google Sheets v4 API for a single spreadsheet.

    Call ``set_sheet_id`` (or ``create_spreadsheet`` followed by ``set_sheet_id``)
    before using any method that reads or modifies a spreadsheet.
    """

    def __init__(self, credentials, user_data):
        """Authorize the client.

        Args:
            credentials: Path to the OAuth client secrets file.
            user_data: Path to the cached user token; it is created or refreshed
                when no valid token is found there.
        """
        scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive.file",
        ]
        self.spreadsheet_id = None
        self._sheets_service = None
        self.creds = None
        if os.path.exists(user_data):
            self.creds = Credentials.from_authorized_user_file(user_data, scopes)
        if not self.creds or not self.creds.valid:
            if self.creds and self.creds.expired and self.creds.refresh_token:
                self.creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(credentials, scopes)
                self.creds = flow.run_local_server(port=0)
            # Cache the (new or refreshed) token for the next run.
            with open(user_data, "w") as token:
                token.write(self.creds.to_json())

    def _service(self):
        """Return the Sheets service object, building it on first use."""
        service = getattr(self, "_sheets_service", None)
        if service is None:
            service = build("sheets", "v4", credentials=self.creds)
            self._sheets_service = service
        return service

    def _batch_update(self, request):
        """Send a single request through ``spreadsheets.batchUpdate``."""
        body = {"requests": [request]}
        return (
            self._service()
            .spreadsheets()
            .batchUpdate(spreadsheetId=self.spreadsheet_id, body=body)
            .execute()
        )

    def set_sheet_id(self, spreadsheet_id):
        """Select the spreadsheet that all other methods operate on."""
        self.spreadsheet_id = spreadsheet_id

    def create_spreadsheet(self, title):
        """Create a new spreadsheet and return its id (it is not selected automatically)."""
        spreadsheet = (
            self._service()
            .spreadsheets()
            .create(body={"properties": {"title": title}}, fields="spreadsheetId")
            .execute()
        )
        return spreadsheet.get("spreadsheetId")

    def read_values(self, range_name='A1:AZ100000'):
        """Return the values of ``range_name`` as a list of rows ([] when empty)."""
        result = (
            self._service()
            .spreadsheets()
            .values()
            .get(spreadsheetId=self.spreadsheet_id, range=range_name)
            .execute()
        )
        return result.get("values", [])

    def write_values(self, range_name, values):
        """Write a 2D list of values starting at ``range_name``.

        Values are sent with ``USER_ENTERED`` so strings starting with "=" are
        stored as formulas. Returns the API response.
        """
        return (
            self._service()
            .spreadsheets()
            .values()
            .update(
                spreadsheetId=self.spreadsheet_id,
                range=range_name,
                valueInputOption="USER_ENTERED",
                body={"values": values},
            )
            .execute()
        )

    def autofill(self, range_name, sheet_id=0):
        """Autofill ``range_name`` from the data already present inside it."""
        return self._batch_update(
            {
                "autoFill": {
                    "useAlternateSeries": False,
                    "range": parse_range(sheet_id, range_name),
                }
            }
        )

    def repeat_formula(self, formula, range_name, sheet_id=0):
        """Write ``formula`` into every cell of ``range_name`` via ``repeatCell``."""
        return self._batch_update(
            {
                "repeatCell": {
                    "range": parse_range(sheet_id, range_name),
                    "cell": {"userEnteredValue": {"formulaValue": formula}},
                    "fields": "userEnteredValue",
                }
            }
        )

    def create_pivot_table(self, source_range, target_cell, rows=None, columns=None, values=None, sheet_id=0):
        """Create a pivot table anchored at ``target_cell``.

        Args:
            source_range: A1 range with the source data.
            target_cell: A1 reference used as the anchor.
            rows: Source column offsets used as row groups (default: none).
            columns: Source column offsets used as column groups (default: none).
            values: Iterable of ``(column_offset, summarize_function)`` pairs.
            sheet_id: Numeric id of the sheet (default: 0, the first sheet).
        """
        # None instead of mutable [] defaults, so calls never share a list.
        rows = [] if rows is None else rows
        columns = [] if columns is None else columns
        values = [] if values is None else values

        pivot_table = {
            "source": parse_range(sheet_id, source_range),
            "rows": [
                {"sourceColumnOffset": row, "sortOrder": "ASCENDING", "showTotals": False}
                for row in rows
            ],
            "columns": [
                {"sourceColumnOffset": col, "sortOrder": "ASCENDING", "showTotals": False}
                for col in columns
            ],
            "values": [
                {"summarizeFunction": func, "sourceColumnOffset": val}
                for val, func in values
            ],
            "valueLayout": "HORIZONTAL",
        }
        target_col, target_row = _split_cell(target_cell)
        return self._batch_update(
            {
                "updateCells": {
                    # `rows` is an array of RowData in the API schema.
                    "rows": [{"values": [{"pivotTable": pivot_table}]}],
                    "start": {
                        "sheetId": sheet_id,
                        # NOTE(review): the 1-based row number is sent as the zero-based
                        # rowIndex (create_chart subtracts 1, this method does not), so the
                        # table lands one row below `target_cell`. Left unchanged because
                        # describe_pivot_in_cell's offsets appear to rely on it; fix both
                        # together if this is unintended.
                        "rowIndex": target_row,
                        "columnIndex": column_to_index(target_col),
                    },
                    "fields": "pivotTable",
                }
            }
        )

    def create_chart(self, chart_type, target_cell, domain_range, chart_series, title="", botton_axis_title="", left_axis_title="", sheet_id=0):
        """Add a basic chart whose top-left corner is anchored at ``target_cell``.

        Args:
            chart_type: Basic chart type, e.g. "LINE", "COLUMN" or "AREA".
            target_cell: A1 reference of the anchor cell.
            domain_range: A1 range used as the domain (X axis).
            chart_series: Iterable of A1 ranges, one per series.
            title: Chart title.
            botton_axis_title: Title of the bottom axis.
            left_axis_title: Title of the left axis.
            sheet_id: Numeric id of the sheet (default: 0, the first sheet).
        """
        target_col, target_row = _split_cell(target_cell)
        basic_chart = {
            "chartType": chart_type,
            "legendPosition": "BOTTOM_LEGEND",
            "axis": [
                {"position": "BOTTOM_AXIS", "title": botton_axis_title},
                {"position": "LEFT_AXIS", "title": left_axis_title},
            ],
            "domains": [
                {
                    "domain": {
                        # `sources` is an array of GridRange, same as for the series below.
                        "sourceRange": {"sources": [parse_range(sheet_id, domain_range)]}
                    }
                }
            ],
            "series": [
                {
                    "series": {"sourceRange": {"sources": [parse_range(sheet_id, serie)]}},
                    "targetAxis": "LEFT_AXIS",
                }
                for serie in chart_series
            ],
        }
        return self._batch_update(
            {
                "addChart": {
                    "chart": {
                        "spec": {"title": title, "basicChart": basic_chart},
                        "position": {
                            "overlayPosition": {
                                "anchorCell": {
                                    "sheetId": sheet_id,
                                    "rowIndex": target_row - 1,
                                    "columnIndex": column_to_index(target_col),
                                }
                            }
                        },
                    }
                }
            }
        )

    def get_metadata(self):
        """Return the spreadsheet resource (sheets, properties, ...) as a dict."""
        return (
            self._service()
            .spreadsheets()
            .get(spreadsheetId=self.spreadsheet_id)
            .execute()
        )

    def get_errors(self, range_name="A1:V50"):
        """List cells in ``range_name`` whose displayed value starts with '#'.

        Returns:
            list of ``(cell_reference, value)`` tuples, e.g. ``[("B3", "#REF!")]``.
        """
        first_cell = range_name.split("!")[-1].split(":")[0]
        first_col_label, first_row = _split_cell(first_cell)
        first_col = column_to_index(first_col_label)

        errors = []
        for row_offset, row in enumerate(self.read_values(range_name)):
            for col_offset, value in enumerate(row):
                if isinstance(value, str) and value.startswith("#"):
                    cell = index_to_column(first_col + col_offset) + str(first_row + row_offset)
                    errors.append((cell, value))
        return errors
# Tools cell, part 1: auxiliary functions used by the agent's tools.
# The tools themselves are built on the BaseTool abstraction.

# ------------------------------------------------- Auxiliary functions ----------------------------------------------------
def matrix_to_markdown(header, matrix):
    """Render a header and a list of rows as a markdown table (one line per row)."""
    lines = [
        "| " + " | ".join(map(str, header)) + " |",
        "| " + " | ".join(["---"] * len(header)) + " |",
    ]
    lines.extend("| " + " | ".join(map(str, row)) + " |" for row in matrix)
    return "\n".join(lines) + "\n"


# NOTE(review): column_to_index / index_to_column are also defined in the API cell;
# they are kept here so this cell stays runnable on its own.
def column_to_index(column):
    """Convert an A1 column label to a zero-based index ("A" -> 0, "AA" -> 26)."""
    index = 0
    for char in column:
        index = index * 26 + string.ascii_uppercase.index(char) + 1
    return index - 1


def index_to_column(index):
    """Convert a zero-based column index to its A1 label (0 -> "A", 26 -> "AA")."""
    column = ""
    while index >= 0:
        index, remainder = divmod(index, 26)
        column = string.ascii_uppercase[remainder] + column
        index -= 1
    return column


def _split_markdown_row(row):
    """Split one markdown table line into stripped cells, keeping empty cells."""
    row = row.strip()
    if row.startswith("|"):
        row = row[1:]
    if row.endswith("|"):
        row = row[:-1]
    return [cell.strip() for cell in row.split("|")]


def _is_delimiter_row(cells):
    """Tell whether the cells form a markdown header delimiter such as |---|:--:|."""
    return bool(cells) and all(
        cell and "-" in cell and set(cell) <= set("-: ") for cell in cells
    )


def markdown_to_matrix(markdown):
    """Parse a markdown table into a list of rows (the header is the first row).

    Literal backslash-n sequences are treated as line breaks, because the text
    may arrive with escaped newlines. The delimiter row after the header is
    skipped only when it really is one, and empty cells are preserved so that
    columns do not shift. Empty input yields an empty list.
    """
    markdown = markdown.replace('\\n', '\n')
    rows = [row for row in markdown.split('\n') if row.strip()]
    if not rows:
        return []
    body = rows[1:]
    if body and _is_delimiter_row(_split_markdown_row(body[0])):
        body = body[1:]
    matrix = [_split_markdown_row(rows[0])]
    matrix.extend(_split_markdown_row(row) for row in body)
    return matrix


def longest_leftmost_non_empty_subarray(lst):
    """Return the first run of consecutive non-empty strings and its indices.

    Leading empty strings are skipped; the run ends at the next empty string.
    """
    start = 0
    while start < len(lst) and lst[start] == '':
        start += 1
    end = start
    while end < len(lst) and lst[end] != '':
        end += 1
    return list(lst[start:end]), list(range(start, end))


def get_data_outline(gapi, max_rows=20):
    """Build a markdown sample of the table on the sheet and its A1 range.

    The table is assumed to start at the first non-empty row; its columns are
    the first run of non-empty cells in that row. When the table has more than
    ``max_rows`` data rows, ``max_rows`` of them are sampled at random.

    Args:
        gapi: Object with a ``read_values()`` method returning a list of rows.
        max_rows: Maximum number of data rows (header excluded) in the sample.

    Returns:
        ``(markdown_sample, a1_range)``, or the pair
        ``("No data on the sheet", "No data on the sheet")`` when nothing is found.
    """
    no_data = "No data on the sheet"
    values_list = gapi.read_values()

    # Skip leading empty rows without running past the end of the list.
    first_row = 0
    while first_row < len(values_list) and len(values_list[first_row]) == 0:
        first_row += 1
    values_list = values_list[first_row:]
    if len(values_list) == 0:
        return no_data, no_data
    first_row += 1  # switch to the 1-based row number used in A1 notation

    _, ind = longest_leftmost_non_empty_subarray(values_list[0])
    if not ind:
        return no_data, no_data

    header = [values_list[0][i] for i in ind]
    # Rows may be shorter than the header because trailing empty cells are not
    # returned; pad them instead of building a ragged numpy array.
    data_rows = [
        [row[i] if i < len(row) else '' for i in ind]
        for row in values_list[1:]
    ]
    if len(data_rows) > max_rows:
        selected_indices = np.random.choice(len(data_rows), max_rows, replace=False)
        data_rows = [data_rows[i] for i in selected_indices]

    last_row = len(values_list) + first_row - 1
    # index_to_column also handles columns beyond "Z".
    data_range = f"{index_to_column(ind[0])}{first_row}:{index_to_column(ind[-1])}{last_row}"
    return matrix_to_markdown(header, data_rows), data_range


def describe_pivot_in_cell(api, cell):
    """Describe where the pivot table requested at ``cell`` ended up.

    Reads everything from ``cell`` to the bottom-right corner of the first sheet
    and derives the reported range and counts from the size of that data.

    NOTE(review): the "+ 1" / "- 2" offsets look calibrated to
    GooogleSheetsApi.create_pivot_table, which anchors the table one row below
    ``cell`` — confirm before changing either side.
    """
    res = api.get_metadata()['sheets'][0]['properties']['gridProperties']
    max_row = res['rowCount']
    max_col = index_to_column(res['columnCount'] - 1)
    data = api.read_values(f"{cell}:{max_col}{max_row}")
    row_number = len(data)
    col_number = max((len(row) for row in data), default=0)  # data may be empty
    start_col = ''.join(filter(str.isalpha, cell))
    start_row = int(''.join(filter(str.isdigit, cell))) + 1
    end_col = index_to_column(column_to_index(start_col) + col_number - 1)
    end_row = start_row + row_number - 2
    return \
    f"""
    Pivot table was written to cell {cell}
    Pivot table main data is located in range {start_col}{start_row}:{end_col}{end_row}
    Pivot table has {row_number-2} rows values and {col_number-1} columns values
    """
# ------------------------------------------------- Descriptions -----------------------------------------------------------
# Argument schemas for the tools. The `description` texts are shown to the model,
# so they are part of the prompt. `#` comments are used instead of class docstrings
# to keep the generated schemas unchanged.

# Arguments of write_table_tool.
class write_table_description(BaseModel):
    range_name: str = Field(description="The name of the range on this sheet in A1 notation.")
    markdown_table: str = Field(description="Markdown table that will be written into the range.")


# Arguments of write_value_tool.
class write_value_description(BaseModel):
    cell_id: str = Field(description="ID of the cell in A1 notation.")
    value: str = Field(description="Value or formula that will be written into the cell. Formula should start with symbol '='.")


# Arguments of ask_user_general_question_tool.
class ask_user_general_question_description(BaseModel):
    question: str = Field(description="General question to the user. Should be a single sentence. Answer will be a text string.")


# Arguments of ask_user_alternative_question_tool.
class ask_user_alternative_question_description(BaseModel):
    question: str = Field(description="Alternative question to the user. Should be a single sentence.")
    options: List[str] = Field(description="Options for user to answer this question, should be a list. The number of answer options should not exceed 5.")


# Arguments of create_pivot_table_tool.
class create_pivot_table_description(BaseModel):
    source_range: str = Field(description="The name of the source range of pivot table in A1 notation. Defines where data will be read from.")
    target_cell: str = Field(description="ID of the target cell in A1 notation. Pivot table will be written into this cell.")
    # The example used to say "indices 1 and 3" for [0, 2], contradicting the 0-based numbering.
    rows: List[int] = Field(description=\
        """
        Column indices in the source range that will be rows in the pivot table. Numbering starts from 0. Should be list of non-negative integers.
        For example: [0, 2] means that columns with indices 0 and 2 will be rows in the pivot table.
        List can be empty, then there will be no rows in pivot table.
        """
    )
    columns: List[int] = Field(description=\
        """
        Column indices in the source range that will be columns in the pivot table. Numbering starts from 0. Should be list of non-negative integers.
        For example: [1, 3] means that columns with indices 1 and 3 will be columns in the pivot table.
        List can be empty, then there will be no columns in pivot table.
        """
    )
    # Kept as a plain list: each item is a (column index, function) pair, which
    # arrives from the model as a two-element list.
    values: list = Field(description=\
        """
        Column indices in the source range that will be data values the pivot table. Numbering starts from 0. 
        Should be list of tuples: the first tuple element is column index, the second tuple element is aggregation function.
        Aggregation function can be one of these: "SUM", "COUNTA", "AVERAGE", "MAX", "MIN"
        For example: [(4, "SUM")] means that columns with index 4 will summed up in the pivot table.
        List can be empty, then no values will be written into the pivot table.
        """
    )


# Arguments of draw_chart_tool.
class draw_chart_description(BaseModel):
    chart_type: str = Field(description="Type of the chart, can be on of the following: LINE, COLUMN, AREA")
    target_cell: str = Field(description="ID of the target cell in A1 notation. Chart will be written into this cell.")
    domain_range: str = Field(description="Range name of chart domain in A1 notation (like K9:M123). This domain will be the X axis.")
    chart_series: List[str] = Field(description=\
        """
        List of range names in A1 notation. Each element of this list defines range name of data for chart series.
        For example: ["I5:I19", "J5:J19"] means that there will be two series on the chart: with data from range "I5:I19" and "J5:J19".
        """
    )
    title: str = Field(description="Title of the chart")
    x_title: str = Field(description="Title of the X axis")
    y_title: str = Field(description="Title of the Y axis (or series)")


# Arguments of repeat_formula_tool.
class repeat_formula_description(BaseModel):
    formula: str = Field(description="Formula to be repeated in range. Should start with '='")
    range_name: str = Field(description="The name of the range, where formula will be repeated.")


# Arguments of autofill_constant_tool.
class autofill_constant_description(BaseModel):
    constant: str = Field(description="Constant value to be written in a range (not only one cell).")
    range_name: str = Field(description="The name of the range to be autofilled with constant value.")


# Arguments of autofill_delta_tool.
class autofill_delta_description(BaseModel):
    first_value: str = Field(description="First value to be written in a range.")
    second_value: str = Field(description="Sencond value to be written in a range.")
    range_name: str = Field(description="The name of the range to be autofilled with constant difference between values.")


# Arguments of get_stock_data_tool.
class get_stock_data_description(BaseModel):
    stock_id: str = Field(description="Identifier of a stock.")
    start_dt: str = Field(description="Start date in form yyyy-mm-dd")
    end_dt: str = Field(description="End date in form yyyy-mm-dd")
    cell_id: str = Field(description="ID of the target cell in A1 notation.")

# ------------------------------------------------- Tools -----------------------------------------------------------

# Writes a markdown table into the sheet. `extra='allow'` lets the instance keep
# the `gapi` client as a plain attribute.
class write_table_tool(BaseTool, extra='allow'):
    # Field overrides carry annotations: pydantic v2 rejects un-annotated overrides.
    name: str = "write_table"
    description: str = \
    """
    Call this function with a cell ID and a markdown table to write this table into the specified cell.
    Markdown table must have headers. Pass it into argument as a string.
    Formulas should begin with "=".
    """
    args_schema: Type[BaseModel] = write_table_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, range_name, markdown_table):
        """Parse the markdown table and write it (header included) at ``range_name``."""
        matrix = markdown_to_matrix(markdown_table)
        self.gapi.write_values(range_name, matrix)
        return f"The data from the table {markdown_table} has been written into the cell {range_name}"
# Tools that wrap GooogleSheetsApi calls or talk to the user. Tools that need the
# spreadsheet client take it as `gapi` and keep it as an extra attribute
# (`extra='allow'`). Field overrides are annotated because pydantic v2 rejects
# un-annotated overrides.

# Writes a single value or formula into one cell.
class write_value_tool(BaseTool, extra='allow'):
    name: str = "write_value"
    description: str = \
    """
    Call this function with a cell ID and a value of formula to write this value or formula into only one specified cell.
    Only use formulas you have an access to. If formula has arguments, they should be separated by commas.
    Example of a formula: "=SUM(A1:A5)"
    """
    args_schema: Type[BaseModel] = write_value_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, cell_id, value):
        """Write ``value`` into ``cell_id``.

        Single quotes are turned into double quotes only inside formulas (string
        literals there need double quotes); plain text is written unchanged.
        """
        text = str(value)
        if text.startswith("="):
            text = text.replace("'", "\"")
        self.gapi.write_values(cell_id, [[text]])
        return f"Value {value} has been written into the cell {cell_id}"


# Asks the user a free-form question on stdin.
class ask_user_general_question_tool(BaseTool):
    name: str = "ask_user_general_question"
    description: str = \
    """
    Call this function with a general question to the user.
    Do this if you need extra information from user.
    """
    args_schema: Type[BaseModel] = ask_user_general_question_description

    def _run(self, question):
        """Print the question and return the user's typed answer."""
        print(question)
        return input()


# Asks the user to pick one of several options on stdin.
class ask_user_alternative_question_tool(BaseTool):
    name: str = "ask_user_alternative_question"
    description: str = \
    """
    Call this function with a alternative question to the user.
    Provide 5 or less option for user to chose from.
    """
    args_schema: Type[BaseModel] = ask_user_alternative_question_description

    def _run(self, question, options):
        """Show the numbered options and return the one the user selected.

        The user may type the option number (numbering starts at 0, as printed)
        or the option text itself; anything else is asked again.
        """
        print(question)
        if not options:
            # Nothing to choose from: fall back to a free-form answer.
            return input()
        print("Please chose one of the following options: (enter number)")
        print("\n".join([f"{num}) {option}" for num, option in enumerate(options)]))
        while True:
            answer = input().strip()
            # input() returns a string, so it must be converted before indexing.
            if answer.isdigit() and int(answer) < len(options):
                return options[int(answer)]
            if answer in options:
                return answer
            print(f"Please enter a number from 0 to {len(options) - 1}")


# Creates a pivot table and reports where it was placed.
class create_pivot_table_tool(BaseTool, extra='allow'):
    name: str = "create_pivot_table"
    description: str = \
    """
    Call this function to create a pivot table with data on the sheet.
    """
    args_schema: Type[BaseModel] = create_pivot_table_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, source_range, target_cell, rows, columns, values):
        """Create the pivot table and append a description of its location."""
        self.gapi.create_pivot_table(source_range, target_cell, rows=rows, columns=columns, values=values)
        pivot_description = describe_pivot_in_cell(self.gapi, target_cell)
        return f"Created pivot table from data in {source_range} with rows {rows}, columns {columns} and values {values}\n" + pivot_description


# Draws a basic chart from ranges on the sheet.
class draw_chart_tool(BaseTool, extra='allow'):
    name: str = "draw_chart"
    description: str = \
    """
    Call this function to create a line, column or area chart with data on the sheet.
    """
    args_schema: Type[BaseModel] = draw_chart_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, chart_type, target_cell, domain_range, chart_series, title="", x_title="", y_title=""):
        """Create the chart; the titles default to empty strings when omitted."""
        self.gapi.create_chart(chart_type, target_cell, domain_range, chart_series, title=title, botton_axis_title=x_title, left_axis_title=y_title)
        return f"Created {chart_type} chart from data in {chart_series} with domain in {domain_range}"


# Repeats a formula over a range.
class repeat_formula_tool(BaseTool, extra='allow'):
    name: str = "repeat_formula"
    description: str = \
    """
    Call this function to repeat formula in specified range.
    The formula's range automatically increments for each row and column in the range, starting with the upper left cell. 
    For example, if cell B1 has the formula =FLOOR(A1*PI()), while cell D6 has the formula =FLOOR(C6*PI()).
    """
    args_schema: Type[BaseModel] = repeat_formula_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, formula, range_name):
        """Write ``formula`` into every cell of ``range_name``."""
        self.gapi.repeat_formula(formula, range_name)
        return f"Repeated formula {formula} in range {range_name}"


# Fills a range with one constant value.
class autofill_constant_tool(BaseTool, extra='allow'):
    name: str = "autofill_constant"
    description: str = \
    """
    Call this function to autofill constant value in specified range.
    For example, if you need to write value "678" into all cells of range "G3:G8", call this function with constant "678" and range_name "G3:G8".
    """
    args_schema: Type[BaseModel] = autofill_constant_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, constant, range_name):
        """Write the constant into the first cell, then autofill the range from it."""
        self.gapi.write_values(range_name.split(':')[0], [[constant]])
        self.gapi.autofill(range_name)
        return f"Constant value {constant} was written in every cell of range {range_name}"


# Fills a range with an arithmetic progression seeded by its first two values.
class autofill_delta_tool(BaseTool, extra='allow'):
    name: str = "autofill_delta"
    description: str = \
    """
    Call this function to autofill values in specified range with a constant difference.
    For example, if you need to write values "1 2 3 4 5 6" into range "G3:G8", call this function with 
    - first_value: "1"
    - second_value: "2"
    - range_name "G3:G8".
    This will work because all elements in "1 2 3 4 5 6" have constant difference.
    """
    args_schema: Type[BaseModel] = autofill_delta_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, first_value, second_value, range_name):
        """Seed the first two cells of the range and autofill the rest.

        The second value goes one row below the first cell, unless the range is
        a single row spanning several columns; then it goes one column to the right.
        """
        first_cell, _, last_cell = range_name.partition(':')
        start_col = ''.join(filter(str.isalpha, first_cell))
        start_row = ''.join(filter(str.isdigit, first_cell))
        end_col = ''.join(filter(str.isalpha, last_cell))
        end_row = ''.join(filter(str.isdigit, last_cell))

        is_single_row = bool(last_cell) and end_row == start_row and end_col != start_col
        if is_single_row:
            second_cell = index_to_column(column_to_index(start_col) + 1) + start_row
        else:
            second_cell = start_col + str(int(start_row) + 1)

        self.gapi.write_values(first_cell, [[first_value]])
        self.gapi.write_values(second_cell, [[second_value]])
        self.gapi.autofill(range_name)
        return f"Values were written into range {range_name}"


# Downloads daily candles from the MOEX ISS API and writes dates and prices to the sheet.
class get_stock_data_tool(BaseTool, extra='allow'):
    name: str = "get_stock_data"
    description: str = \
    """
    Call this function to write stock data into specified cell.
    You have access to the following stock identifiers (stock_id):
    - "SBER" for SberBank or Sber
    - "YNDX" for Yandex 
    - "GAZP" for Gazprom
    Data will occupy two columns: one for dates, one for prices.
    """
    args_schema: Type[BaseModel] = get_stock_data_description

    def __init__(self, gapi, **data):
        super().__init__(**data)
        self.gapi = gapi

    def _run(self, stock_id, start_dt, end_dt, cell_id):
        """Fetch candles for ``stock_id`` and write [date, price] rows at ``cell_id``.

        The price is column 4 divided by column 5 of each candle and the date is
        the first 10 characters of column 6.
        NOTE(review): presumably these columns are value, volume and begin —
        confirm against the ISS response's column list.
        Candles with a zero divisor are skipped.

        Raises:
            requests.HTTPError: if the ISS request fails.
        """
        from urllib.parse import quote

        # stock_id comes from the model, so it is escaped before going into the path;
        # the query string is built by requests from `params`.
        url = f"https://iss.moex.com/iss/engines/stock/markets/shares/securities/{quote(str(stock_id), safe='')}/candles.json"
        reply = requests.get(
            url,
            params={"from": start_dt, "till": end_dt, "interval": 24},
            timeout=30,
        )
        reply.raise_for_status()
        candles = reply.json()['candles']['data']
        matrix = [[candle[6][:10], candle[4] / candle[5]] for candle in candles if candle[5]]
        if not matrix:
            return f"No stock data found for {stock_id} between {start_dt} and {end_dt}, nothing has been written."
        response = self.gapi.write_values(cell_id, matrix)
        result_range = response['updatedRange'].split('!')[1]
        return \
        f"""
        Stock data has been written to range: {result_range}
        First column contains dates, second column contains prices.
        Table does not contain header.
        """
contain header.\n", + " \"\"\"\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agent\n", + "\n", + "Основной код агента на gigagraph \n", + "\n", + "Подробно основные компоненты и логика работы рассмотрены [здесь](наша_статья)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class AgentState(TypedDict):\n", + " messages: Annotated[list[AnyMessage], operator.add]\n", + " \n", + " \n", + "class GoogleSheetsAgent:\n", + " def __init__(self, gapi, gsheet_tools, feedback_tools):\n", + " self.model = None\n", + " \n", + " builder = StateGraph(AgentState)\n", + " builder.add_node(\"Main annotator\", self.annotator_node)\n", + " builder.add_node(\"Annotation reflector\", self.annotator_reflection_node)\n", + " builder.add_node(\"Annotation caller\", self.call_node)\n", + " builder.add_node(\"Annotation tooler\", self.tool_node)\n", + " builder.add_node(\"Plan proxy\", self.plan_proxy)\n", + " builder.add_node(\"Usual planner\", self.plan_node)\n", + " builder.add_node(\"COT SC planner\", self.plan_node_cot)\n", + " builder.add_node(\"TOT planner\", self.plan_node_tot)\n", + " builder.add_node(\"TOT SC planner\", self.plan_node_tot_sc)\n", + " builder.add_node(\"Planner cells critic\", self.plan_critic_cells_node)\n", + " builder.add_node(\"Planner formulas critic\", self.plan_critic_formulas_node)\n", + " builder.add_node(\"Main executor\", self.executor_node)\n", + " builder.add_node(\"Main caller\", self.call_node)\n", + " builder.add_node(\"Main tooler\", self.tool_node)\n", + " builder.add_node(\"Error reflector\", self.error_corrector_node)\n", + " builder.add_node(\"Error caller\", self.call_node)\n", + " builder.add_node(\"Error tooler\", self.tool_node)\n", + " \n", + " builder.add_edge(\"Annotation reflector\", \"Annotation caller\")\n", + " builder.add_edge(\"Annotation tooler\", \"Annotation caller\")\n", + " builder.add_edge(\"Usual planner\", \"Planner 
cells critic\")\n", + " builder.add_edge(\"COT SC planner\", \"Planner cells critic\")\n", + " builder.add_edge(\"TOT planner\", \"Planner cells critic\")\n", + " builder.add_edge(\"TOT SC planner\", \"Planner cells critic\")\n", + " builder.add_edge(\"Main executor\", \"Main caller\")\n", + " builder.add_edge(\"Main tooler\", \"Main caller\")\n", + " builder.add_edge(\"Error tooler\", \"Error caller\")\n", + " \n", + " builder.add_conditional_edges(\n", + " \"Main caller\", \n", + " self.exists_action, \n", + " {True: \"Main tooler\", False: \"Error reflector\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Annotation caller\", \n", + " self.exists_action, \n", + " {True: \"Annotation tooler\", False: \"Plan proxy\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " START, \n", + " self.outline_node, \n", + " {True: \"Main annotator\", False: \"Plan proxy\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Main annotator\", \n", + " self.data_is_relation, \n", + " {True: \"Annotation reflector\", False: \"Plan proxy\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Planner cells critic\", \n", + " self.continue_plan_critic_cells, \n", + " {True: \"Planner cells critic\", False: \"Planner formulas critic\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Planner formulas critic\", \n", + " self.continue_plan_critic_formulas, \n", + " {True: \"Planner formulas critic\", False: \"Main executor\"}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Plan proxy\", \n", + " self.switch_planner, \n", + " {\n", + " \"Usual\": \"Usual planner\", \n", + " \"COT_SC\": \"COT SC planner\",\n", + " \"TOT\": \"TOT planner\",\n", + " \"TOT_SC\" : \"TOT SC planner\"\n", + " }\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Error reflector\", \n", + " self.exists_errors, \n", + " {True: \"Error caller\", False: END}\n", + " )\n", + " builder.add_conditional_edges(\n", + " \"Error caller\", \n", + " 
self.exists_action, \n", + " {True: \"Error tooler\", False: \"Error reflector\"}\n", + " )\n", + " \n", + " self.graph = builder.compile()\n", + " self.logs = \"\"\n", + " \n", + " self.gsheet_tools_list = gsheet_tools\n", + " self.gsheet_tools_dict = {t.name: t for t in gsheet_tools}\n", + " self.feedback_tools_list = feedback_tools\n", + " self.feedback_tools_dict = {t.name: t for t in feedback_tools}\n", + " \n", + " self.gapi = gapi\n", + " \n", + " \n", + " def get_logs(self):\n", + " return self.logs\n", + " \n", + " \n", + " def invoke(self, query, prompts, \n", + " model_name = 'gpt-4o',\n", + " temperature = 0.0,\n", + " planner_mode=\"Usual\", \n", + " plan_critic_formulas_max_tries=2, \n", + " plan_critic_cells_max_tries=2, \n", + " plan_sc_number=5, \n", + " tot_number=5,\n", + " tot_max_rounds = 20,\n", + " tot_mode=\"one_call\",\n", + " errors_correction_max_tries=2\n", + " ):\n", + " state = \\\n", + " {\n", + " \"messages\" : []\n", + " }\n", + " self.query = query\n", + " self.logs = \"\"\n", + " self.model = ChatOpenAI(model_name=model_name, temperature=temperature)\n", + " self.prompts = prompts\n", + " self.input_tokens = 0\n", + " self.output_tokens = 0\n", + " self.plan_critic_formulas_tries = plan_critic_formulas_max_tries\n", + " self.plan_critic_cells_tries = plan_critic_cells_max_tries\n", + " self.plan_sc_number = plan_sc_number\n", + " self.planner_mode = planner_mode\n", + " self.tot_number = tot_number\n", + " self.tot_max_rounds = tot_max_rounds\n", + " self.tot_mode = tot_mode\n", + " self.errors_correction_tries = errors_correction_max_tries+1\n", + " \n", + " result = self.graph.invoke(state)\n", + " result = result.get('messages')[-1].content\n", + " \n", + " logs = self.get_logs()\n", + " logs = f\"Model: {model_name}\\nTemperature: {temperature}\\nQuery: {query}\\n\\n\" + logs\n", + " return result, logs\n", + " \n", + " \n", + " def print_logs(self, log):\n", + " self.logs += str(log) + '\\n'\n", + " \n", + " def 
print_header(self, header):\n", + " self.logs += f\"\\n{header:-^80}\\n\"\n", + " \n", + " \n", + " def outline_node(self, state: AgentState):\n", + " self.print_logs(\"Data outlining: getting data...\")\n", + " sample, data_range = get_data_outline(self.gapi)\n", + " self.print_logs(\"Data outlining: data obtained\")\n", + " self.sample = sample\n", + " self.data_range = data_range\n", + " return self.sample != \"No data on the sheet\"\n", + " \n", + " \n", + " def annotator_node(self, state: AgentState):\n", + " messages = [\n", + " SystemMessage(content=self.prompts['annotation_prompt']), \n", + " HumanMessage(content=self.prompts['annotation_task'].format(query=self.query, sample=self.sample))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " \n", + " self.print_header(\"ANNOTATION PROMPT\") \n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"ANNOTATION RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " \n", + " return {\"messages\": [response]}\n", + " \n", + " \n", + " def annotator_reflection_node(self, state: AgentState):\n", + " outline = state['messages'][-1].content\n", + " messages = [\n", + " SystemMessage(content=self.prompts['annotation_reflector']), \n", + " HumanMessage(content=outline)\n", + " ]\n", + " self.print_header(\"ANNOTATION REFLECTOR PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.tools_list = self.feedback_tools_list\n", + " self.tools_dict = self.feedback_tools_dict\n", + " return {\"messages\": messages}\n", + " \n", + " def plan_proxy(self, state):\n", + " pass\n", + " \n", + " def plan_node(self, state: AgentState):\n", + " self.outline = state['messages'][-1].content if len(state['messages']) > 0 else \"No data on the sheet\"\n", + " messages = [\n", + " 
SystemMessage(content=self.prompts['planner_prompt']), \n", + " HumanMessage(content=self.prompts['planner_task'].format(query=self.query, outline=self.outline, cells=self.data_range))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " \n", + " self.print_header(\"PLAN PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"PLAN RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " self.plan = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(json_res.get('plan'))])\n", + " \n", + "\n", + " def plan_node_cot(self, state: AgentState):\n", + " self.outline = state['messages'][-1].content if len(state['messages']) > 0 else \"No data on the sheet\"\n", + " plans = []\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_prompt']), \n", + " HumanMessage(content=self.prompts['planner_task'].format(query=self.query, outline=self.outline, cells=self.data_range))\n", + " ]\n", + " for i in range(self.plan_sc_number):\n", + " response = self.model.invoke(messages)\n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " plan = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(json_res.get('plan'))])\n", + " plans.append(plan)\n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " self.print_header(\"PLAN PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " plans_prompt = \"\\n\\n\".join([f\"Plan {n}:\\n{plan}\" for n, plan in enumerate(plans)])\n", + " messages = [\n", + " 
SystemMessage(content=self.prompts['planner_cot_voter_prompt']), \n", + " HumanMessage(content=plans_prompt)\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " self.print_header(\"PLAN COT PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"PLAN COT RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens'] \n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " self.plan = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(json_res.get('plan'))])\n", + " \n", + " \n", + " def plan_node_tot(self, state: AgentState):\n", + " self.outline = state['messages'][-1].content if len(state['messages']) > 0 else \"No data on the sheet\"\n", + " self.plan = self.generate_tot_plan(mode=self.tot_mode)\n", + " \n", + " \n", + " def plan_node_tot_sc(self, state: AgentState):\n", + " self.outline = state['messages'][-1].content if len(state['messages']) > 0 else \"No data on the sheet\"\n", + " plans = []\n", + " for i in range(self.plan_sc_number):\n", + " plans.append(self.generate_tot_plan(mode=self.tot_mode))\n", + " plans_prompt = \"\\n\\n\".join([f\"Plan {n}:\\n{plan}\" for n, plan in enumerate(plans)])\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_cot_voter_prompt']), \n", + " HumanMessage(content=plans_prompt)\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " self.print_header(\"PLAN COT PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"PLAN COT RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens'] \n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " json_res = 
json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " self.plan = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(json_res.get('plan'))])\n", + " \n", + " \n", + " def executor_node(self, state):\n", + " messages = [\n", + " SystemMessage(content=self.prompts['executor_prompt']), \n", + " HumanMessage(content=self.plan)\n", + " ]\n", + " state['messages'] = []\n", + " self.print_header(\"TASK PROMPT\")\n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.tools_list = self.gsheet_tools_list\n", + " self.tools_dict = self.gsheet_tools_dict\n", + " return {\"messages\": messages}\n", + " \n", + "\n", + " def plan_critic_cells_node(self, state: AgentState):\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_critic_cells_prompt']), \n", + " HumanMessage(content=self.prompts['planner_critic_prompt'].format(\n", + " query=self.query, \n", + " outline=self.outline, \n", + " cells=self.data_range, \n", + " plan=self.plan\n", + " ))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " \n", + " self.print_header(\"PLAN CRITIC (CELLS) PROMPT\") \n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"PLAN CRITIC (CELLS) RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " \n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " \n", + " self.change_str = json_res.get(\"changed\") == \"True\"\n", + " self.plan_critic_cells_tries -= 1\n", + " if self.change_str:\n", + " self.plan = \"\\n\".join(json_res.get('plan'))\n", + " \n", + " \n", + " def plan_critic_formulas_node(self, state: AgentState):\n", + " messages = [\n", + " 
SystemMessage(content=self.prompts['planner_critic_formulas_prompt']), \n", + " HumanMessage(content=self.prompts['planner_critic_prompt'].format(\n", + " query=self.query, \n", + " outline=self.outline, \n", + " cells=self.data_range, \n", + " plan=self.plan\n", + " ))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " \n", + " self.print_header(\"PLAN CRITIC (FORMULAS) PROMPT\") \n", + " for messge in messages:\n", + " self.print_logs(messge.content)\n", + " self.print_header(\"PLAN CRITIC (FORMULAS) RESPONSE\")\n", + " self.print_logs(response.content) \n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " \n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " \n", + " self.change_str = json_res.get(\"changed\") == \"True\"\n", + " self.plan_critic_formulas_tries -= 1\n", + " if self.change_str:\n", + " self.plan = \"\\n\".join(json_res.get('plan'))\n", + " \n", + " \n", + " def call_node(self, state: AgentState, config):\n", + " messages = state['messages']\n", + " model_with_tools = self.model.bind_tools(self.tools_list)\n", + " response = model_with_tools.invoke(messages)\n", + "\n", + " self.print_header(\"TASK AGENT RESPONSE\")\n", + " self.print_logs(\"Content: \" + response.content) \n", + " self.print_logs(\"Tool calling: \" + str(len(response.tool_calls) > 0))\n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " \n", + " return {\"messages\": [response]}\n", + "\n", + "\n", + " def tool_node(self, state: AgentState, config):\n", + " tool_calls = state['messages'][-1].tool_calls\n", + " results = []\n", + "\n", + " self.print_header(\"TOOL CALLING\")\n", + " \n", + " for tool in tool_calls:\n", + " self.print_logs(f\"Tool: {tool['name']}\")\n", + " if not tool['name'] in 
self.tools_dict: \n", + " self.print_logs(\"\\nBad tool name!\")\n", + " result = \"Bad tool name, retry!\" \n", + " else:\n", + " self.print_logs('Args: ' + str(tool['args']))\n", + " result = self.tools_dict[tool['name']].invoke(tool['args'])\n", + " self.print_logs('Result: ' + str(result))\n", + " results.append(ToolMessage(tool_call_id=tool['id'], name=tool['name'], content=str(result)))\n", + " return {'messages': results}\n", + " \n", + " \n", + " def error_corrector_node(self, state):\n", + " self.errors = \"\\n\".join([str(a) for a in self.gapi.get_errors()])\n", + " messages = state['messages'] + [\n", + " SystemMessage(content=self.prompts['error_corrector_prompt']), \n", + " HumanMessage(content=self.prompts['error_corrector_task'].format(\n", + " errors=self.errors\n", + " ))\n", + " ]\n", + " state['messages'] = []\n", + " self.print_header(\"ERROR CORRECTION PROMPT\")\n", + " for messge in messages[-2:]:\n", + " self.print_logs(messge.content)\n", + " self.tools_list = self.gsheet_tools_list\n", + " self.tools_dict = self.gsheet_tools_dict\n", + " return {\"messages\": messages}\n", + "\n", + "\n", + " def exists_action(self, state: AgentState):\n", + " result = state['messages'][-1]\n", + " return len(result.tool_calls) > 0\n", + " \n", + " def exists_errors(self, state: AgentState):\n", + " self.errors_correction_tries -= 1\n", + " return len(self.gapi.get_errors()) > 0 and self.errors_correction_tries > 0\n", + " \n", + " def data_is_relation(self, state: AgentState):\n", + " result = state['messages'][-1]\n", + " return result.content.split('\\n')[0] == 'relational'\n", + " \n", + " def switch_planner(self, state):\n", + " return self.planner_mode\n", + " \n", + " def continue_plan_critic_cells(self, state: AgentState):\n", + " return self.change_str and self.plan_critic_cells_tries > 0\n", + "\n", + " def continue_plan_critic_formulas(self, state):\n", + " return self.change_str and self.plan_critic_formulas_tries > 0\n", + " \n", + " def 
tot_plan_generate_options(self, mode, current_plan_str):\n", + " match mode:\n", + " case 'one_call':\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_tot_generator_prompt'].format(\n", + " tot_number=self.tot_number\n", + " )), \n", + " HumanMessage(content=self.prompts['planner_tot_generator_task'].format(\n", + " query=self.query, \n", + " outline=self.outline, \n", + " cells=self.data_range, \n", + " plan=current_plan_str\n", + " ))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " self.print_logs(response.content)\n", + " options = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\")).get('next_step_options')\n", + " case 'many_calls':\n", + " options = []\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_tot_generator_prompt'].format(\n", + " tot_number=1\n", + " )), \n", + " HumanMessage(content=self.prompts['planner_tot_generator_task'].format(\n", + " query=self.query, \n", + " outline=self.outline, \n", + " cells=self.data_range, \n", + " plan=current_plan_str\n", + " ))\n", + " ]\n", + " for i in range(self.tot_number):\n", + " response = self.model.invoke(messages)\n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " self.print_logs(response.content)\n", + " opt = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\")).get('next_step_options')[0]\n", + " options.append(opt)\n", + " return options\n", + "\n", + " \n", + " def generate_tot_plan(self, mode):\n", + " current_plan = []\n", + " current_plan_str = \"Plan is empty\"\n", + " terminate = False\n", + " rounds = 0\n", + " while not terminate and rounds < self.tot_max_rounds:\n", + " self.print_header(f\"TOT PLAN 
(STEP {rounds})\") \n", + " options = self.tot_plan_generate_options(mode, current_plan_str)\n", + " options_str = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(options)])\n", + " messages = [\n", + " SystemMessage(content=self.prompts['planner_tot_discriminator_prompt']), \n", + " HumanMessage(content=self.prompts['planner_tot_discriminator_task'].format(\n", + " query=self.query, \n", + " outline=self.outline, \n", + " cells=self.data_range, \n", + " plan=current_plan_str,\n", + " options=options_str\n", + " ))\n", + " ]\n", + " response = self.model.invoke(messages)\n", + " self.print_logs(messages[1].content)\n", + " self.print_logs(response.content)\n", + " self.input_tokens += response.usage_metadata['input_tokens']\n", + " self.output_tokens += response.usage_metadata['output_tokens']\n", + " json_res = json.loads(response.content.split(\"Result:\\n\")[-1].replace(\"```\", \"\").replace(\"json\", \"\"))\n", + " next_step = options[int(json_res.get('next_step'))-1]\n", + " current_plan.append(next_step)\n", + " current_plan_str = \"\\n\".join([f\"{n+1}) {el}\" for n, el in enumerate(current_plan)])\n", + " terminate = json_res.get('terminate') == \"True\"\n", + " rounds += 1\n", + " return current_plan_str\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prompts\n", + "\n", + "Весь набор необходимых промптов для агента" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('prompts.json', 'r') as f:\n", + " prompts = json.load(f)\n", + " \n", + "with open('formulas.txt', 'r') as f:\n", + " formulas = f.read()\n", + " \n", + "annotation_prompt = \\\n", + "\"\"\"\n", + "You are a professional data annotator\n", + "You are given a markdown table with columns names and sample data\n", + "Also you are given a user query\n", + "First, you should find out if this table is in a usual relation database form or in a form of dictionary\n", + "Example of a 
# Prompt definitions for the agent. The cell merges the prompts defined below
# into prompts.json: entries already stored in the file are kept unless they are
# redefined here.

# Start from the stored prompts when the file exists; on a fresh checkout there
# is nothing to load yet, because this very cell creates the file at its end.
if os.path.exists('prompts.json'):
    with open('prompts.json', 'r', encoding='utf-8') as f:
        prompts = json.load(f)
else:
    prompts = {}

# List of supported formulas; it is embedded into the formulas critic prompt.
# The file has to be created beforehand from the formulas listing of this notebook.
with open('formulas.txt', 'r', encoding='utf-8') as f:
    formulas = f.read()

annotation_prompt = """
You are a professional data annotator
You are given a markdown table with columns names and sample data
Also you are given a user query
First, you should find out if this table is in a usual relation database form or in a form of dictionary
Example of a dictionary:
| cats | 5 |
| --- | --- |
| dogs | 4 |
| all | 9 |

If this table is in a dictionary form, then you should return a json dictionary of this table, for example:
{
    "cats" : 5,
    "dogs" : 4,
    "all" : 9
}

If table is in a relational database form, then you should do following steps:
Using user query and sample in markdown table describe all data columns in the following json form:
{
    "name" : name of column,
    "description" : brief description of this column, try to provide meaning of this column for the user
    "type" : one of the following: number, date, text, empty
}.
If you don't understand meaning of the columns, write unknown in description.

At the end give brief (1-2 sentences) annotation about entire table in the following json form:
"table_description" : description of this table
Make sure that this description useful for the user.

Example:
{
    "columns": [
        {
            "name": "column1",
            "description": "Number of cats",
            "type": "number"
        },
        {
            "name": "column2",
            "description": "Number of dogs",
            "type": "number"
        }
    ],
    "table_description": "Table with information about cats and dogs"
}

Yout first word should be either "dictionary" (if table is in a dictionary form) or "relational" (if table is in a relational database form)
Then return only json dictionary (if table is in a dictionary form) or json description (if table is in a relational database form)
If sample is empty, them return empty json!
Make sure your output after first word can be read by json.loads!
"""

annotation_reflector = """
You are a professional data annotator
You are given a prepared annotation of a table in the json form
Example
{
    "columns": [
        {
            "name": "column1",
            "description": "Number of cats",
            "type": "number"
        },
        {
            "name": "column2",
            "description": "Number of dogs",
            "type": "number"
        }
    ],
    "table_description": "Table with information about cats and dogs"
}

If you can't clearly understand meaning of the column or field with its description, ask the user for clarification
For example, you can ask "What does the column mean?"
Use this option if and only if you have no idea of the column meaning, do it only as a last resort!
If everything is clear, print final annotation
Make sure youe output can be read by json.loads!
"""

planner_prompt = """
You are an expert data analyst who is tasked with writing a detailed plan for data analysis task in Gooogle Sheets
Your plan is step-by-step. It should be a plain list of actions.

You can do these actions:
- Get stock market data for a specific company and time interval and write to specified cell (result table will have two columns!)
- Write a markdown table to specified cell (always use "=" in front of formulas of values)
- Write a formula or a value to one specified cell 
- Create a pivot table from existing data
- Create a simple chart of one of the following types: line, column, area
- Repeat a formula to a range of cells with automatic increment of cell address
- Autofill blank range with a constant value
- Autofill blank range with a data series with constant difference (for example, 1 2 3 4 5 ...)

For a pivot table provide, what data should be used and what columns of this data will go to rows, columns and values of pivot table
For a chart provide, what data should be used and what range will be domain (X axis) and what ranges will be data series 
If you write a formula of a cell to markdown table always use symbol "=" in front of it!

Do not give any examples, only plan for the task, be precise. Think step by step.
You can use only steps written above!
You will be given user query (task) and information about data in the sheet
Given information above, write a detailed plan for a following query
Fow a good plan I will pay 100000 dollars.
Do not give any examples!

Your answer should be in the following form:
Thoughts: *your thoughts, think step by step*
Result:
{
    "plan": result plan (should be a list of strings)
}
Make sure that output after "Result" can be parsed with json.loads()!
"""

planner_cot_voter_prompt = """
You are an senior data analyst who is tasked with chosing a best plan for data analysis task in Gooogle Sheets
You will be given user query (task), information about data in the sheet and several plans for this query.
You should find the best plan and print it.

Your answer should be in the following form:
Thoughts: *your thoughts, think step by step*
Result:
{
    "plan": best plan (should be a list of strings)
}
Make sure that output after "Result" can be parsed with json.loads()!
"""

# NOTE: this template is filled with str.format(tot_number=...), hence the doubled braces.
planner_tot_generator_prompt = """
You are an expert data analyst who is tasked with writing a detailed plan for data analysis task in Gooogle Sheets
You can do these actions:
- Get stock market data for a specific company and time interval and write to specified cell (result table will have two columns!)
- Write a markdown table to specified cell (always use "=" in front of formulas of values)
- Write a formula or a value to one specified cell 
- Create a pivot table from existing data
- Create a simple chart of one of the following types: line, column, area
- Repeat a formula to a range of cells with automatic increment of cell address
- Autofill blank range with a constant value
- Autofill blank range with a data series with constant difference (for example, 1 2 3 4 5 ...)

For a pivot table provide, what data should be used and what columns of this data will go to rows, columns and values of pivot table
For a chart provide, what data should be used and what range will be domain (X axis) and what ranges will be data series 
If you write a formula of a cell to markdown table always use symbol "=" in front of it!

You will be given user query (task), information about data in the sheet and current plan
You should produce {tot_number} possible options for the next step of the plan.
Do not give any examples, only {tot_number} possible options for the next step for the task, be precise. Think step by step.
If you think, that plan is finished, print "End" as option.
You can use only steps written above!
For good options I will pay you 100000 dollars.

Your answer should be in the following form:
Thoughts: *your thoughts, think step by step*
Result:
{{
    "next_step_options": {tot_number} options for the next step of the plan (should be a list of strings)
}}
Make sure that output after "Result" can be parsed with json.loads()!
"""

planner_tot_discriminator_prompt = """
You are an expert data analyst who is tasked with writing a detailed plan for data analysis task in Gooogle Sheets
You will be given user query (task), information about data in the sheet, current plan and options for the next step.
You should choose the best option for the next step of the plan.
Do not repeat plan steps and do not add redudant steps!
If this step should be terminating (to finish the plan), set "terminate" to "True".
Also set "terminate" to "True" if you chose "End" option.
For good choice I will pay you 100000 dollars.

Your answer should be in the following form:
Thoughts: *your thoughts, think step by step*
Result:
{
    "next_step": number of chosen option (should be a number),
    "terminate": "True" or "False" (if plan should be finished with this step)
}
Make sure that output after "Result" can be parsed with json.loads()!
"""

planner_critic_cells_prompt = """
You are a senior data analyst who is tasked with validating plan for data analysis task in Gooogle Sheets
You should check if all cells in a given plan are correct and change them if needed.

You will be given user query (task), information about data in the sheet and a plan for this query
Ensure that these requirements are met:
- All formulas have correct adresses of data ranges in arguments
- Result data ranges on the sheets do not overlap
- All tables and formulas on the sheet are displayes in a human convinient way.

You can only change cells addresses in formulas and tables, nothing more!

Your answer should be in the following form:
Thoughts: *your thoughts, think step by step*
Result:
{
    "changed" : "True" or "False",
    "changes_list" : List of changes (should be list of strings),
    "plan" : Modified or original plan (should be list of strings)
}
Make sure that output after "Result" can be parsed with json.loads()!
"""

# NOTE: f-string - the list of formulas is embedded right here, hence the doubled braces.
planner_critic_formulas_prompt = f"""
You are a senior data analyst who is tasked with validating plan for data analysis task in Gooogle Sheets
Ypu should check if all formulas have correct names, correct number of argumnets and correct types of arguments.

You will be given user query (task), information about data in the sheet and a plan for this query
This is the complete list of formulas you can use:
{formulas}

Ensure that plan use only these formulas with correct names, correct number of arguments and correct types of arguments.
Think step by step. Fow a good solution I will pay 100000 dollars.
Carefully study list of formulas and check, that all formulas in plan are correct!
You can only change formulas names or arguments, nothing more!

Your answer should be in the following form:
Thoughts: *your thoughts*
Result:
{{
    "changed" : "True" or "False",
    "changes_list" : List of changes (empty if nothing changed),
    "plan" : Modified plan (or original plan if nothing changed)
}}
Make sure that output after "Result" can be parsed with json.loads()!
"""

error_corrector_prompt = """
You are a senior data analyst who is tasked with correction errors in Gooogle Sheets task.
Your coworker has created and executed a plan for user task, but made some mistakes. You should correct these mistakes. 
You will be given a list of errors.
Correct these errors.
"""

executor_prompt = """
You are helpful assistant who can work with markdown tables and use Google Sheets API to read and write data
You have a bunch of functions you can call to iteract with Google Sheets API.

Do all steps from the following plan:
"""

annotation_task = """
Query: {query}
Sample: 
{sample}
"""

planner_task = """
User query: {query}
Data outline: {outline}
Data is located in cells {cells}
"""

planner_critic_prompt = """
User query: {query}
Data outline: {outline}
Data is located in cells {cells}
Plan: 
{plan}
"""

planner_tot_generator_task = """
User query: {query}
Data outline: {outline}
Data is located in cells {cells}

Current plan: 
{plan}
"""

planner_tot_discriminator_task = """
User query: {query}
Data outline: {outline}
Data is located in cells {cells}

Current plan: 
{plan}

Possible options:
{options}
"""

error_corrector_task = """
List of errors in form (cell_id, error_name):
{errors}
"""

# Register every prompt under the key the agent looks it up by.
prompts.update({
    'annotation_prompt': annotation_prompt,
    'planner_prompt': planner_prompt,
    'executor_prompt': executor_prompt,
    'annotation_reflector': annotation_reflector,
    'annotation_task': annotation_task,
    'planner_task': planner_task,
    'planner_critic_cells_prompt': planner_critic_cells_prompt,
    'planner_critic_formulas_prompt': planner_critic_formulas_prompt,
    'planner_critic_prompt': planner_critic_prompt,
    'planner_cot_voter_prompt': planner_cot_voter_prompt,
    'planner_tot_generator_prompt': planner_tot_generator_prompt,
    'planner_tot_discriminator_prompt': planner_tot_discriminator_prompt,
    'planner_tot_generator_task': planner_tot_generator_task,
    'planner_tot_discriminator_task': planner_tot_discriminator_task,
    'error_corrector_prompt': error_corrector_prompt,
    'error_corrector_task': error_corrector_task,
})

with open('prompts.json', 'w', encoding='utf-8') as f:
    json.dump(prompts, f)
# %% Showcase: the spreadsheet to work on and the free-form request.
# `showcase` is a flat (sheet_id, query) pair.
# A trailing comma after the closing parenthesis would turn it into a 1-tuple
# wrapping the pair, so showcase[0] would be the whole pair and showcase[1]
# would raise IndexError.
showcase = (
    "sheet_id",  # testcase_1 -- id of the sheet (found in its URL)
    """
query
""",
)

# %% Run the agent on the showcase.
# Rates used by the cost estimate printed below, in USD per 10**6 tokens.
GPT4O_INPUT_USD_PER_1M_TOKENS = 5
GPT4O_OUTPUT_USD_PER_1M_TOKENS = 15

# Unpacking (rather than indexing) fails loudly if `showcase` is not a pair.
showcase_sheet_id, showcase_query = showcase

gapi.set_sheet_id(showcase_sheet_id)
agent_response, agent_logs = agent.invoke(
    query=showcase_query,
    prompts=prompts,
    model_name="gpt-4o",  # "gpt-4o", "gpt-4o-mini"
    temperature=0.00,
    planner_mode="COT_SC",  # Usual, COT_SC, TOT, TOT_SC
    plan_critic_formulas_max_tries=2,
    plan_critic_cells_max_tries=2,
    plan_sc_number=3,
    tot_number=3,
    tot_max_rounds=20,
    tot_mode="many_calls",  # "one_call", "many_calls"
    errors_correction_max_tries=1,
)

# Token usage accumulated on the agent, plus a rough price estimate.
estimated_cost_usd = (
    agent.input_tokens * GPT4O_INPUT_USD_PER_1M_TOKENS
    + agent.output_tokens * GPT4O_OUTPUT_USD_PER_1M_TOKENS
) / 10**6
print(f"""
Overall input tokens: {agent.input_tokens}
Overall output tokens: {agent.output_tokens}
Overall total tokens: {agent.input_tokens + agent.output_tokens}
Estimate cost for GPT4o: {estimated_cost_usd}$
""")

# %% Dump the execution log collected by the agent.
print(agent.logs)
"AVERAGE.WEIGHTED(values, weights, [additional values], [additional weights])\tFinds the weighted average of a set of values, given the values and the corresponding weights..\n", + "AVERAGEA(value1, [value2, ...])\tReturns the numerical average value in a dataset.\n", + "AVERAGEIF(criteria_range, criterion, [average_range])\tReturns the average of a range depending on criteria.\n", + "AVERAGEIFS(average_range, criteria_range1, criterion1, [criteria_range2, criterion2, ...])\tReturns the average of a range depending on multiple criteria.\n", + "BETA.DIST(value, alpha, beta, cumulative, lower_bound, upper_bound)\tReturns the probability of a given value as defined by the beta distribution function..\n", + "BETA.INV(probability, alpha, beta, lower_bound, upper_bound)\tReturns the value of the inverse beta distribution function for a given probability.. \n", + "BETADIST(value, alpha, beta, lower_bound, upper_bound)\tSee BETA.DIST.\n", + "BETAINV(probability, alpha, beta, lower_bound, upper_bound)\t See BETA.INV \n", + "BINOM.DIST(num_successes, num_trials, prob_success, cumulative)\tSee BINOMDIST \n", + "BINOM.INV(num_trials, prob_success, target_prob)\tSee CRITBINOM\n", + "BINOMDIST(num_successes, num_trials, prob_success, cumulative)\tCalculates the probability of drawing a certain number of successes (or a maximum number of successes) in a certain number of tries given a population of a certain size containing a certain number of successes, with replacement of draws.\n", + "CHIDIST(x, degrees_freedom)\tCalculates the right-tailed chi-squared distribution, often used in hypothesis testing.\n", + "CHIINV(probability, degrees_freedom)\tCalculates the inverse of the right-tailed chi-squared distribution.\n", + "CHISQ.DIST(x, degrees_freedom, cumulative)\tCalculates the left-tailed chi-squared distribution, often used in hypothesis testing.\n", + "CHISQ.DIST.RT(x, degrees_freedom)\tCalculates the right-tailed chi-squared distribution, which is commonly used in hypothesis 
testing.\n", + "CHISQ.INV(probability, degrees_freedom)\tCalculates the inverse of the left-tailed chi-squared distribution.\n", + "CHISQ.INV.RT(probability, degrees_freedom)\tCalculates the inverse of the right-tailed chi-squared distribution.\n", + "CHISQ.TEST(observed_range, expected_range)\tSee CHITEST\n", + "CHITEST(observed_range, expected_range)\tReturns the probability associated with a Pearson’s chi-squared test on the two ranges of data. Determines the likelihood that the observed categorical data is drawn from an expected distribution.\n", + "CONFIDENCE(alpha, standard_deviation, pop_size)\tSee CONFIDENCE.NORM\n", + "CONFIDENCE.NORM(alpha, standard_deviation, pop_size)\tCalculates the width of half the confidence interval for a normal distribution..\n", + "CONFIDENCE.T(alpha, standard_deviation, size)\tCalculates the width of half the confidence interval for a Student’s t-distribution..\n", + "CORREL(data_y, data_x)\tCalculates r, the Pearson product-moment correlation coefficient of a dataset.\n", + "COUNT(value1, [value2, ...])\tReturns a count of the number of numeric values in a dataset.\n", + "COUNTA(value1, [value2, ...])\tReturns a count of the number of values in a dataset.\n", + "COVAR(data_y, data_x)\tCalculates the covariance of a dataset.\n", + "COVARIANCE.P(data_y, data_x)\tSee COVAR\n", + "COVARIANCE.S(data_y, data_x)\tCalculates the covariance of a dataset, where the dataset is a sample of the total population..\n", + "CRITBINOM(num_trials, prob_success, target_prob)\tCalculates the smallest value for which the cumulative binomial distribution is greater than or equal to a specified criteria.\n", + "DEVSQ(value1, value2)\tCalculates the sum of squares of deviations based on a sample.\n", + "EXPON.DIST(x, LAMBDA, cumulative)\tReturns the value of the exponential distribution function with a specified LAMBDA at a specified value.. 
\n", + "EXPONDIST(x, LAMBDA, cumulative)\tSee EXPON.DIST\n", + "FDIST(x, degrees_freedom1, degrees_freedom2)\tSee F.DIST.RT.\n", + "FINV(probability, degrees_freedom1, degrees_freedom2)\tSee F.INV.RT\n", + "FISHER(value)\tReturns the Fisher transformation of a specified value.\n", + "FISHERINV(value)\tReturns the inverse Fisher transformation of a specified value.\n", + "FORECAST(x, data_y, data_x)\tCalculates the expected y-value for a specified x based on a linear regression of a dataset.\n", + "FORECAST.LINEAR(x, data_y, data_x)\tSee FORECAST \n", + "FTEST(range1, range2)\tReturns the probability associated with an F-test for equality of variances. Determines whether two samples are likely to have come from populations with the same variance.\n", + "GAMMA(number)\tReturns the Gamma function evaluated at the specified value..\n", + "GAMMA.DIST(x, alpha, beta, cumulative)\tCalculates the gamma distribution, a two-parameter continuous probability distribution.\n", + "GAMMA.INV(probability, alpha, beta)\tThe GAMMA.INV function returns the value of the inverse gamma cumulative distribution function for the specified probability and alpha and beta parameters..\n", + "GAMMADIST(x, alpha, beta, cumulative)\tSee GAMMA.DIST \n", + "GAMMAINV(probability, alpha, beta)\tSee GAMMA.INV.\n", + "GAUSS(z)\tThe GAUSS function returns the probability that a random variable, drawn from a normal distribution, will be between the mean and z standard deviations above (or below) the mean..\n", + "GEOMEAN(value1, value2)\tCalculates the geometric mean of a dataset.\n", + "HARMEAN(value1, value2)\tCalculates the harmonic mean of a dataset.\n", + "HYPGEOM.DIST(num_successes, num_draws, successes_in_pop, pop_size)\tSee HYPGEOMDIST \n", + "HYPGEOMDIST(num_successes, num_draws, successes_in_pop, pop_size)\t Calculates the probability of drawing a certain number of successes in a certain number of tries given a population of a certain size containing a certain number of successes, without 
replacement of draws.\n", + "INTERCEPT(data_y, data_x)\tCalculates the y-value at which the line resulting from linear regression of a dataset will intersect the y-axis (x=0).\n", + "KURT(value1, value2)\tCalculates the kurtosis of a dataset, which describes the shape, and in particular the \"peakedness\" of that dataset.\n", + "LARGE(data, n)\tReturns the nth largest element from a data set, where n is user-defined.\n", + "LOGINV(x, mean, standard_deviation)\tReturns the value of the inverse log-normal cumulative distribution with given mean and standard deviation at a specified value.\n", + "LOGNORM.DIST(x, mean, standard_deviation)\tSee LOGNORMDIST\n", + "LOGNORM.INV(x, mean, standard_deviation)\tSee LOGINV\n", + "LOGNORMDIST(x, mean, standard_deviation)\tReturns the value of the log-normal cumulative distribution with given mean and standard deviation at a specified value.\n", + "MARGINOFERROR(range, confidence)\tCalculates the amount of random sampling error given a range of values and a confidence level.\n", + "MAX(value1, [value2, ...])\tReturns the maximum value in a numeric dataset.\n", + "MAXA(value1, value2)\tReturns the maximum numeric value in a dataset.\n", + "MAXIFS(range, criteria_range1, criterion1, [criteria_range2, criterion2], …)\tReturns the maximum value in a range of cells, filtered by a set of criteria..\n", + "MEDIAN(value1, [value2, ...])\tReturns the median value in a numeric dataset.\n", + "MIN(value1, [value2, ...])\tReturns the minimum value in a numeric dataset.\n", + "MINA(value1, value2)\tReturns the minimum numeric value in a dataset.\n", + "MINIFS(range, criteria_range1, criterion1, [criteria_range2, criterion2], …)\tReturns the minimum value in a range of cells, filtered by a set of criteria..\n", + "MODE(value1, [value2, ...])\tReturns the most commonly occurring value in a dataset.\n", + "MODE.MULT(value1, value2)\tReturns the most commonly occurring values in a dataset..\n", + "MODE.SNGL(value1, [value2, ...])\tSee MODE \n", + 
"NEGBINOM.DIST(num_failures, num_successes, prob_success)\tSee NEGBINOMDIST \n", + "NEGBINOMDIST(num_failures, num_successes, prob_success)\tCalculates the probability of drawing a certain number of failures before a certain number of successes given a probability of success in independent trials.\n", + "NORM.DIST(x, mean, standard_deviation, cumulative)\tSee NORMDIST \n", + "NORM.INV(x, mean, standard_deviation)\tSee NORMINV \n", + "NORM.S.DIST(x)\tSee NORMSDIST\n", + "NORM.S.INV(x)\tSee NORMSINV\n", + "NORMDIST(x, mean, standard_deviation, cumulative)\tReturns the value of the normal distribution function (or normal cumulative distribution function) for a specified value, mean, and standard deviation.\n", + "NORMINV(x, mean, standard_deviation)\tReturns the value of the inverse normal distribution function for a specified value, mean, and standard deviation.\n", + "NORMSDIST(x)\tReturns the value of the standard normal cumulative distribution function for a specified value.\n", + "NORMSINV(x)\tReturns the value of the inverse standard normal distribution function for a specified value.\n", + "PEARSON(data_y, data_x)\tCalculates r, the Pearson product-moment correlation coefficient of a dataset.\n", + "PERCENTILE(data, percentile)\tReturns the value at a given percentile of a dataset.\n", + "PERCENTILE.EXC(data, percentile)\tReturns the value at a given percentile of a dataset, exclusive of 0 and 1..\n", + "PERCENTILE.INC(data, percentile)\tSee PERCENTILE\n", + "PERCENTRANK(data, value, [significant_digits])\tReturns the percentage rank (percentile) of a specified value in a dataset.\n", + "PERCENTRANK.EXC(data, value, [significant_digits])\tReturns the percentage rank (percentile) from 0 to 1 exclusive of a specified value in a dataset.\n", + "PERCENTRANK.INC(data, value, [significant_digits])\tReturns the percentage rank (percentile) from 0 to 1 inclusive of a specified value in a dataset.\n", + "PERMUTATIONA(number, number_chosen)\tReturns the number of 
permutations for selecting a group of objects (with replacement) from a total number of objects..\n", + "PERMUT(n, k)\tReturns the number of ways to choose some number of objects from a pool of a given size of objects, considering order.\n", + "PHI(x)\tThe PHI function returns the value of the normal distribution with mean 0 and standard deviation 1..\n", + "POISSON(x, mean, cumulative)\tSee POISSON.DIST\n", + "POISSON.DIST(x, mean, [cumulative])\tReturns the value of the Poisson distribution function (or Poisson cumulative distribution function) for a specified value and mean.. \n", + "PROB(data, probabilities, low_limit, [high_limit])\tGiven a set of values and corresponding probabilities, calculates the probability that a value chosen at random falls between two limits.\n", + "QUARTILE(data, quartile_number)\tReturns a value nearest to a specified quartile of a dataset.\n", + "QUARTILE.EXC(data, quartile_number)\tReturns value nearest to a given quartile of a dataset, exclusive of 0 and 4..\n", + "QUARTILE.INC(data, quartile_number)\tSee QUARTILE\n", + "RANK(value, data, [is_ascending])\tReturns the rank of a specified value in a dataset.\n", + "RANK.AVG(value, data, [is_ascending])\tReturns the rank of a specified value in a dataset. If there is more than one entry of the same value in the dataset, the average rank of the entries will be returned.\n", + "RANK.EQ(value, data, [is_ascending])\tReturns the rank of a specified value in a dataset. 
If there is more than one entry of the same value in the dataset, the top rank of the entries will be returned.\n", + "RSQ(data_y, data_x)\tCalculates the square of r, the Pearson product-moment correlation coefficient of a dataset.\n", + "SKEW(value1, value2)\tCalculates the skewness of a dataset, which describes the symmetry of that dataset about the mean.\n", + "SKEW.P(value1, value2)\tCalculates the skewness of a dataset that represents the entire population..\n", + "SLOPE(data_y, data_x)\tCalculates the slope of the line resulting from linear regression of a dataset.\n", + "SMALL(data, n)\tReturns the nth smallest element from a data set, where n is user-defined.\n", + "STANDARDIZE(value, mean, standard_deviation)\tCalculates the normalized equivalent of a random variable given mean and standard deviation of the distribution.\n", + "STDEV(value1, [value2, ...])\tCalculates the standard deviation based on a sample.\n", + "STDEV.P(value1, [value2, ...])\tSee STDEVP\n", + "STDEV.S(value1, [value2, ...])\tSee STDEV\n", + "STDEVA(value1, value2)\tCalculates the standard deviation based on a sample, setting text to the value `0`.\n", + "STDEVP(value1, value2)\tCalculates the standard deviation based on an entire population.\n", + "STDEVPA(value1, value2)\tCalculates the standard deviation based on an entire population, setting text to the value `0`.\n", + "STEYX(data_y, data_x)\tCalculates the standard error of the predicted y-value for each x in the regression of a dataset.\n", + "TDIST(x, degrees_freedom, tails)\tCalculates the probability for Student's t-distribution with a given input (x).\n", + "TINV(probability, degrees_freedom)\tSee T.INV.2T\n", + "TRIMMEAN(data, exclude_proportion)\tCalculates the mean of a dataset excluding some proportion of data from the high and low ends of the dataset.\n", + "TTEST(range1, range2, tails, type)\tSee T.TEST.\n", + "VAR(value1, [value2, ...])\tCalculates the variance based on a sample.\n", +
"VAR.P(value1, [value2, ...])\tSee VARP\n", + "VAR.S(value1, [value2, ...])\tSee VAR\n", + "VARA(value1, value2)\tCalculates an estimate of variance based on a sample, setting text to the value `0`.\n", + "VARP(value1, value2)\tCalculates the variance based on an entire population.\n", + "VARPA(value1, value2,...)\tCalculates the variance based on an entire population, setting text to the value `0`.\n", + "WEIBULL(x, shape, scale, cumulative)\tReturns the value of the Weibull distribution function (or Weibull cumulative distribution function) for a specified shape and scale.\n", + "WEIBULL.DIST(x, shape, scale, cumulative)\tSee WEIBULL\n", + "ZTEST(data, value, [standard_deviation])\tSee Z.TEST.\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}