|
| 1 | +import re |
| 2 | +import json |
| 3 | +import logging |
| 4 | +from typing import Dict, Any, Generator, Optional, Tuple |
| 5 | + |
| 6 | + |
| 7 | +class MindsDBSQLStreamParser: |
| 8 | + """ |
| 9 | + A utility class for parsing SQL queries from MindsDB completion streams. |
| 10 | +
|
| 11 | + This class provides methods to process completion streams, extract SQL queries, |
| 12 | + and accumulate full responses. |
| 13 | +
|
| 14 | + Attributes: |
| 15 | + logger (logging.Logger): The logger instance for this class. |
| 16 | + """ |
| 17 | + |
| 18 | + def __init__(self, log_level: int = logging.INFO): |
| 19 | + """ |
| 20 | + Initialize the MindsDBSQLStreamParser. |
| 21 | +
|
| 22 | + Args: |
| 23 | + log_level (int, optional): The logging level to use. Defaults to logging.INFO. |
| 24 | + """ |
| 25 | + self.logger = logging.getLogger(__name__) |
| 26 | + self.logger.setLevel(log_level) |
| 27 | + |
| 28 | + # Create a console handler and set its level |
| 29 | + ch = logging.StreamHandler() |
| 30 | + ch.setLevel(log_level) |
| 31 | + |
| 32 | + # Create a formatter |
| 33 | + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') |
| 34 | + |
| 35 | + # Add the formatter to the handler |
| 36 | + ch.setFormatter(formatter) |
| 37 | + |
| 38 | + # Add the handler to the logger |
| 39 | + self.logger.addHandler(ch) |
| 40 | + |
| 41 | + def stream_and_parse_sql_query(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Generator[ |
| 42 | + Dict[str, Optional[str]], None, None]: |
| 43 | + """ |
| 44 | + Stream and parse the completion stream, yielding output and SQL queries. |
| 45 | +
|
| 46 | + This generator function processes each chunk of the completion stream, |
| 47 | + extracts any output and SQL queries, and yields the results. |
| 48 | +
|
| 49 | + Args: |
| 50 | + completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. |
| 51 | +
|
| 52 | + Yields: |
| 53 | + Dict[str, Optional[str]]: A dictionary containing 'output' and 'sql_query' keys. |
| 54 | + - 'output': The extracted output string from the chunk, if any. |
| 55 | + - 'sql_query': The extracted SQL query string, if found in the chunk. |
| 56 | +
|
| 57 | + Note: |
| 58 | + This function will only yield the first SQL query it finds in the stream. |
| 59 | + """ |
| 60 | + sql_query_found = False |
| 61 | + |
| 62 | + for chunk in completion_stream: |
| 63 | + output = "" |
| 64 | + sql_query = None |
| 65 | + |
| 66 | + # Log full chunk at DEBUG level |
| 67 | + self.logger.debug(f"Processing chunk: {json.dumps(chunk, indent=2)}") |
| 68 | + |
| 69 | + # Log important info at INFO level |
| 70 | + if isinstance(chunk, dict): |
| 71 | + if 'quick_response' in chunk: |
| 72 | + self.logger.info(f"Quick response received: {json.dumps(chunk)}") |
| 73 | + |
| 74 | + output = chunk.get('output', '') |
| 75 | + if output: |
| 76 | + self.logger.info(f"Chunk output: {output}") |
| 77 | + |
| 78 | + if 'messages' in chunk: |
| 79 | + for message in chunk['messages']: |
| 80 | + if message.get('role') == 'assistant': |
| 81 | + self.logger.info(f"Assistant message: {message.get('content', '')}") |
| 82 | + if chunk.get('type') == 'sql': |
| 83 | + sql_query = chunk['content'] |
| 84 | + self.logger.info(f"Generated SQL: {sql_query}") |
| 85 | + |
| 86 | + elif isinstance(chunk, str): |
| 87 | + output = chunk |
| 88 | + self.logger.info(f"String chunk received: {chunk}") |
| 89 | + |
| 90 | + yield { |
| 91 | + 'output':output, |
| 92 | + 'sql_query':sql_query |
| 93 | + } |
| 94 | + |
| 95 | + def process_stream(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Tuple[str, Optional[str]]: |
| 96 | + """ |
| 97 | + Process the completion stream and extract the SQL query. |
| 98 | +
|
| 99 | + This method iterates through the stream, accumulates the full response, |
| 100 | + logs outputs, and extracts the SQL query when found. |
| 101 | +
|
| 102 | + Args: |
| 103 | + completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. |
| 104 | +
|
| 105 | + Returns: |
| 106 | + Tuple[str, Optional[str]]: A tuple containing: |
| 107 | + - The full accumulated response as a string. |
| 108 | + - The extracted SQL query as a string, or None if no query was found. |
| 109 | + """ |
| 110 | + full_response = "" |
| 111 | + sql_query = None |
| 112 | + |
| 113 | + self.logger.info("Starting to process completion stream...") |
| 114 | + |
| 115 | + for result in self.stream_and_parse_sql_query(completion_stream): |
| 116 | + if result['output']: |
| 117 | + self.logger.info(f"Output: {result['output']}") |
| 118 | + full_response += result['output'] |
| 119 | + |
| 120 | + if result['sql_query'] and sql_query is None: |
| 121 | + sql_query = result['sql_query'] |
| 122 | + self.logger.info(f"Extracted SQL Query: {sql_query}") |
| 123 | + |
| 124 | + self.logger.info(f"Full Response: {full_response}") |
| 125 | + self.logger.info(f"Final SQL Query: {sql_query}") |
| 126 | + |
| 127 | + return full_response, sql_query |
0 commit comments