# Module-wide logger, registered under the fixed name "nano-graphrag".
logger = logging.getLogger("nano-graphrag")
# Starts out unset. NOTE(review): presumably filled lazily with a tokenizer
# encoder by the tiktoken helpers further down the file -- confirm.
ENCODER = None
1818
19-
2019def always_get_an_event_loop () -> asyncio .AbstractEventLoop :
2120 try :
2221 # If there is already an event loop, use it.
@@ -29,24 +28,93 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
2928 return loop
3029
3130
32- def locate_json_string_body_from_string (content : str ) -> Union [str , None ]:
33- """Locate the JSON string body from a string"""
34- maybe_json_str = re .search (r"{.*}" , content , re .DOTALL )
35- if maybe_json_str is not None :
36- return maybe_json_str .group (0 )
37- else :
def extract_first_complete_json(s: str):
    """Extract and parse the first brace-balanced JSON object found in *s*.

    The text is scanned left to right for the first ``{`` and its matching
    ``}``. Braces that occur inside double-quoted JSON strings (including
    strings containing escaped quotes) are ignored while matching, so a value
    such as ``"}"`` does not terminate the object early. Text before and
    after the object is ignored.

    Args:
        s: Arbitrary text that may contain a JSON object, e.g. an LLM reply.

    Returns:
        The decoded object (a ``dict``) when the first balanced ``{...}``
        span is valid JSON. ``None`` when that span fails to decode (only the
        first span is attempted) or when no balanced span exists.
    """
    depth = 0
    object_start = None
    in_string = False
    escaped = False

    for i, char in enumerate(s):
        if in_string:
            # Inside a JSON string only an unescaped quote is significant.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            # Quotes in the free text surrounding the object are ordinary
            # characters; only track strings once an object has been opened.
            if depth:
                in_string = True
        elif char == "{":
            if depth == 0:
                object_start = i
            depth += 1
        elif char == "}" and depth:
            # A stray "}" before any "{" is skipped by the ``depth`` guard.
            depth -= 1
            if depth == 0:
                candidate = s[object_start:i + 1]
                try:
                    # strict=False accepts raw newlines/control characters
                    # inside string values without rewriting the payload.
                    return json.loads(candidate, strict=False)
                except json.JSONDecodeError as e:
                    logger.error(
                        "JSON decoding failed: %s. Attempted string: %s...",
                        e,
                        candidate[:50],
                    )
                    return None

    logger.warning("No complete JSON object found in the input string.")
    return None
56+
def parse_value(value: str):
    """Convert a raw textual JSON value to the closest Python type.

    Surrounding whitespace is stripped first. Then, in order:

    * ``"null"`` -> ``None``, ``"true"`` -> ``True``, ``"false"`` -> ``False``
    * text containing ``.`` or an exponent marker (``e``/``E``) is tried as
      ``float`` (so ``1e3`` and ``-2E-2`` become numbers)
    * anything else is tried as ``int``
    * if the numeric conversion fails, the text is returned as a string with
      any leading/trailing double quotes removed

    Args:
        value: The raw value text, e.g. ``'42'``, ``'"hello"'`` or ``' true '``.

    Returns:
        ``None``, a ``bool``, an ``int``, a ``float`` or a ``str``.
    """
    value = value.strip()

    # JSON literals map directly onto Python singletons.
    literals = {"null": None, "true": True, "false": False}
    if value in literals:
        return literals[value]

    try:
        # A dot or an exponent marker means the text can only be a float;
        # ordinary words that merely contain "e" fail here and fall through
        # to the string branch.
        if "." in value or "e" in value.lower():
            return float(value)
        return int(value)
    except ValueError:
        # Not numeric: treat as a string and drop surrounding quotes.
        return value.strip('"')
def extract_values_from_json(json_string, keys=("reasoning", "answer", "data"), allow_no_quotes=False):
    """Extract key/value pairs from a non-standard or malformed JSON string.

    Every ``key: value`` occurrence matched by the pattern below is collected,
    where the key may be quoted or bare and the value is one of: a brace
    group without inner ``}``, a double-quoted string, or a bare token running
    up to the next ``,`` or ``}``. Brace-group values are parsed recursively
    into nested dicts; all other values go through ``parse_value``.

    Args:
        json_string: Text to scan.
        keys: Accepted for backward compatibility. It is not used to filter
            the result; every matched key is returned. The default is an
            immutable tuple so it cannot be mutated across calls.
        allow_no_quotes: Accepted for backward compatibility. The pattern
            always matches both quoted and unquoted keys/values regardless
            of this flag.

    Returns:
        A dict mapping each matched key (quotes stripped) to its parsed
        value. Empty when nothing matched, in which case a warning is logged.
    """
    extracted_values = {}

    # Matches both quoted and unquoted values, as well as flat nested objects.
    regex_pattern = r'(?P<key>"?\w+"?)\s*:\s*(?P<value>{[^}]*}|".*?"|[^,}]+)'

    for match in re.finditer(regex_pattern, json_string, re.DOTALL):
        key = match.group("key").strip('"')
        value = match.group("value").strip()

        if value.startswith("{") and value.endswith("}"):
            # Nested object: recurse with the caller's options so they are
            # not silently reset to the defaults.
            extracted_values[key] = extract_values_from_json(
                value, keys=keys, allow_no_quotes=allow_no_quotes
            )
        else:
            # Scalar: convert to int, float, bool, None or str.
            extracted_values[key] = parse_value(value)

    if not extracted_values:
        logger.warning("No values could be extracted from the string.")

    return extracted_values
39100
40101
def convert_response_to_json(response: str) -> dict:
    """Turn a response string into a dict, tolerating malformed JSON.

    Strict extraction of the first complete JSON object is tried first. Only
    when that yields ``None`` is the lenient key/value extractor used on the
    whole response. The outcome is logged (info on success, error when the
    result is empty) and returned either way; no exception is raised for
    unparseable input.
    """
    parsed = extract_first_complete_json(response)

    if parsed is None:
        # Strict parsing found nothing usable; fall back to lenient scraping.
        logger.info("Attempting to extract values from a non-standard JSON string...")
        parsed = extract_values_from_json(response, allow_no_quotes=True)

    if parsed:
        logger.info("JSON data successfully extracted.")
    else:
        logger.error("Unable to extract meaningful data from the response.")

    return parsed
116+
117+
50118
51119
52120def encode_string_by_tiktoken (content : str , model_name : str = "gpt-4o" ):
0 commit comments