@@ -52,20 +52,19 @@ def _build_study(article: ArticleContent) -> dict[str, Any]:
5252 """Process a single article into a study representation."""
5353
5454 analyses : list [dict [str , Any ]] = []
55+ article_text : str | None = None
5556 tables = extract_tables_from_article (article .payload )
5657 if not tables :
5758 tables = _manual_extract_tables (article .payload )
58- article_text : str | None = None
5959 for metadata , df in tables :
6060 meta_text = _metadata_text (metadata )
61- coords = _extract_coordinates_from_dataframe (df , meta_text . lower () )
61+ coords = _extract_coordinates_from_dataframe (df )
6262 if not coords :
6363 continue
6464 header_text = " " .join (str (col ).lower () for col in df .columns )
6565 space = _heuristic_space (header_text , meta_text )
6666 if space is None :
67- if article_text is None :
68- article_text = _article_text (article .payload )
67+ article_text = _article_text (article .payload )
6968 guessed = _neurosynth_guess_space (article_text )
7069 if guessed != "UNKNOWN" :
7170 space = guessed
@@ -322,7 +321,7 @@ def _rows(xpath: str) -> list[etree._Element]:
322321 return pd .DataFrame (grid , columns = col_order )
323322
324323
325- def _extract_coordinates_from_dataframe (df : pd .DataFrame , meta_text : str ) -> list [list [float ]]:
324+ def _extract_coordinates_from_dataframe (df : pd .DataFrame ) -> list [list [float ]]:
326325 df = _normalize_table (df )
327326 extracted = _extract_coordinates_from_table (df )
328327 if not extracted .empty :
@@ -331,21 +330,7 @@ def _extract_coordinates_from_dataframe(df: pd.DataFrame, meta_text: str) -> lis
331330 [float (row .x ), float (row .y ), float (row .z )]
332331 for row in extracted .itertuples (index = False )
333332 ]
334-
335- coordinates : list [list [float ]] = []
336- preferred = _coordinate_columns (df .columns )
337- for row in df .itertuples (index = False , name = None ):
338- values = _select_row_values (row , df .columns , preferred )
339- if not values :
340- continue
341- numbers = _extract_numbers (" " .join (values ))
342- if len (numbers ) >= 3 :
343- coordinates .append (numbers [:3 ])
344- continue
345- fallback = _extract_numbers (" " .join (str (value ) for value in row ))
346- if len (fallback ) >= 3 :
347- coordinates .append (fallback [:3 ])
348- return coordinates
333+ return []
349334
350335
351336def _normalize_table (df : pd .DataFrame ) -> pd .DataFrame :
@@ -396,31 +381,3 @@ def _normalize_table(df: pd.DataFrame) -> pd.DataFrame:
396381 other_cols = [col for col in df .columns if col not in xyz_cols ]
397382 df = df [list (xyz_cols ) + other_cols ]
398383 return df
399-
400-
401- def _coordinate_columns (columns : pd .Index ) -> list [str ]:
402- order = {"x" : 0 , "y" : 1 , "z" : 2 }
403- matched = []
404- for col in columns :
405- name = str (col ).strip ().lower ()
406- if name in order :
407- matched .append ((order [name ], col ))
408- return [col for _ , col in sorted (matched )]
409-
410-
411- def _select_row_values (row : tuple [Any , ...], columns : pd .Index , preferred : list [str ]) -> list [str ]:
412- if preferred :
413- values = []
414- for col in preferred :
415- idx = columns .get_loc (col )
416- if idx < len (row ):
417- values .append (str (row [idx ]))
418- return values
419- return [str (value ) for value in row if value not in (None , "" )]
420-
421-
422- def _extract_numbers (text : str ) -> list [float ]:
423- import re
424-
425- matches = re .findall (r"[-+]?\d+(?:\.\d+)?" , text .replace ("−" , "-" ))
426- return [float (match ) for match in matches ]
0 commit comments