2727import java .util .List ;
2828import java .util .Optional ;
2929import java .util .Set ;
30+ import org .slf4j .Logger ;
31+ import org .slf4j .LoggerFactory ;
3032import org .sonar .python .EscapeCharPositionInfo ;
3133import org .sonar .python .IPythonLocation ;
3234
3335public class IpynbNotebookParser {
3436
37+ private static final Logger LOG = LoggerFactory .getLogger (IpynbNotebookParser .class );
38+
3539 public static final String SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER = "#SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER" ;
3640
3741 private static final Set <String > ACCEPTED_LANGUAGE = Set .of ("python" , "ipython" );
@@ -53,25 +57,85 @@ private IpynbNotebookParser(PythonInputFile inputFile) {
5357 private int lastPythonLine = 0 ;
5458
5559 public Optional <GeneratedIPythonFile > parse () throws IOException {
56- // If the language is not present, we assume it is a Python notebook
57- var isPythonNotebook = parseLanguage ().map (ACCEPTED_LANGUAGE ::contains ).orElse (true );
60+ var language = parseLanguage ();
61+ boolean isPythonNotebook = language .map (ACCEPTED_LANGUAGE ::contains ).orElse (true );
62+
63+ if (isPythonNotebook ) {
64+ return Optional .of (parseNotebook ());
65+ }
5866
59- return Boolean .TRUE .equals (isPythonNotebook ) ? Optional .of (parseNotebook ()) : Optional .empty ();
67+ if (LOG .isDebugEnabled ()){
68+ LOG .debug ("Skipping notebook '{}': unsupported language '{}'" , inputFile .wrappedFile ().filename (), language .orElse ("unknown" ));
69+ }
70+ return Optional .empty ();
6071 }
6172
73+ /**
74+ * Parses the notebook's top-level metadata to find the language.
75+ * Only checks metadata.kernelspec.language and metadata.language_info.name,
76+ * ignoring any language fields in cell metadata.
77+ */
6278 public Optional <String > parseLanguage () throws IOException {
6379 String content = inputFile .wrappedFile ().contents ();
6480 JsonFactory factory = new JsonFactory ();
81+ List <String > foundLanguages = new ArrayList <>();
82+
6583 try (JsonParser jParser = factory .createParser (content )) {
6684 while (!jParser .isClosed ()) {
6785 JsonToken jsonToken = jParser .nextToken ();
68- if (JsonToken .FIELD_NAME .equals (jsonToken ) && "language " .equals (jParser .currentName ())) {
86+ if (JsonToken .FIELD_NAME .equals (jsonToken ) && "metadata " .equals (jParser .currentName ()) && jParser . getParsingContext (). getParent (). inRoot ( )) {
6987 jParser .nextToken ();
70- return Optional .ofNullable (jParser .getValueAsString ());
88+ extractLanguagesFromMetadata (jParser , foundLanguages );
89+ break ;
7190 }
7291 }
7392 }
74- return Optional .empty ();
93+
94+ // Return an accepted language if found, otherwise the first language found (for rejection), or empty
95+ return foundLanguages .stream ()
96+ .filter (ACCEPTED_LANGUAGE ::contains )
97+ .findFirst ()
98+ .or (() -> foundLanguages .stream ().findFirst ());
99+ }
100+
101+ /**
102+ * Extracts language values from the top-level metadata object.
103+ * Looks for kernelspec.language and language_info.name.
104+ */
105+ private static void extractLanguagesFromMetadata (JsonParser jParser , List <String > foundLanguages ) throws IOException {
106+ while (jParser .nextToken () != JsonToken .END_OBJECT ) {
107+ if (JsonToken .FIELD_NAME .equals (jParser .currentToken ())) {
108+ String fieldName = jParser .currentName ();
109+ if ("kernelspec" .equals (fieldName )) {
110+ jParser .nextToken ();
111+ extractFieldFromObject (jParser , "language" , foundLanguages );
112+ } else if ("language_info" .equals (fieldName )) {
113+ jParser .nextToken ();
114+ extractFieldFromObject (jParser , "name" , foundLanguages );
115+ } else {
116+ jParser .nextToken ();
117+ skipNestedObjects (jParser );
118+ }
119+ }
120+ }
121+ }
122+
123+ /**
124+ * Extracts the value of a specific field from a JSON object.
125+ */
126+ private static void extractFieldFromObject (JsonParser jParser , String targetField , List <String > foundValues ) throws IOException {
127+ while (jParser .nextToken () != JsonToken .END_OBJECT ) {
128+ if (JsonToken .FIELD_NAME .equals (jParser .currentToken ()) && targetField .equals (jParser .currentName ())) {
129+ jParser .nextToken ();
130+ String value = jParser .getValueAsString ();
131+ if (value != null ) {
132+ foundValues .add (value );
133+ }
134+ } else {
135+ jParser .nextToken ();
136+ skipNestedObjects (jParser );
137+ }
138+ }
75139 }
76140
77141 public GeneratedIPythonFile parseNotebook () throws IOException {
0 commit comments