Skip to content

Commit 192c080

Browse files
joke1196sonartech
authored andcommitted
SONARPY-3803: Fix IPython Notebook parsing when an unsupported cell language is specified (#861)
GitOrigin-RevId: 881c30a7a345a4b76abb562a5da18965e90d75f1
1 parent 2b1c8bd commit 192c080

File tree

3 files changed

+144
-6
lines changed

3 files changed

+144
-6
lines changed

python-frontend/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,15 @@
2727
import java.util.List;
2828
import java.util.Optional;
2929
import java.util.Set;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
3032
import org.sonar.python.EscapeCharPositionInfo;
3133
import org.sonar.python.IPythonLocation;
3234

3335
public class IpynbNotebookParser {
3436

37+
private static final Logger LOG = LoggerFactory.getLogger(IpynbNotebookParser.class);
38+
3539
public static final String SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER = "#SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER";
3640

3741
private static final Set<String> ACCEPTED_LANGUAGE = Set.of("python", "ipython");
@@ -53,25 +57,85 @@ private IpynbNotebookParser(PythonInputFile inputFile) {
5357
private int lastPythonLine = 0;
5458

5559
public Optional<GeneratedIPythonFile> parse() throws IOException {
56-
// If the language is not present, we assume it is a Python notebook
57-
var isPythonNotebook = parseLanguage().map(ACCEPTED_LANGUAGE::contains).orElse(true);
60+
var language = parseLanguage();
61+
boolean isPythonNotebook = language.map(ACCEPTED_LANGUAGE::contains).orElse(true);
62+
63+
if (isPythonNotebook) {
64+
return Optional.of(parseNotebook());
65+
}
5866

59-
return Boolean.TRUE.equals(isPythonNotebook) ? Optional.of(parseNotebook()) : Optional.empty();
67+
if(LOG.isDebugEnabled()){
68+
LOG.debug("Skipping notebook '{}': unsupported language '{}'", inputFile.wrappedFile().filename(), language.orElse("unknown"));
69+
}
70+
return Optional.empty();
6071
}
6172

73+
/**
74+
* Parses the notebook's top-level metadata to find the language.
75+
* Only checks metadata.kernelspec.language and metadata.language_info.name,
76+
* ignoring any language fields in cell metadata.
77+
*/
6278
public Optional<String> parseLanguage() throws IOException {
6379
String content = inputFile.wrappedFile().contents();
6480
JsonFactory factory = new JsonFactory();
81+
List<String> foundLanguages = new ArrayList<>();
82+
6583
try (JsonParser jParser = factory.createParser(content)) {
6684
while (!jParser.isClosed()) {
6785
JsonToken jsonToken = jParser.nextToken();
68-
if (JsonToken.FIELD_NAME.equals(jsonToken) && "language".equals(jParser.currentName())) {
86+
if (JsonToken.FIELD_NAME.equals(jsonToken) && "metadata".equals(jParser.currentName()) && jParser.getParsingContext().getParent().inRoot()) {
6987
jParser.nextToken();
70-
return Optional.ofNullable(jParser.getValueAsString());
88+
extractLanguagesFromMetadata(jParser, foundLanguages);
89+
break;
7190
}
7291
}
7392
}
74-
return Optional.empty();
93+
94+
// Return an accepted language if found, otherwise the first language found (for rejection), or empty
95+
return foundLanguages.stream()
96+
.filter(ACCEPTED_LANGUAGE::contains)
97+
.findFirst()
98+
.or(() -> foundLanguages.stream().findFirst());
99+
}
100+
101+
/**
102+
* Extracts language values from the top-level metadata object.
103+
* Looks for kernelspec.language and language_info.name.
104+
*/
105+
private static void extractLanguagesFromMetadata(JsonParser jParser, List<String> foundLanguages) throws IOException {
106+
while (jParser.nextToken() != JsonToken.END_OBJECT) {
107+
if (JsonToken.FIELD_NAME.equals(jParser.currentToken())) {
108+
String fieldName = jParser.currentName();
109+
if ("kernelspec".equals(fieldName)) {
110+
jParser.nextToken();
111+
extractFieldFromObject(jParser, "language", foundLanguages);
112+
} else if ("language_info".equals(fieldName)) {
113+
jParser.nextToken();
114+
extractFieldFromObject(jParser, "name", foundLanguages);
115+
} else {
116+
jParser.nextToken();
117+
skipNestedObjects(jParser);
118+
}
119+
}
120+
}
121+
}
122+
123+
/**
124+
* Extracts the value of a specific field from a JSON object.
125+
*/
126+
private static void extractFieldFromObject(JsonParser jParser, String targetField, List<String> foundValues) throws IOException {
127+
while (jParser.nextToken() != JsonToken.END_OBJECT) {
128+
if (JsonToken.FIELD_NAME.equals(jParser.currentToken()) && targetField.equals(jParser.currentName())) {
129+
jParser.nextToken();
130+
String value = jParser.getValueAsString();
131+
if (value != null) {
132+
foundValues.add(value);
133+
}
134+
} else {
135+
jParser.nextToken();
136+
skipNestedObjects(jParser);
137+
}
138+
}
75139
}
76140

77141
public GeneratedIPythonFile parseNotebook() throws IOException {

python-frontend/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,23 @@
2323
import java.util.List;
2424
import java.util.Map;
2525
import org.junit.jupiter.api.Test;
26+
import org.junit.jupiter.api.extension.RegisterExtension;
27+
import org.slf4j.event.Level;
2628
import org.sonar.api.batch.fs.InputFile;
2729
import org.sonar.api.batch.fs.internal.TestInputFileBuilder;
2830
import org.sonar.api.internal.apachecommons.lang3.StringUtils;
31+
import org.sonar.api.testfixtures.log.LogTesterJUnit5;
2932
import org.sonar.python.IPythonLocation;
3033

3134
import static org.assertj.core.api.Assertions.assertThat;
3235
import static org.assertj.core.api.Assertions.assertThatThrownBy;
3336
import static org.sonar.plugins.python.NotebookTestUtils.mapToColumnMappingList;
3437

3538
class IpynbNotebookParserTest {
39+
40+
@RegisterExtension
41+
public LogTesterJUnit5 logTester = new LogTesterJUnit5().setLevel(Level.DEBUG);
42+
3643
private final File baseDir = new File("src/test/resources/org/sonar/plugins/python").getAbsoluteFile();
3744

3845
public static PythonInputFile createInputFile(File baseDir, String name, InputFile.Status status, InputFile.Type type) {
@@ -151,6 +158,22 @@ void testParseMojoNotebook() {
151158
var resultOptional = IpynbNotebookParser.parseNotebook(inputFile);
152159

153160
assertThat(resultOptional).isEmpty();
161+
assertThat(logTester.logs(Level.DEBUG)).contains("Skipping notebook 'notebook_mojo.ipynb': unsupported language 'mojo'");
162+
}
163+
164+
@Test
165+
void testParseNotebookWithCellLanguage() throws IOException {
166+
var inputFile = createInputFile(baseDir, "notebook_with_cell_language.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN);
167+
168+
var resultOptional = IpynbNotebookParser.parseNotebook(inputFile);
169+
170+
// Should be parsed as Python despite "sparksql" language in cell metadata
171+
assertThat(resultOptional).isPresent();
172+
173+
var result = resultOptional.get();
174+
// Should contain content from all code cells (including the SQL magic cell which is still a code cell)
175+
assertThat(result.contents()).contains("x = 1");
176+
assertThat(result.contents()).contains("y = 2");
154177
}
155178

156179
@Test
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"metadata": {
3+
"kernelspec": {
4+
"name": "python3",
5+
"display_name": "Python 3"
6+
},
7+
"language_info": {
8+
"name": "python"
9+
},
10+
"custom_field": {
11+
"some_key": "some_value"
12+
}
13+
},
14+
"nbformat": 4,
15+
"nbformat_minor": 2,
16+
"cells": [
17+
{
18+
"cell_type": "code",
19+
"metadata": {},
20+
"source": [
21+
"x = 1\n",
22+
"print(x)"
23+
],
24+
"outputs": [],
25+
"execution_count": null
26+
},
27+
{
28+
"cell_type": "code",
29+
"metadata": {
30+
"microsoft": {
31+
"language": "sparksql"
32+
}
33+
},
34+
"source": [
35+
"%%sql\n",
36+
"SELECT * FROM table"
37+
],
38+
"outputs": [],
39+
"execution_count": null
40+
},
41+
{
42+
"cell_type": "code",
43+
"metadata": {},
44+
"source": [
45+
"y = 2"
46+
],
47+
"outputs": [],
48+
"execution_count": null
49+
}
50+
]
51+
}

0 commit comments

Comments
 (0)