Skip to content

Commit 9a940d1

Browse files
committed
fix: Consolidate import statements and streamline code formatting in the notebook
1 parent 39886c4 commit 9a940d1

1 file changed

Lines changed: 3 additions & 40 deletions

File tree

notebooks/03_silver_to_gold.ipynb

Lines changed: 3 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -64,21 +64,7 @@
6464
"execution_count": null,
6565
"metadata": {},
6666
"outputs": [],
67-
"source": [
68-
"import sparknlp\n",
69-
"from sparknlp.base import DocumentAssembler, Finisher\n",
70-
"from sparknlp.annotator import (\n",
71-
" Tokenizer, Normalizer, StopWordsCleaner, LemmatizerModel,\n",
72-
" SentimentDLModel, NerDLModel, NerConverter,\n",
73-
" SentenceDetector, WordEmbeddingsModel, UniversalSentenceEncoder\n",
74-
")\n",
75-
"from pyspark.ml import Pipeline\n",
76-
"from pyspark.sql.functions import col, explode, explode_outer, length, desc, count, when, rank, round as round_\n",
77-
"from pyspark.sql.functions import sum as sum_\n",
78-
"from pyspark.sql.window import Window\n",
79-
"\n",
80-
"print(f\"Spark NLP version: {sparknlp.version()}\")"
81-
]
67+
"source": "import sparknlp\nfrom sparknlp.base import DocumentAssembler, Finisher\nfrom sparknlp.annotator import (\n Tokenizer, Normalizer, StopWordsCleaner, LemmatizerModel,\n SentimentDLModel, NerDLModel, NerConverter,\n SentenceDetector, WordEmbeddingsModel, UniversalSentenceEncoder\n)\nfrom pyspark.ml import Pipeline\nfrom pyspark.sql.functions import col, explode, explode_outer, length, desc, count, when, rank, round as round_, window\nfrom pyspark.sql.functions import sum as sum_\nfrom pyspark.sql.window import Window\n\nprint(f\"Spark NLP version: {sparknlp.version()}\")"
8268
},
8369
{
8470
"cell_type": "markdown",
@@ -459,30 +445,7 @@
459445
"execution_count": null,
460446
"metadata": {},
461447
"outputs": [],
462-
"source": [
463-
"comments_with_kw = keywords_model_fitted.transform(comments_stream)\n",
464-
"keywords_exploded_stream = comments_with_kw \\\n",
465-
" .select(\n",
466-
" col(\"id\"),\n",
467-
" explode(col(\"keywords\")).alias(\"keyword\")\n",
468-
" ) \\\n",
469-
" .groupBy(\n",
470-
" window(\n",
471-
" col(\"timestamp\"),\n",
472-
" \"10 minutes\",\n",
473-
" \"2 minutes\"\n",
474-
" ),\n",
475-
" col(\"keyword\")\n",
476-
" ) \\\n",
477-
" .count() \\\n",
478-
" .orderBy(desc(\"count\"))\n",
479-
"\n",
480-
"keywords_exploded_stream.writeStream \\\n",
481-
" .format(\"delta\") \\\n",
482-
" .outputMode(\"complete\") \\\n",
483-
" .option(\"checkpointLocation\", f\"{GOLD_PATH}/_checkpoints/keywords_windowed\") \\\n",
484-
" .start(f\"{GOLD_PATH}/keywords_real_time\")"
485-
]
448+
"source": "comments_with_kw = keywords_model_fitted.transform(comments_stream)\nkeywords_exploded_stream = comments_with_kw \\\n .select(\n col(\"id\"),\n col(\"timestamp\"),\n explode(col(\"keywords\")).alias(\"keyword\")\n ) \\\n .groupBy(\n window(\n col(\"timestamp\"),\n \"10 minutes\",\n \"2 minutes\"\n ),\n col(\"keyword\")\n ) \\\n .count()\n\nkeywords_exploded_stream.writeStream \\\n .format(\"delta\") \\\n .outputMode(\"complete\") \\\n .option(\"checkpointLocation\", f\"{GOLD_PATH}/_checkpoints/keywords_windowed\") \\\n .start(f\"{GOLD_PATH}/keywords_real_time\")"
486449
},
487450
{
488451
"cell_type": "markdown",
@@ -644,4 +607,4 @@
644607
},
645608
"nbformat": 4,
646609
"nbformat_minor": 4
647-
}
610+
}

0 commit comments

Comments
 (0)