Skip to content

Commit 81c919b

Browse files
Update llama_cpp_in_Spark_NLP_LLMEntityExtractor.ipynb (#14756)
* Update llama_cpp_in_Spark_NLP_LLMEntityExtractor.ipynb * Update LLMEntityExtractor.md
1 parent 846fae6 commit 81c919b

2 files changed

Lines changed: 131 additions & 98 deletions

File tree

docs/en/annotator_entries/LLMEntityExtractor.md

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ according to your hardware to avoid out-of-memory errors.
4646
{:.table-model-big}
4747
| Parameter | Description | Default |
4848
|---|---|---|
49-
| `modelName` | Name of the AutoGGUF model to load for entity extraction | `"qwen3_4b_bf16_gguf"` |
49+
| `pretrained` | Name of the AutoGGUF model to load for entity extraction | `"qwen3_4b_bf16_gguf"` |
5050
| `entityTypes` | List of entity types to extract (used in prompt) | `["PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME"]` |
5151
| `promptTemplate` | Custom prompt template for entity extraction. Use `{entityTypes}` and `{examples}` placeholders | Built-in default prompt |
5252
| `fewShotExamples` | Few-shot examples as array of `(input_text, json_output)` tuples to guide the model | Empty array |
@@ -86,9 +86,9 @@ medical_examples = [
8686
]
8787

8888
entityExtractor = LLMEntityExtractor() \
89+
.pretrained("qwen3_4b_bf16_gguf") \
8990
.setInputCols(["document"]) \
9091
.setOutputCol("entities") \
91-
.setModelName("qwen3_4b_bf16_gguf") \
9292
.setEntityTypes(["MEDICATION", "DOSAGE", "ROUTE", "FREQUENCY", "PERSON", "ORGANIZATION"]) \
9393
.setFewShotExamples(medical_examples) \
9494
.setNPredict(500) \
@@ -114,6 +114,7 @@ result.select(
114114
F.col("entity.metadata.entity").alias("entity_type"),
115115
F.col("entity.metadata.chunk").alias("chunk_index")
116116
).show(truncate=False)
117+
117118
+-------------------+-----+---+------------+-----------+
118119
|text |begin|end|entity_type |chunk_index|
119120
+-------------------+-----+---+------------+-----------+
@@ -152,9 +153,9 @@ val medicalExamples = Array(
152153
)
153154

154155
val entityExtractor = new LLMEntityExtractor()
156+
.pretrained("qwen3_4b_bf16_gguf")
155157
.setInputCols("document")
156158
.setOutputCol("entities")
157-
.setModelName("qwen3_4b_bf16_gguf")
158159
.setEntityTypes(Array("MEDICATION", "DOSAGE", "ROUTE", "FREQUENCY", "PERSON", "ORGANIZATION"))
159160
.setFewShotExamples(medicalExamples)
160161
.setNPredict(500)
@@ -180,6 +181,7 @@ result.select(
180181
F.col("entity.metadata.entity").alias("entity_type"),
181182
F.col("entity.metadata.chunk").alias("chunk_index")
182183
).show(truncate=False)
184+
183185
+-------------------+-----+---+------------+-----------+
184186
|text |begin|end|entity_type |chunk_index|
185187
+-------------------+-----+---+------------+-----------+
@@ -220,4 +222,4 @@ scala_example=scala_example
220222
api_link=api_link
221223
python_api_link=python_api_link
222224
source_link=source_link
223-
%}
225+
%}

0 commit comments

Comments
 (0)