@@ -46,7 +46,7 @@ according to your hardware to avoid out-of-memory errors.
4646{:.table-model-big}
4747| Parameter | Description | Default |
4848| ---| ---| ---|
49- | ` modelName ` | Name of the AutoGGUF model to load for entity extraction | ` "qwen3_4b_bf16_gguf" ` |
49+ | ` pretrained ` | Name of the pretrained AutoGGUF model to load for entity extraction; pass it to ` .pretrained(...) ` as shown in the examples below | ` "qwen3_4b_bf16_gguf" ` |
5050| ` entityTypes ` | List of entity types to extract (used in prompt) | ` ["PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME"] ` |
5151| ` promptTemplate ` | Custom prompt template for entity extraction. Use ` {entityTypes} ` and ` {examples} ` placeholders | Built-in default prompt |
5252| ` fewShotExamples ` | Few-shot examples as an array of ` (input_text, json_output) ` tuples to guide the model | Empty array |
@@ -86,9 +86,9 @@ medical_examples = [
8686]
8787
8888entityExtractor = LLMEntityExtractor() \
89+ .pretrained("qwen3_4b_bf16_gguf") \
8990 .setInputCols([ "document"] ) \
9091 .setOutputCol("entities") \
91- .setModelName("qwen3_4b_bf16_gguf") \
9292 .setEntityTypes([ "MEDICATION", "DOSAGE", "ROUTE", "FREQUENCY", "PERSON", "ORGANIZATION"] ) \
9393 .setFewShotExamples(medical_examples) \
9494 .setNPredict(500) \
@@ -114,6 +114,7 @@ result.select(
114114 F.col("entity.metadata.entity").alias("entity_type"),
115115 F.col("entity.metadata.chunk").alias("chunk_index")
116116).show(truncate=False)
117+
117118+-------------------+-----+---+------------+-----------+
118119| text | begin| end| entity_type | chunk_index|
119120+-------------------+-----+---+------------+-----------+
@@ -152,9 +153,9 @@ val medicalExamples = Array(
152153)
153154
154155val entityExtractor = new LLMEntityExtractor()
156+ .pretrained("qwen3_4b_bf16_gguf")
155157 .setInputCols("document")
156158 .setOutputCol("entities")
157- .setModelName("qwen3_4b_bf16_gguf")
158159 .setEntityTypes(Array("MEDICATION", "DOSAGE", "ROUTE", "FREQUENCY", "PERSON", "ORGANIZATION"))
159160 .setFewShotExamples(medicalExamples)
160161 .setNPredict(500)
@@ -180,6 +181,7 @@ result.select(
180181 F.col("entity.metadata.entity").alias("entity_type"),
181182 F.col("entity.metadata.chunk").alias("chunk_index")
182183).show(truncate=False)
184+
183185+-------------------+-----+---+------------+-----------+
184186| text | begin| end| entity_type | chunk_index|
185187+-------------------+-----+---+------------+-----------+
@@ -220,4 +222,4 @@ scala_example=scala_example
220222api_link=api_link
221223python_api_link=python_api_link
222224source_link=source_link
223- %}
225+ %}
0 commit comments