exasol
diff --git a/‎doc/changes/changes_2.2.0.md‎
Lines changed: 4 additions & 16 deletions b/‎doc/changes/changes_2.2.0.md‎
Lines changed: 4 additions & 16 deletions
diff --git a/‎doc/developer_guide/developer_guide.md‎
Lines changed: 1 addition & 0 deletions b/‎doc/developer_guide/developer_guide.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎exasol_transformers_extension/deployment/constants.py‎
Lines changed: 1 addition & 3 deletions b/‎exasol_transformers_extension/deployment/constants.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎exasol_transformers_extension/deployment/work_with_spans_constants.py‎
Lines changed: 2 additions & 0 deletions b/‎exasol_transformers_extension/deployment/work_with_spans_constants.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎exasol_transformers_extension/deployment/work_without_spans_constants.py‎
Lines changed: 2 additions & 0 deletions b/‎exasol_transformers_extension/deployment/work_without_spans_constants.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎exasol_transformers_extension/resources/templates/with_spans/span_token_classification_udf.jinja.sql‎
Lines changed: 3 additions & 3 deletions b/‎exasol_transformers_extension/resources/templates/with_spans/span_token_classification_udf.jinja.sql‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎exasol_transformers_extension/resources/templates/with_spans/span_zero_shot_text_classification_udf.jinja.sql‎
Lines changed: 26 additions & 0 deletions b/‎exasol_transformers_extension/resources/templates/with_spans/span_zero_shot_text_classification_udf.jinja.sql‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎…o_shot_text_classification_udf.jinja.sql‎ ‎…o_shot_text_classification_udf.jinja.sql‎exasol_transformers_extension/resources/templates/zero_shot_text_classification_udf.jinja.sql renamed to exasol_transformers_extension/resources/templates/without_spans/zero_shot_text_classification_udf.jinja.sql
Lines changed: 1 addition & 1 deletion b/‎…o_shot_text_classification_udf.jinja.sql‎ ‎…o_shot_text_classification_udf.jinja.sql‎exasol_transformers_extension/resources/templates/zero_shot_text_classification_udf.jinja.sql renamed to exasol_transformers_extension/resources/templates/without_spans/zero_shot_text_classification_udf.jinja.sql
Lines changed: 1 addition & 1 deletion
diff --git a/‎exasol_transformers_extension/udfs/callers/with_spans/span_zero_shot_text_classification_udf_call.py‎
Lines changed: 8 additions & 0 deletions b/‎exasol_transformers_extension/udfs/callers/with_spans/span_zero_shot_text_classification_udf_call.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎exasol_transformers_extension/udfs/models/token_classification_udf.py‎
Lines changed: 4 additions & 4 deletions b/‎exasol_transformers_extension/udfs/models/token_classification_udf.py‎
Lines changed: 4 additions & 4 deletions
@@ -1,30 +1,18 @@
-# Transformers Extension 2.2.0, T.B.D
+# Transformers Extension 2.2.0, 2025-01-21
 
-Code name: T.B.D
+Code name: Bugfix for token classification
 
 ## Summary
 
-T.B.D
-
-### Features
-
-n/a
+This release includes a bugfix for handling unexpected results in the token classification UDF,
+as well as internal refactorings for the unit tests.
 
 ### Bugs
 
 - #272: Fixed unit tests assertions not working correctly
 - #275: Fixed a bug where models returning unexpected results were not handled correctly
 
-### Documentation
-
-n/a
-
 ### Refactorings
 
 - #273: Refactored unit tests for token_classification_udf to use StandAloneUDFMock, made params files more maintainable
 - #271: Moved test cases which only pertain to the base udf to base udf unit tests
-- #274: Refactored unit tests for zero_shot_text_classification_udf to use StandAloneUDFMock, made params files more maintainable
-
-### Security
-
-n/a
@@ -82,6 +82,7 @@ inference output to the inputs.
 Before implementing the UDF logic (examined in item 4 in this section), the 
 `run` function responsible for calling the newly created UDF script should be 
 defined in `exasol_transformers_extension/udfs/callers/`.
+Also add the new UDF to the lists in `tests/utils/db_queries.py`.
 
 ### 3. UDF Template-Caller Matching 
 The added UDF template and defined UDF caller should be added to the dictionary
 
@@ -15,9 +15,7 @@
     "text_generation_udf_call.py":
         "text_generation_udf.jinja.sql",
     "translation_udf_call.py":
-        "translation_udf.jinja.sql",
-    "zero_shot_text_classification_udf.py":
-        "zero_shot_text_classification_udf.jinja.sql"
+        "translation_udf.jinja.sql"
 }
 
 constants = InstallScriptsConstants(
 
@@ -5,6 +5,8 @@
 UDF_CALL_TEMPLATES = {
     "span_token_classification_udf_call.py":
         "span_token_classification_udf.jinja.sql",
+    "span_zero_shot_text_classification_udf_call.py":
+        "span_zero_shot_text_classification_udf.jinja.sql"
 }
 
 work_with_spans_constants = InstallScriptsConstants(
 
@@ -5,6 +5,8 @@
 UDF_CALL_TEMPLATES = {
     "token_classification_udf_call.py":
         "token_classification_udf.jinja.sql",
+    "zero_shot_text_classification_udf.py":
+        "zero_shot_text_classification_udf.jinja.sql"
 }
 
 work_without_spans_constants = InstallScriptsConstants(
 
@@ -4,7 +4,7 @@ CREATE OR REPLACE {{ language_alias }} SET SCRIPT "TE_TOKEN_CLASSIFICATION_UDF_W
     sub_dir VARCHAR(2000000),
     model_name VARCHAR(2000000),
     text_data VARCHAR(2000000),
-    text_data_docid INTEGER,
+    text_data_doc_id INTEGER,
     text_data_char_begin INTEGER,
     text_data_char_end INTEGER,
     aggregation_strategy VARCHAR(2000000)
@@ -13,14 +13,14 @@ CREATE OR REPLACE {{ language_alias }} SET SCRIPT "TE_TOKEN_CLASSIFICATION_UDF_W
     bucketfs_conn VARCHAR(2000000),
     sub_dir VARCHAR(2000000),
     model_name VARCHAR(2000000),
-    text_data_docid INTEGER,
+    text_data_doc_id INTEGER,
     text_data_char_begin INTEGER,
     text_data_char_end INTEGER,
     aggregation_strategy VARCHAR(2000000),
     entity_covered_text VARCHAR(2000000),
     entity_type VARCHAR(2000000),
     score DOUBLE,
-    entity_docid INTEGER,
+    entity_doc_id INTEGER,
     entity_char_begin INTEGER,
     entity_char_end INTEGER,
     error_message VARCHAR(2000000) ) AS
 
@@ -0,0 +1,26 @@
+CREATE OR REPLACE {{ language_alias }} SET SCRIPT "TE_ZERO_SHOT_TEXT_CLASSIFICATION_UDF_WITH_SPAN"(
+    device_id INTEGER,
+    bucketfs_conn VARCHAR(2000000),
+    sub_dir VARCHAR(2000000),
+    model_name VARCHAR(2000000),
+    text_data VARCHAR(2000000),
+    text_data_doc_id INTEGER,
+    text_data_char_begin INTEGER,
+    text_data_char_end INTEGER,
+    candidate_labels VARCHAR(2000000)
+    ORDER BY {{ ordered_columns | join(" ASC,") }} ASC
+)EMITS (
+    bucketfs_conn VARCHAR(2000000),
+    sub_dir VARCHAR(2000000),
+    model_name VARCHAR(2000000),
+    text_data_doc_id INTEGER,
+    text_data_char_begin INTEGER,
+    text_data_char_end INTEGER,
+    label VARCHAR(2000000),
+    score DOUBLE,
+    rank INTEGER,
+    error_message VARCHAR(2000000) ) AS
+
+{{ script_content }}
+
+/
@@ -10,7 +10,7 @@ CREATE OR REPLACE {{ language_alias }} SET SCRIPT "TE_ZERO_SHOT_TEXT_CLASSIFICAT
     bucketfs_conn VARCHAR(2000000),
     sub_dir VARCHAR(2000000),
     model_name VARCHAR(2000000),
-    test_data VARCHAR(2000000),
+    text_data VARCHAR(2000000),
     candidate_labels VARCHAR(2000000),
     label VARCHAR(2000000),
     score DOUBLE,
 
@@ -0,0 +1,8 @@
+from exasol_transformers_extension.udfs.models.zero_shot_text_classification_udf import \
+    ZeroShotTextClassificationUDF
+
+udf = ZeroShotTextClassificationUDF(exa, work_with_spans=True)
+
+
+def run(ctx):
+    return udf.run(ctx)
@@ -79,7 +79,7 @@ def execute_prediction(self, model_df: pd.DataFrame) -> List[List[Dict[str, Any]
 
     def create_new_span_columns(self, model_df: pd.DataFrame) -> pd.DataFrame:
         # create new columns for use with spans
-        model_df[["entity_docid", "entity_char_begin", "entity_char_end"]] = None, None, None
+        model_df[["entity_doc_id", "entity_char_begin", "entity_char_end"]] = None, None, None
         # The UDF with spans uses different column names than the UDF without
         # spans, so the columns need to be renamed. This was done to improve the
         # column naming without breaking the interface of the existing UDF.
@@ -95,10 +95,10 @@ def drop_old_data_for_span_execution(self, model_df: pd.DataFrame) -> pd.DataFra
         return model_df
 
     def make_entity_span(self, df_row):
-        token_docid = df_row["text_data_docid"]
+        token_doc_id = df_row["text_data_doc_id"]
         token_char_begin = df_row["start_pos"] + df_row['text_data_char_begin']
         token_char_end = df_row["end_pos"] + df_row['text_data_char_begin']
-        return pd.Series([token_docid, token_char_begin, token_char_end])
+        return pd.Series([token_doc_id, token_char_begin, token_char_end])
 
     def append_predictions_to_input_dataframe(
             self, model_df: pd.DataFrame, pred_df_list: List[pd.DataFrame]) \
@@ -124,7 +124,7 @@ def append_predictions_to_input_dataframe(
 
         if self.work_with_spans:
             model_df = self.create_new_span_columns(model_df)
-            model_df[["entity_docid", "entity_char_begin", "entity_char_end"]] =\
+            model_df[["entity_doc_id", "entity_char_begin", "entity_char_end"]] =\
                 model_df.apply(self.make_entity_span, axis=1)
             model_df = self.drop_old_data_for_span_execution(model_df)
         return model_df
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,8 @@`
`5`	`5`	`UDF_CALL_TEMPLATES = {`
`6`	`6`	`"span_token_classification_udf_call.py":`
`7`	`7`	`"span_token_classification_udf.jinja.sql",`
	`8`	`+ "span_zero_shot_text_classification_udf_call.py":`
	`9`	`+ "span_zero_shot_text_classification_udf.jinja.sql"`
`8`	`10`	`}`
`9`	`11`
`10`	`12`	`work_with_spans_constants = InstallScriptsConstants(`
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,8 @@`
`5`	`5`	`UDF_CALL_TEMPLATES = {`
`6`	`6`	`"token_classification_udf_call.py":`
`7`	`7`	`"token_classification_udf.jinja.sql",`
	`8`	`+ "zero_shot_text_classification_udf.py":`
	`9`	`+ "zero_shot_text_classification_udf.jinja.sql"`
`8`	`10`	`}`
`9`	`11`
`10`	`12`	`work_without_spans_constants = InstallScriptsConstants(`