jbesomi · jbesomi · Sep 21, 2020 · Aug 18, 2020 · Aug 19, 2020 · Aug 19, 2020
diff --git a/tests/test_representation.py b/tests/test_representation.py
@@ -120,13 +120,13 @@ def _tfidf(term, corpus, document_index):
 
 
 vector_s = pd.Series([[1.0, 0.0], [0.0, 0.0]], index=[5, 7])
-document_term_df = pd.DataFrame(
-    [[1.0, 0.0], [0.0, 0.0]], index=[5, 7], columns=["a", "b"],
-).astype("Sparse[float64, nan]")
+df = pd.DataFrame([[1.0, 0.0], [0.0, 0.0]], index=[5, 7], columns=["a", "b"],).astype(
+    "Sparse[float64, nan]"
+)
 
 
 test_cases_dim_reduction_and_clustering = [
-    # format: [function_name, function, correct output for s_vector_series and s_documenttermDF input above]
+    # format: [function_name, function, correct output for the vector_s and df inputs above]
     ["pca", representation.pca, pd.Series([[-0.5, 0.0], [0.5, 0.0]], index=[5, 7],),],
     [
         "nmf",
@@ -232,7 +232,7 @@ def test_dim_reduction_and_clustering_with_vector_series_input(
         )
 
     @parameterized.expand(test_cases_dim_reduction_and_clustering)
-    def test_dim_reduction_and_clustering_with_documenttermDF_input(
+    def test_dim_reduction_and_clustering_with_dataframe_input(
         self, name, test_function, correct_output
     ):
         s_true = correct_output
@@ -242,11 +242,11 @@ def test_dim_reduction_and_clustering_with_documenttermDF_input(
             return
 
         if name == "kmeans":
-            result_s = test_function(document_term_df, random_state=42, n_clusters=2)
-        elif name == "dbscan" or name == "meanshift":
-            result_s = test_function(document_term_df)
+            result_s = test_function(df, random_state=42, n_clusters=2)
+        elif name == "dbscan" or name == "meanshift" or name == "normalize":
+            result_s = test_function(df)
         else:
-            result_s = test_function(document_term_df, random_state=42)
+            result_s = test_function(df, random_state=42)
 
         pd.testing.assert_series_equal(
             s_true,
@@ -257,10 +257,10 @@ def test_dim_reduction_and_clustering_with_documenttermDF_input(
             check_category_order=False,
         )
 
-    def test_normalize_document_term_df_also_as_output(self):
-        # normalize should also return DocumentTermDF output for DocumentTermDF
+    def test_normalize_DataFrame_also_as_output(self):
+        # normalize should also return DataFrame output for DataFrame
         # input so we test it separately
-        result = representation.normalize(document_term_df)
+        result = representation.normalize(df)
         correct_output = pd.DataFrame(
             [[1.0, 0.0], [0.0, 0.0]], index=[5, 7], columns=["a", "b"],
         )

diff --git a/tests/test_types.py b/tests/test_types.py
@@ -72,20 +72,13 @@ def f(s):
         except TypeError:
             self.fail("Failed although input type is correct.")
 
-    def test_inputseries_correct_type_documentrepresentationseries(self):
-        @_types.InputSeries(_types.RepresentationSeries)
+    def test_inputseries_correct_type_DataFrame(self):
+        @_types.InputSeries(_types.DataFrame)
         def f(s):
             pass
 
         try:
-            f(
-                pd.Series(
-                    [1, 2, 3],
-                    index=pd.MultiIndex.from_tuples(
-                        [("doc1", "word1"), ("doc1", "word2"), ("doc2", "word1")]
-                    ),
-                )
-            )
+            f(pd.DataFrame([[1, 2, 3]], columns=["a", "b", "c"], dtype="Sparse",))
         except TypeError:
             self.fail("Failed although input type is correct.")
 
@@ -118,3 +111,23 @@ def f(s):
             f(pd.Series([np.nan, pd.NA, [0, 1, 2]]))
         except TypeError:
             self.fail("Failed although input type is correct.")
+
+    def test_several_possible_types_correct_type(self):
+        @_types.InputSeries([_types.DataFrame, _types.VectorSeries])
+        def f(x):
+            pass
+        # InputSeries gets a list of types; each call below passes one kind of input.
+        try:
+            f(pd.DataFrame([[1, 2, 3]], columns=["a", "b", "c"], dtype="Sparse",))
+            # Presumably the VectorSeries case: one cell holding a list of floats.
+            f(pd.Series([[1.0, 2.0]]))
+
+        except TypeError:
+            self.fail("Failed although input type is correct.")
+
+    def test_several_possible_types_wrong_type(self):
+        @_types.InputSeries([_types.DataFrame, _types.VectorSeries])
+        def f(x):
+            pass
+        # Cells holding lists of str are expected to match neither accepted type.
+        self.assertRaises(TypeError, f, pd.Series([["token", "ized"]]))