-
Notifications
You must be signed in to change notification settings - Fork 9
Description
Hey - I've been trying to get this library to work. It's been a lot of trial and error, but this last (final?) issue doesn't seem solvable without installing a whole new Python environment (I'm on 3.12.2). Has sklearn changed the way its wrappers work? Here is the output:
miniconda3/lib/python3.12/site-packages/antm/ctfidf.py:34: RuntimeWarning:
divide by zero encountered in divide
NotFittedError Traceback (most recent call last)
Cell In[18], line 1
----> 1 model.fit(save=True)
File ~/miniconda3/lib/python3.12/site-packages/antm/main.py:107, in ANTM.fit(self, save)
105 self.tokens, self.dictionary, self.corpus = text_processing(self.df.content.values)
106 print("Topic Representation is initialized...")
--> 107 self.output = ctfidf_rp(self.dictionary, self.documents_per_topic_per_time, num_doc=len(self.df),
108 num_words=self.num_words)
109 print("Topic Modeling is done")
110 self.evolving_topics = topic_evolution(self.list_tm, self.output)
File ~/miniconda3/lib/python3.12/site-packages/antm/topic_representation_layer.py:31, in ctfidf_rp(dictionary, documents_per_topic_per_time, num_doc, num_words)
29 words= count_vectorizer.get_feature_names_out()
30 count= count_vectorizer.transform(documents_per_topic_per_time.content)
---> 31 ctfidf= CTFIDFVectorizer().fit_transform(count, n_samples=num_doc).toarray()
32 topics_representations=ctf_idf_topics(documents_per_topic_per_time.cluster,words,ctfidf,num_words)
33 output = documents_per_topic_per_time.assign(topic_representation=topics_representations)
File ~/miniconda3/lib/python3.12/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File ~/miniconda3/lib/python3.12/site-packages/sklearn/base.py:1098, in TransformerMixin.fit_transform(self, X, y, **fit_params)
1083 warnings.warn(
1084 (
1085 f"This object ({self.__class__.__name__}) has a transform"
(...)
1093 UserWarning,
1094 )
1096 if y is None:
1097 # fit method of arity 1 (unsupervised transformation)
-> 1098 return self.fit(X, **fit_params).transform(X)
1099 else:
1100 # fit method of arity 2 (supervised transformation)
1101 return self.fit(X, y, **fit_params).transform(X)
File ~/miniconda3/lib/python3.12/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File ~/miniconda3/lib/python3.12/site-packages/antm/ctfidf.py:65, in CTFIDFVectorizer.transform(self, X, copy)
60 n_samples, n_features = X.shape
62 # idf_ being a property, the automatic attributes detection
63 # does not work as usual and we need to specify the attribute
64 # name:
---> 65 check_is_fitted(self, attributes=["idf_"],
66 msg='idf vector is not fitted')
68 # Check if expected nr features is found
69 expected_n_features = self._idf_diag.shape[0]
File ~/miniconda3/lib/python3.12/site-packages/sklearn/utils/validation.py:1661, in check_is_fitted(estimator, attributes, msg, all_or_any)
1658 raise TypeError("%s is not an estimator instance." % (estimator))
1660 if not _is_fitted(estimator, attributes, all_or_any):
-> 1661 raise NotFittedError(msg % {"name": type(estimator).__name__})
NotFittedError: idf vector is not fitted