From 3b2bc0e2a166bb9ee68475c405c2127d7db4fbc8 Mon Sep 17 00:00:00 2001
From: Usama Shahid
Date: Sun, 12 May 2024 08:43:15 +0100
Subject: [PATCH 1/2] import HDBSCAN from sklearn instead of hdbscan

The hdbscan library raises errors on MacBooks, but HDBSCAN is available
within sklearn, so this also reduces dependencies.
---
 setup.py           | 1 -
 top2vec/Top2Vec.py | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index f5d45f2..1025e24 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,6 @@
         'scikit-learn >= 1.2.0',
         'gensim >= 4.0.0',
         'umap-learn >= 0.5.1',
-        'hdbscan >= 0.8.27',
         'wordcloud',
     ],
     extras_require={
diff --git a/top2vec/Top2Vec.py b/top2vec/Top2Vec.py
index 712169c..a75ed84 100644
--- a/top2vec/Top2Vec.py
+++ b/top2vec/Top2Vec.py
@@ -9,7 +9,7 @@
 from gensim.parsing.preprocessing import strip_tags
 from gensim.models.phrases import Phrases
 import umap
-import hdbscan
+from sklearn.cluster import HDBSCAN
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 from joblib import dump, load
@@ -1384,7 +1384,7 @@ def compute_topics(self,
             labels = cluster.fit_predict(umap_embedding)
 
         else:
-            cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding)
+            cluster = HDBSCAN(**hdbscan_args).fit(umap_embedding)
             labels = cluster.labels_
 
         # calculate topic vectors from dense areas of documents

From 1534fe04d6c94205a4bc667c338fa322cbe77871 Mon Sep 17 00:00:00 2001
From: Usama Shahid
Date: Sun, 12 May 2024 09:07:32 +0100
Subject: [PATCH 2/2] consolidated sklearn.cluster imports

---
 top2vec/Top2Vec.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/top2vec/Top2Vec.py b/top2vec/Top2Vec.py
index a75ed84..f278796 100644
--- a/top2vec/Top2Vec.py
+++ b/top2vec/Top2Vec.py
@@ -9,11 +9,10 @@
 from gensim.parsing.preprocessing import strip_tags
 from gensim.models.phrases import Phrases
 import umap
-from sklearn.cluster import HDBSCAN
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 from joblib import dump, load
-from sklearn.cluster import dbscan
+from sklearn.cluster import dbscan, HDBSCAN
 import tempfile
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.preprocessing import normalize
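
Note on the swapped API (illustrative, not part of the patch): sklearn.cluster.HDBSCAN was added in scikit-learn 1.3 and exposes the same fit()/labels_ interface as hdbscan.HDBSCAN, so only the constructor reference changes at the call site. A minimal sketch of the new call pattern, assuming a 5-D UMAP embedding as input; the array and argument values below are example placeholders, not taken from the patch:

import numpy as np
from sklearn.cluster import HDBSCAN  # requires scikit-learn >= 1.3

# Placeholder standing in for the 5-D UMAP document embedding Top2Vec computes.
umap_embedding = np.random.rand(1000, 5)

# Example clustering arguments; Top2Vec passes its own hdbscan_args dict here.
hdbscan_args = {"min_cluster_size": 15,
                "metric": "euclidean",
                "cluster_selection_method": "eom"}

# Same fit()/labels_ usage as the hdbscan library; a label of -1 marks noise points.
cluster = HDBSCAN(**hdbscan_args).fit(umap_embedding)
labels = cluster.labels_

Because the interface matches, the surrounding label-handling code is untouched; only the import line and the hdbscan. prefix change.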