|
22 | 22 | SOFTWARE.
|
23 | 23 | ----------------------------------------------------------------------------------------------
|
24 | 24 | """
|
| 25 | + |
25 | 26 | import logging
|
26 | 27 | import os
|
27 | 28 | import pickle # for saving the embeddings cache
|
|
42 | 43 | import plotly.express as px # for plots
|
43 | 44 | import plotly.graph_objs as go # for plot object type
|
44 | 45 | import requests
|
45 |
| - from sklearn.model_selection import train_test_split # for splitting train & test data |
| 46 | + from sklearn.model_selection import ( |
| 47 | + train_test_split, |
| 48 | + ) # for splitting train & test data |
46 | 49 | import torch # for matrix optimization
|
47 | 50 | from tenacity import retry, stop_after_attempt, wait_random_exponential
|
48 | 51 |
|
@@ -243,8 +246,14 @@ def test_df_negatives(base_test_df: pd.DataFrame) -> pd.DataFrame:
|
243 | 246 |
|
244 | 247 |
|
245 | 248 | @parameterize(
|
246 |
| - train_df={"base_df": source("base_train_df"), "df_negatives": source("train_df_negatives")}, |
247 |
| - test_df={"base_df": source("base_test_df"), "df_negatives": source("test_df_negatives")}, |
| 249 | + train_df={ |
| 250 | + "base_df": source("base_train_df"), |
| 251 | + "df_negatives": source("train_df_negatives"), |
| 252 | + }, |
| 253 | + test_df={ |
| 254 | + "base_df": source("base_test_df"), |
| 255 | + "df_negatives": source("test_df_negatives"), |
| 256 | + }, |
248 | 257 | )
|
249 | 258 | def construct_df(
|
250 | 259 | base_df: pd.DataFrame,
|
@@ -631,7 +640,9 @@ def mse_loss(predictions, targets):
|
@inject(
    optimization_result_matrices=group(*[source(k) for k in optimization_parameterization])
)
def optimization_results(
    optimization_result_matrices: List[pd.DataFrame],
) -> pd.DataFrame:
    """Concatenate the per-parameterization optimization result matrices.

    Each injected dataframe holds the results for one entry of
    ``optimization_parameterization``; they are stacked row-wise into a
    single dataframe for downstream analysis.
    """
    combined = pd.concat(optimization_result_matrices)
    return combined
|
637 | 648 |
|
@@ -685,7 +696,9 @@ def customized_embeddings_dataframe(
|
685 | 696 | return embedded_data_set
|
686 | 697 |
|
687 | 698 |
|
688 |
| -def customized_dataset_histogram(customized_embeddings_dataframe: pd.DataFrame) -> go.Figure: |
| 699 | +def customized_dataset_histogram( |
| 700 | + customized_embeddings_dataframe: pd.DataFrame, |
| 701 | +) -> go.Figure: |
689 | 702 | """Plot histogram of cosine similarities for the new customized embeddings.
|
690 | 703 |
|
691 | 704 | The graphs show how much the overlap there is between the distribution of cosine similarities for similar and
|
|
0 commit comments