Skip to content

Commit 507c5cf

Browse files
Predictor fixes (#70)
* Add textual dependency and update predictor configurations
* Implement SVO extraction and enhance predictor configurations
* Remove outdated TPP data file and add README files for Personachat and Human Conversation datasets. Start of re-organization to support localization of ConvAssist
* Add argument picking to VSCode launch configuration
* Remove unused startwords property from GeneralWordPredictor
* Add startwords property to SmoothedNgramPredictor for resource path management
* Remove SVOPredictor class and associated methods from smoothed_ngram_predictor
* Update default values for startsents and startwords to filenames in Predictor class
* Improve HNSWLIB index initialization and add progress tracking with tqdm
* Replace SVOPredictor with SmoothedNgramPredictor in predictor registry
* Updated Poetry Lock file with latest dependency updates.
* Add utility for converting parquet to CSV and enhance logging in PredictorActivator
1 parent 68edb65 commit 507c5cf

File tree

22 files changed

+290
-328
lines changed

22 files changed

+290
-328
lines changed

.vscode/launch.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
"justMyCode": false,
2323
"env": {
2424
"PYTHONPATH": "${workspaceFolder}"
25-
}
25+
},
26+
"args": "${command:pickArgs}"
2627
},
2728
{
2829
"name": "ConvAssist",
@@ -41,7 +42,9 @@
4142
"name": "Python Debugger: Python File",
4243
"type": "debugpy",
4344
"request": "launch",
44-
"program": "${file}"
45+
"program": "${file}",
46+
"justMyCode": false,
47+
4548
},
4649
{
4750
"name": "Python: Debug Tests",

3rd_party_resources/human-conversation/LICENSE renamed to 3rd_party_resources/localizations/en/human-conversation/LICENSE

File renamed without changes.

3rd_party_resources/human-conversation/README.md renamed to 3rd_party_resources/localizations/en/human-conversation/README.md

File renamed without changes.
File renamed without changes.
File renamed without changes.

3rd_party_resources/third_party_programs.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

3rd_party_resources/utils/database_generator.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import os
23
from typing import List
34

45
from tqdm import tqdm
@@ -51,7 +52,6 @@ def configure():
5152
#flag to clean the database
5253

5354
parser.add_argument(
54-
"-c",
5555
"--clean",
5656
action="store_true",
5757
help="Whether to clean the database"
@@ -89,14 +89,20 @@ def main(argv=None):
8989
if response.lower() != 'y':
9090
print("Exiting...")
9191
return
92+
else:
93+
print("Cleaning database...")
94+
os.remove(args.database)
9295

9396
phrases = []
9497

9598
with open(args.input_file) as f:
9699
for line in f:
97100
phrases.append(line.strip())
98101

102+
99103
with NGramUtil(args.database, args.cardinality, args.lowercase, args.normalize) as ngramutil:
104+
ngramutil.create_update_ngram_tables()
105+
100106
threads = []
101107
for i in range(args.cardinality):
102108
p = Thread(target=insertngrambycardinality, args=(ngramutil, phrases, i + 1))

3rd_party_resources/utils/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import pandas as pd
2+
3+
df = pd.read_parquet('train-00000-of-00001-aaf72b9960b78228.parquet')
4+
df.to_csv('data.csv', index=False)
5+

convassist/ConvAssist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def initialize(
6161
self.set_predictors()
6262

6363
self.predictor_activator = PredictorActivator(
64-
self.config, self.predictor_registry, self.context_tracker, self.logger
64+
self.name, self.config, self.predictor_registry, self.context_tracker #, self.logger
6565
)
6666
self.predictor_activator.combination_policy = "meritocracy"
6767

convassist/predictor/predictor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def __init__(
5858
self._sentence_transformer_model: str = "" # Path
5959
self._sentences_db: str = "" # Path
6060
self._spellingdatabase: str = "" # Path
61-
self._startsents: str = "" # Path
62-
self._startwords: str = "" # Path
61+
self._startsents: str = "start_sentences.txt" # Filename
62+
self._startwords: str = "start_words.txt" # Filename
6363
self._static_resources_path: str = ""
6464
self._stopwords: str = "" # Path
6565
self._test_generalsentenceprediction: bool = False

0 commit comments

Comments
 (0)