diff --git a/app/criteria/comparison_speech_slides/criterion.py b/app/criteria/comparison_speech_slides/criterion.py
new file mode 100644
index 00000000..c328988b
--- /dev/null
+++ b/app/criteria/comparison_speech_slides/criterion.py
@@ -0,0 +1,90 @@
+from bson import ObjectId
+
+from app.root_logger import get_root_logger
+from app.localisation import *
+from ..criterion_base import BaseCriterion
+from ..criterion_result import CriterionResult
+from app.audio import Audio
+from app.presentation import Presentation
+from app.utils import normalize_text, delete_punctuation
+from ..text_comparison import SlidesSimilarityEvaluator
+
+logger = get_root_logger('web')
+
+
# Criterion that scores how well each slide's text matches the student's
# speech while that slide was being shown.
class ComparisonSpeechSlidesCriterion(BaseCriterion):
    # Declare both configurable parameters so configuration tooling sees them
    # (the original omitted slide_speech_threshold even though it is used).
    PARAMETERS = dict(
        skip_slides=list.__name__,
        slide_speech_threshold=float.__name__,
    )

    def __init__(self, parameters, dependent_criteria, name=''):
        # Copy so the default below does not mutate the caller's dict, and
        # apply it before the base class stores/validates the parameters.
        parameters = dict(parameters)
        parameters.setdefault('slide_speech_threshold', 0.125)
        super().__init__(
            name=name,
            parameters=parameters,
            dependent_criteria=dependent_criteria,
        )
        self.evaluator = SlidesSimilarityEvaluator()

    @property
    def description(self):
        return {
            "Критерий": t(self.name),
            "Описание": t(
                "Проверяет, что текст слайда соответствует словам, которые произносит студент во время демонстрации "
                "этого слайда"),
            "Оценка": t("1, если среднее значение соответствия речи содержимому слайдов равно или превосходит заданного порога (от 0 до 1), "
                        "иначе r / значение порога, где r - среднее значение соответствия речи демонстрируемым слайдам")
        }

    def skip_slide(self, current_slide_text: str) -> bool:
        """Return True if the slide text contains any configured skip marker."""
        cleaned = delete_punctuation(current_slide_text).lower()
        return any(marker.lower() in cleaned for marker in self.parameters['skip_slides'])

    def apply(self, audio: Audio, presentation: Presentation, training_id: ObjectId,
              criteria_results: dict) -> CriterionResult:
        """Score the average speech/slide-text similarity against the threshold."""
        # Per-slide similarity scores, keyed by 1-based slide number
        results = {}
        slides_to_process = []

        # Guard against a mismatch between the number of audio slides and
        # presentation slides (would otherwise raise IndexError below).
        slide_count = min(len(audio.audio_slides), len(presentation.slides))
        for current_slide_index in range(slide_count):
            # Words spoken on this slide -- a list of RecognizedWord
            current_slide_speech = audio.audio_slides[current_slide_index].recognized_words
            # Keep only the word values; drop timestamps and probabilities
            current_slide_speech = [w.word.value for w in current_slide_speech]
            # Normalize the speech text
            current_slide_speech = " ".join(normalize_text(current_slide_speech))

            # Nothing was said on this slide: score it 0 and move on
            if not current_slide_speech.split():
                results[current_slide_index + 1] = 0.000
                continue

            # Words from the presentation slide
            current_slide_text = presentation.slides[current_slide_index].words
            # Skip slides that match the configured skip list
            if self.skip_slide(current_slide_text):
                logger.info(f"Слайд №{current_slide_index + 1} пропущен")
                continue

            # Normalize the slide text
            current_slide_text = " ".join(normalize_text(current_slide_text.split()))
            slides_to_process.append((current_slide_speech, current_slide_text, current_slide_index + 1))

        # Fitting on an empty corpus would fail, so only train and evaluate
        # when at least one slide yielded both speech and text.
        if slides_to_process:
            self.evaluator.train_model([
                " ".join(speech for speech, _, _ in slides_to_process),
                " ".join(slide_text for _, slide_text, _ in slides_to_process),
            ])
            for speech, slide_text, slide_number in slides_to_process:
                results[slide_number] = self.evaluator.evaluate_semantic_similarity(speech, slide_text)

        results = dict(sorted(results.items()))

        # No slide could be evaluated at all (e.g. every slide was skipped):
        # avoid the ZeroDivisionError of the averaging step below.
        if not results:
            return CriterionResult(0, "Не удалось сопоставить речь и слайды")

        threshold = self.parameters['slide_speech_threshold']
        score = (sum(results.values()) / len(results)) / threshold

        if score >= 1:
            return CriterionResult(1, "Отлично")
        weak_slides = [f"№{n} - {r}" for n, r in results.items() if r < threshold]
        return CriterionResult(score, "Следует уделить внимание "
                                      "соответствию речи на слайдах "
                                      "{}".format(",\n".join(weak_slides)))
diff --git a/app/criteria/comparison_whole_speech/criterion.py b/app/criteria/comparison_whole_speech/criterion.py
new file mode 100644
index 00000000..f0cf6227
--- /dev/null
+++ b/app/criteria/comparison_whole_speech/criterion.py
@@ -0,0 +1,84 @@
+from bson import ObjectId
+
+from app.root_logger import get_root_logger
+from app.localisation import *
+from ..criterion_base import BaseCriterion
+from ..criterion_result import CriterionResult
+from app.audio import Audio
+from app.presentation import Presentation
+from app.utils import normalize_text
+from ..text_comparison import Doc2VecEvaluator
+
+logger = get_root_logger('web')
+
+
class ComparisonWholeSpeechCriterion(BaseCriterion):
    """Checks that the topic of the whole speech matches the presentation."""

    PARAMETERS = dict(
        vector_size=int.__name__,
        window=int.__name__,
        min_count=int.__name__,
        workers=int.__name__,
        epochs=int.__name__,
        dm=int.__name__,
    )

    def __init__(self, parameters, dependent_criteria, name=''):
        super().__init__(
            name=name,
            parameters=parameters,
            dependent_criteria=dependent_criteria,
        )
        params = self.parameters
        # Doc2Vec hyperparameters come straight from the criterion configuration.
        self.model = Doc2VecEvaluator(
            params['vector_size'],
            params['window'],
            params['min_count'],
            params['workers'],
            params['epochs'],
            params['dm'],
        )

    @property
    def description(self):
        return {
            "Критерий": t(self.name),
            "Описание": t("Проверяет, что тема доклада студента совпадает с темой презентации"),
            "Оценка": t(
                "1, если тема доклада и презентации совпадают не менее, чем на 40%, иначе 2.5 * k, где k - степень соответствия темы доклада теме презентации")
        }

    def apply(self, audio: Audio, presentation: Presentation, training_id: ObjectId,
              criteria_results: dict) -> CriterionResult:
        """Compare the concatenated normalized speech with the concatenated slide text."""
        speech_fragments = []
        slide_fragments = []

        for slide_idx in range(len(audio.audio_slides)):
            # Words recognized while this slide was shown
            recognized = audio.audio_slides[slide_idx].recognized_words
            # Strip timestamps/probabilities, keep the bare word values
            spoken_words = [rw.word.value for rw in recognized]
            speech_part = " ".join(normalize_text(spoken_words))
            if speech_part:
                speech_fragments.append(speech_part)

            # Normalized text of the corresponding presentation slide
            slide_part = " ".join(normalize_text(presentation.slides[slide_idx].words.split()))
            if slide_part:
                slide_fragments.append(slide_part)

        # Guard clauses: either side being empty makes comparison meaningless.
        if not speech_fragments:
            return CriterionResult(0, "Тренажер не зафиксировал, что вы что-то говорили")
        full_speech = " ".join(speech_fragments)

        if not slide_fragments:
            return CriterionResult(0, "Загруженная вами презентация не содержит текста")
        full_slides = " ".join(slide_fragments)

        self.model.train_model([full_speech, full_slides])

        # 2.5 * similarity maps the 40% match point described above onto a score of 1.
        score = 2.5 * self.model.evaluate_semantic_similarity(full_speech, full_slides)
        logger.info(f"Score={score}")
        if score >= 1:
            return CriterionResult(1, "Ваша речь соответствует тексту презентации")
        return CriterionResult(score, "Ваша речь не полностью соответствует теме презентации")
diff --git a/app/criteria/criterions.py b/app/criteria/criterions.py
index 4af03365..5afd53fe 100644
--- a/app/criteria/criterions.py
+++ b/app/criteria/criterions.py
@@ -8,3 +8,5 @@
from .speech_is_not_in_database.criterion import SpeechIsNotInDatabaseCriterion
from .speech_pace.criterion import SpeechPaceCriterion
from .strict_speech_duration.criterion import StrictSpeechDurationCriterion
+from .comparison_speech_slides.criterion import ComparisonSpeechSlidesCriterion
+from .comparison_whole_speech.criterion import ComparisonWholeSpeechCriterion
diff --git a/app/criteria/preconfigured_criterions.py b/app/criteria/preconfigured_criterions.py
index 89da1f19..e92bb837 100644
--- a/app/criteria/preconfigured_criterions.py
+++ b/app/criteria/preconfigured_criterions.py
@@ -9,10 +9,11 @@
from criteria import (FillersNumberCriterion, FillersRatioCriterion,
SpeechIsNotInDatabaseCriterion, SpeechPaceCriterion,
- StrictSpeechDurationCriterion)
+ StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion,
+ ComparisonWholeSpeechCriterion)
from .utils import DEFAULT_FILLERS
-
+from .utils import DEFAULT_SKIP_SLIDES
preconfigured_criterions = [
# SpeechDurationCriterion
@@ -143,7 +144,27 @@
}
},
dependent_criteria=[],
+ ),
+
+ ComparisonSpeechSlidesCriterion(
+ name="ComparisonSpeechSlidesCriterion",
+ parameters={"skip_slides": DEFAULT_SKIP_SLIDES},
+ dependent_criteria=[],
+ ),
+
+ ComparisonWholeSpeechCriterion(
+ name="ComparisonWholeSpeechCriterion",
+ parameters={
+ "vector_size": 200,
+ "window": 5,
+ "min_count": 3,
+ "workers": 4,
+ "epochs": 40,
+ "dm": 0
+ },
+ dependent_criteria=[],
)
+
]
diff --git a/app/criteria/text_comparison.py b/app/criteria/text_comparison.py
new file mode 100644
index 00000000..2edd2258
--- /dev/null
+++ b/app/criteria/text_comparison.py
@@ -0,0 +1,37 @@
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from gensim.models.doc2vec import Doc2Vec, TaggedDocument
+
+
class SlidesSimilarityEvaluator:
    """TF-IDF based similarity scorer for slide text vs. speech text."""

    def __init__(self):
        # Unigram-only TF-IDF representation
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 1))

    def train_model(self, corpus: list):
        """Fit the TF-IDF vocabulary/IDF weights on the given corpus of strings."""
        self.vectorizer.fit(corpus)

    def evaluate_semantic_similarity(self, text1: str, text2: str) -> float:
        """Return the cosine similarity of the two texts, rounded to 3 places."""
        vectors = self.vectorizer.transform([text1, text2])
        similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
        return round(similarity, 3)
+
+
class Doc2VecEvaluator:
    """Doc2Vec-based semantic similarity between two texts."""

    def __init__(self, vector_size: int, window: int, min_count: int, workers: int, epochs: int, dm: int):
        self.model = Doc2Vec(vector_size=vector_size, window=window, min_count=min_count, workers=workers,
                             epochs=epochs, dm=dm)

    def train_model(self, documents: list):
        """Build the vocabulary and train the model on the given document strings."""
        tagged_documents = [TaggedDocument(words=doc.split(), tags=[i]) for i, doc in enumerate(documents)]
        self.model.build_vocab(tagged_documents)
        self.model.train(tagged_documents, total_examples=self.model.corpus_count, epochs=self.model.epochs)

    def evaluate_semantic_similarity(self, text1: str, text2: str) -> float:
        """Return word-set similarity of the two texts, rounded to 3 places.

        Returns 0.0 when either text has no in-vocabulary words.
        """
        # n_similarity raises for words absent from the vocabulary (e.g. words
        # filtered out by min_count), so keep only in-vocabulary words first.
        words1 = [w for w in text1.split() if w in self.model.wv]
        words2 = [w for w in text2.split() if w in self.model.wv]
        if not words1 or not words2:
            # Empty word lists would make n_similarity fail; treat as no match.
            return 0.0

        similarity = self.model.wv.n_similarity(words1, words2)
        # Cast the numpy scalar to a plain Python float before rounding.
        return round(float(similarity), 3)
diff --git a/app/criteria/utils.py b/app/criteria/utils.py
index 910e04ad..d7aeb059 100644
--- a/app/criteria/utils.py
+++ b/app/criteria/utils.py
@@ -3,7 +3,6 @@
import traceback
from typing import Optional, Callable
-
from app.audio import Audio
from app.utils import get_types
@@ -84,6 +83,10 @@ def get_fillers_number(fillers: list, audio: Audio) -> int:
return sum(map(len, get_fillers(fillers, audio)))
+DEFAULT_SKIP_SLIDES = [
+ "Спасибо за внимание",
+]
+
DEFAULT_FILLERS = [
'короче',
'однако',
diff --git a/app/criteria_pack/preconfigured_pack.py b/app/criteria_pack/preconfigured_pack.py
index 37f08fd4..22e62bd3 100644
--- a/app/criteria_pack/preconfigured_pack.py
+++ b/app/criteria_pack/preconfigured_pack.py
@@ -32,7 +32,10 @@
['DEFAULT_FILLERS_RATIO_CRITERION', 0.33]],
'SlidesCheckerPack':
[['SimpleNumberSlidesCriterion', 0.05],
- ['SlidesCheckerCriterion', 0.95]]
+ ['SlidesCheckerCriterion', 0.95]],
+ 'ComparisonPack':
+ [['ComparisonSpeechSlidesCriterion', 0.5],
+ ['ComparisonWholeSpeechCriterion', 0.5]]
}
diff --git a/app/feedback_evaluator.py b/app/feedback_evaluator.py
index 6aa22493..d29050c9 100644
--- a/app/feedback_evaluator.py
+++ b/app/feedback_evaluator.py
@@ -1,7 +1,7 @@
import json
from app.criteria import SpeechDurationCriterion, SpeechPaceCriterion, FillersRatioCriterion, FillersNumberCriterion, \
- StrictSpeechDurationCriterion
+ StrictSpeechDurationCriterion, ComparisonSpeechSlidesCriterion, ComparisonWholeSpeechCriterion
class Feedback:
diff --git a/app/presentation_parser/slide_splitter.py b/app/presentation_parser/slide_splitter.py
index c402c180..efcc1568 100644
--- a/app/presentation_parser/slide_splitter.py
+++ b/app/presentation_parser/slide_splitter.py
@@ -1,7 +1,5 @@
import fitz
import pymorphy2
-import nltk
-nltk.download('stopwords')
from nltk.corpus import stopwords
import os
diff --git a/app/training_processor.py b/app/training_processor.py
index dd17dc3e..c7abc4c6 100644
--- a/app/training_processor.py
+++ b/app/training_processor.py
@@ -87,6 +87,10 @@ def run(self):
if __name__ == "__main__":
+ import nltk
+ nltk.download('stopwords')
+ nltk.download('punkt')
+
Config.init_config(sys.argv[1])
training_processor = TrainingProcessor()
training_processor.run()
diff --git a/app/utils.py b/app/utils.py
index cc2d39b8..79bf3795 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -1,4 +1,6 @@
import os
+import string
+import re
import tempfile
from distutils.util import strtobool
from threading import Timer
@@ -7,6 +9,8 @@
from bson import ObjectId
from flask import json
import magic
+import pymorphy2
+from nltk.corpus import stopwords
from pydub import AudioSegment
import subprocess
@@ -16,11 +20,11 @@
SECONDS_PER_MINUTE = 60
BYTES_PER_MEGABYTE = 1024 * 1024
ALLOWED_MIMETYPES = {
- 'pdf': ['application/pdf'],
- 'ppt': ['application/vnd.ms-powerpoint'],
- 'odp': ['application/vnd.oasis.opendocument.presentation'],
- 'pptx': ['application/vnd.openxmlformats-officedocument.presentationml.presentation', 'application/zip']
- }
+ 'pdf': ['application/pdf'],
+ 'ppt': ['application/vnd.ms-powerpoint'],
+ 'odp': ['application/vnd.oasis.opendocument.presentation'],
+ 'pptx': ['application/vnd.openxmlformats-officedocument.presentationml.presentation', 'application/zip']
+}
CONVERTIBLE_EXTENSIONS = ('ppt', 'pptx', 'odp')
ALLOWED_EXTENSIONS = set(ALLOWED_MIMETYPES.keys())
DEFAULT_EXTENSION = 'pdf'
@@ -74,7 +78,7 @@ def convert_to_pdf(presentation_file):
temp_file.write(presentation_file.read())
temp_file.close()
presentation_file.seek(0)
-
+
converted_file = None
convert_cmd = f"soffice --headless --convert-to pdf --outdir {os.path.dirname(temp_file.name)} {temp_file.name}"
if run_process(convert_cmd).returncode == 0:
@@ -136,9 +140,9 @@ def check_argument_is_convertible_to_object_id(arg):
return {'message': '{} cannot be converted to ObjectId. {}: {}'.format(arg, e1.__class__, e1)}, 404
except Exception as e2:
return {
- 'message': 'Some arguments cannot be converted to ObjectId or to str. {}: {}.'
- .format(e2.__class__, e2)
- }, 404
+ 'message': 'Some arguments cannot be converted to ObjectId or to str. {}: {}.'
+ .format(e2.__class__, e2)
+ }, 404
def check_arguments_are_convertible_to_object_id(f):
@@ -182,6 +186,29 @@ def check_dict_keys(dictionary, keys):
return f"{msg}\n{dictionary}" if msg else ''
# Shared analyzer instance: constructing pymorphy2.MorphAnalyzer is expensive
# (it loads its dictionaries), so build it once instead of on every call.
_MORPH_ANALYZER = None


def _get_morph_analyzer():
    """Lazily create and reuse a single pymorphy2 morphological analyzer."""
    global _MORPH_ANALYZER
    if _MORPH_ANALYZER is None:
        _MORPH_ANALYZER = pymorphy2.MorphAnalyzer()
    return _MORPH_ANALYZER


# Text normalization pipeline for Russian text
def normalize_text(text: list) -> list:
    """Normalize a list of raw words: strip punctuation, keep only Russian
    words, lemmatize them and drop stopwords.

    :param text: list of raw word strings
    :return: list of normalized (dictionary-form) words
    """
    table = str.maketrans("", "", string.punctuation)
    morph = _get_morph_analyzer()

    # Strip punctuation, lowercase, trim surrounding whitespace
    words = [word.translate(table).lower().strip() for word in text]
    # Drop digits and any non-Russian characters
    words = [re.sub(r'[^А-яёЁ\s]', '', word) for word in words]
    # Drop empty strings and non-alphabetic leftovers
    words = [word for word in words if word.isalpha()]
    # Reduce each word to its normal (dictionary) form
    words = [morph.normal_forms(word)[0] for word in words]
    # Remove stopwords
    return [word for word in words if word not in RussianStopwords().words]
+
+
# Remove punctuation (and control whitespace) from the text
def delete_punctuation(text: str) -> str:
    """Return *text* with all punctuation and control-whitespace characters
    (tab, newline, carriage return, vertical tab, form feed) removed.
    Ordinary spaces are kept.
    """
    removal_table = str.maketrans('', '', string.punctuation + "\t\n\r\v\f")
    return text.translate(removal_table)
+
+
class RepeatedTimer:
"""
Utility class to call a function with a given interval between the end and the beginning of consecutive calls
@@ -210,3 +237,18 @@ def start(self):
def stop(self):
self._timer.cancel()
self.is_running = False
+
+
class Singleton(type):
    """Metaclass that caches one instance per class.

    The first instantiation of a class using this metaclass creates the
    instance; every subsequent call returns the same cached object.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super().__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance
+
+
class RussianStopwords(metaclass=Singleton):
    """Process-wide holder for the NLTK Russian stopword list.

    Thanks to the Singleton metaclass the stopword corpus is loaded once and
    the resulting list is shared by all callers.
    """

    def __init__(self):
        # Russian stopwords from NLTK; presumably requires the 'stopwords'
        # corpus to have been downloaded beforehand — confirm deployment setup.
        self.words = stopwords.words('russian')
diff --git a/docker-compose.yml b/docker-compose.yml
index b4569d4e..303a30ed 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -38,6 +38,8 @@ services:
restart: always
depends_on:
- db
+ volumes:
+ - nltk_data:/root/nltk_data
recognized_presentation_processor:
image: wst-image:v0.2
@@ -55,6 +57,8 @@ services:
- db
- recognized_audio_processor
- recognized_presentation_processor
+ volumes:
+ - nltk_data:/root/nltk_data
task_attempt_to_pass_back_processor:
image: wst-image:v0.2
@@ -84,3 +88,4 @@ services:
volumes:
whisper_models:
+ nltk_data:
diff --git a/requirements.txt b/requirements.txt
index ffcf3ef2..e672ccb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
Flask-Reuploaded ==1.4.0
PyMuPDF ==1.23.26
-flask ==3.0.2
+flask ==3.0.2
fuzzywuzzy ==0.18.0
librosa ==0.10.1
lti ==0.9.5
@@ -26,3 +26,5 @@ requests ==2.27.1
scipy ==1.12.0
ua-parser ==0.18
vext ==0.7.6
+scikit-learn ==1.4.2
+gensim ==4.3.2