Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
from .name_of_image_check import PresImageCaptureCheck
from .task_tracker import TaskTracker
from .overview_in_tasks import OverviewInTasks
from .was_were_check import PresWasWereCheck
21 changes: 21 additions & 0 deletions app/main/checks/presentation_checks/was_were_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from ..base_check import BasePresCriterion, answer
from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences

class PresWasWereCheck(BasePresCriterion):
    """Presentation criterion flagging sentences that open with a removable "Был*" passive.

    The check fails when the number of detected sentences across all slides
    is strictly greater than ``threshold``.
    """

    label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла'
    description = ''
    id = 'pres_was_were_check'

    def __init__(self, file_info, threshold=3):
        """Store the tolerated number of detected sentences.

        Args:
            file_info: Passed through unchanged to the base criterion.
            threshold: Highest number of detected sentences that still passes.
        """
        super().__init__(file_info)
        self.threshold = threshold

    def check(self):
        """Run the detector over the slides and build the criterion answer.

        Returns:
            ``answer(1, ...)`` when the detected count does not exceed the
            threshold, otherwise ``answer(0, ...)`` with the list of
            offending sentences.
        """
        found, found_count = get_was_were_sentences(self.file, CritreriaType.PRESENTATION)
        if found_count <= self.threshold:
            return answer(1, 'Пройдена!')
        return answer(0, generate_output_text(found, CritreriaType.PRESENTATION))
1 change: 1 addition & 0 deletions app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@
from .sw_section_size import SWSectionSizeCheck
from .sw_keywords_check import SWKeywordsCheck
from .task_tracker import ReportTaskTracker
from .was_were_check import ReportWasWereCheck
23 changes: 23 additions & 0 deletions app/main/checks/report_checks/was_were_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from ..base_check import BaseReportCriterion, answer
from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences

class ReportWasWereCheck(BaseReportCriterion):
    """Report criterion flagging sentences that open with a removable "Был*" passive.

    Reports shorter than four pages are rejected up front. Otherwise the
    check fails when the number of detected sentences is strictly greater
    than ``threshold``.
    """

    label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла'
    description = ''
    id = 'report_was_were_check'

    def __init__(self, file_info, threshold=3):
        """Store the tolerated number of detected sentences.

        Args:
            file_info: Passed through unchanged to the base criterion.
            threshold: Highest number of detected sentences that still passes.
        """
        super().__init__(file_info)
        self.threshold = threshold

    def check(self):
        """Run the detector over the report pages and build the criterion answer.

        Returns:
            ``answer(False, ...)`` when the report has fewer than four pages,
            ``answer(1, ...)`` when the detected count does not exceed the
            threshold, otherwise ``answer(0, ...)`` with the list of
            offending sentences.
        """
        if self.file.page_counter() < 4:
            return answer(False, 'В отчёте недостаточно страниц. Нечего проверять.')
        found, found_count = get_was_were_sentences(self.file, CritreriaType.REPORT)
        if found_count <= self.threshold:
            return answer(1, 'Пройдена!')
        return answer(0, generate_output_text(found, CritreriaType.REPORT))
86 changes: 86 additions & 0 deletions app/nlp/is_passive_was_were_sentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import re
import pymorphy2
import string
from enum import Enum

morph = pymorphy2.MorphAnalyzer()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Добавьте в данном файле в виде комментария примеры хороших и плохих предложений, которые начинаются с Был*

Comment on lines +2 to +6
Copy link
Collaborator

@HadronCollider HadronCollider Dec 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Подтяните изменения из dev и замените pymorphy2 на pymorphy3



class CritreriaType(Enum):
    """Kind of document a was/were check is run against.

    The class name is misspelled ("Critreria"); it is kept as-is because
    existing modules import it under this name.
    """

    REPORT = 0
    PRESENTATION = 1


# Correctly spelled alias so new code does not have to repeat the typo.
CriteriaType = CritreriaType


def criteria_type_to_str(type: CritreriaType):
    """Return the Russian label used for one unit of the checked document.

    Args:
        type: Document kind. The parameter name shadows the builtin but is
            part of the existing signature.

    Returns:
        "Страница" for reports, "Слайд" for presentations and the generic
        "Элемент" for any other value.
    """
    known_labels = (
        (CritreriaType.REPORT, "Страница"),
        (CritreriaType.PRESENTATION, "Слайд"),
    )
    for kind, label in known_labels:
        if type == kind:
            return label
    return "Элемент"

def get_content_by_file(file, type: CritreriaType):
    """Return an iterable of ``(index, text)`` pairs for the document.

    Args:
        file: Parsed document. For reports it must expose
            ``pdf_file.get_text_on_page()`` returning a mapping; for
            presentations it must expose ``get_text_from_slides()``
            returning an iterable of slide texts.
        type: Document kind selecting which accessor is used.

    Returns:
        For reports, the ``items()`` view of the page-text mapping; for
        presentations, an ``enumerate`` over the slide texts (0-based).

    Raises:
        ValueError: If ``type`` is not a supported ``CritreriaType`` member.
            Previously the function fell through and returned ``None``,
            which only failed later with an unrelated "not iterable" error.
    """
    if type == CritreriaType.REPORT:
        # NOTE(review): the keys are whatever get_text_on_page() uses; the
        # message builder adds 1 to them for display - confirm they are
        # 0-based, otherwise page numbers are shifted by one.
        return file.pdf_file.get_text_on_page().items()
    if type == CritreriaType.PRESENTATION:
        return enumerate(file.get_text_from_slides())
    raise ValueError(f'Unsupported criteria type: {type!r}')

def clean_word(word):
    """Strip punctuation from ``word`` while keeping hyphens.

    Every character of ``string.punctuation`` except ``-`` is removed, as
    before. In addition, any non-ASCII character whose Unicode category is
    punctuation (guillemets, typographic quotes, ellipsis, dashes, ...) is
    removed, so tokens such as ``«Был`` or ``проведён…`` are reduced to
    the bare word before morphological analysis.

    Args:
        word: Token to clean.

    Returns:
        The token without punctuation; may be an empty string.
    """
    # Local import: the module's import block does not include unicodedata.
    import unicodedata

    # ASCII hyphen plus the Unicode hyphen / non-breaking hyphen are kept,
    # because they can be part of a compound word.
    hyphens = '-\u2010\u2011'
    ascii_punct = string.punctuation
    return ''.join(
        ch for ch in word
        if ch in hyphens
        or (ch not in ascii_punct
            and not unicodedata.category(ch).startswith('P'))
    )


def is_passive_was_were_sentece(sentence):
    """Tell whether ``sentence`` opens with a "Был*" + short passive participle.

    Only the first two whitespace-separated tokens are inspected. The
    sentence matches when the top parse of the first token is a past-tense
    verb form of "быть" and the top parse of the second token is a short
    passive participle.

    Bad sentences (passive construction, "Был*" can be dropped):
        - Был проведен анализ данных
        - Была выполнена работа по исследованию
        - Было принято решение о внедрении
        - Были получены следующие результаты
        - Была создана база данных

    Good sentences ("Был*" cannot be dropped):
        - Было бы здорово получить новые данные
        - Был сильный скачок напряжения
        - Были времена, когда это казалось невозможным
        - Был студентом университета три года назад
        - Была программистом до выхода на пенсию

    Args:
        sentence: Sentence text.

    Returns:
        True when the sentence matches the pattern, False otherwise
        (including sentences with fewer than two tokens).
    """
    tokens = re.split(r'\s+', sentence.strip(), maxsplit=2)
    if len(tokens) < 2:
        return False

    opener = clean_word(tokens[0])
    follower = clean_word(tokens[1])

    # Only the highest-ranked parse of each token is considered.
    opener_parse = morph.parse(opener)[0]
    if (opener_parse.normal_form != 'быть'
            or 'past' not in opener_parse.tag
            or opener_parse.tag.POS != 'VERB'):
        return False

    follower_tag = morph.parse(follower)[0].tag
    return 'PRTS' in follower_tag and 'pssv' in follower_tag


def generate_output_text(detected_senteces, type: CritreriaType):
    """Build the HTML message listing every detected sentence.

    Args:
        detected_senteces: Mapping of page/slide index to the list of
            message strings recorded for it.
        type: Document kind; selects the per-section label via
            ``criteria_type_to_str``.

    Returns:
        An HTML fragment: a fixed header followed by one section per
        mapping key, labelled with ``index + 1`` and with its messages
        separated by ``<br>``.
    """
    # Local import: the module's import block does not include html.
    import html

    # The label does not depend on the loop variable, so compute it once.
    unit_label = criteria_type_to_str(type)
    parts = ['Обнаружены конструкции (Был/Была/Было/Били), которые можно удалить без потери смысла:<br><br>'.replace('Били', 'Были')]
    for index, messages in detected_senteces.items():
        # Messages quote text taken from the checked document; escape them
        # so markup inside the document is shown literally instead of being
        # interpreted as part of this HTML fragment.
        escaped = '<br>'.join(html.escape(message) for message in messages)
        # NOTE(review): "index + 1" assumes 0-based keys - confirm for reports.
        parts.append(f'{unit_label} №{index + 1}: <br>{escaped}<br><br>')
    return ''.join(parts)


def get_was_were_sentences(file, type: CritreriaType):
    """Collect sentences that open with a removable "Был*" passive.

    Each page/slide text is split into sentences after ``.``, ``!``, ``?``
    or ``…`` followed by whitespace, and every sentence is tested with
    ``is_passive_was_were_sentece``.

    Args:
        file: Parsed document handed to ``get_content_by_file``.
        type: Document kind handed to ``get_content_by_file``.

    Returns:
        A ``(detected, count)`` tuple. ``detected`` maps a page/slide index
        to a list of ``"<sentence number>: <preview>"`` strings, where the
        sentence number is 1-based within the page and the preview is cut
        to 30 characters plus ``...`` for longer sentences. ``count`` is
        the number of detected sentences (not of all sentences).
    """
    hits_by_page = {}
    hit_count = 0
    for page_key, text in get_content_by_file(file, type):
        pieces = re.split(r'(?<=[.!?…])\s+', text)
        for position, candidate in enumerate(pieces, start=1):
            if not is_passive_was_were_sentece(candidate):
                continue
            hit_count += 1
            if len(candidate) > 30:
                preview = candidate[:30] + '...'
            else:
                preview = candidate
            hits_by_page.setdefault(page_key, []).append(f'{position}: {preview}')
    return hits_by_page, hit_count
Loading