diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index 598c3cc2..845d6f8e 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -18,6 +18,8 @@
['pres_empty_slide'],
['theme_in_pres_check'],
['verify_git_link'],
+ ['slide_text_volume_check'],
+ ['slide_text_volume_check', {'work_mode': 'Заключение'}],
]
BASE_REPORT_CRITERION = [
["simple_check"],
diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index d605c1d3..f26e20b9 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -13,3 +13,4 @@
from .find_theme_in_pres import FindThemeInPres
from .verify_git_link import PresVerifyGitLinkCheck
from .empty_slide_check import PresEmptySlideCheck
+from .slide_text_volume_check import SlideTextVolumeCheck
\ No newline at end of file
diff --git a/app/main/checks/presentation_checks/slide_text_volume_check.py b/app/main/checks/presentation_checks/slide_text_volume_check.py
new file mode 100644
index 00000000..d212ebcd
--- /dev/null
+++ b/app/main/checks/presentation_checks/slide_text_volume_check.py
@@ -0,0 +1,184 @@
+from ..base_check import BasePresCriterion, answer
+from utils import get_text_from_slides
+from nlp.stemming import Stemming
+import re
+
+# Supported values of ``work_mode``: all slides after the first one, or only
+# the first slide whose title contains "Заключение".
+WORK_MODE = ["all_slides", "Заключение"]
+
+# Separator appended to every line of the report.
+# NOTE(review): in the reviewed patch these separators were raw line breaks
+# inside single-quoted f-strings, which is a syntax error. They are kept as
+# newlines here; if the report is rendered as HTML, '<br>' may be intended.
+LINE_BREAK = '\n'
+
+
+class SlideTextVolumeCheck(BasePresCriterion):
+    """Report slides whose amount of text is outside the configured limits.
+
+    Paragraphs, words per paragraph and words per slide are counted for every
+    inspected slide; a count is reported unless it is strictly between its
+    minimum and maximum.
+    """
+
+    label = 'Проверка объема текста на каждом слайде'
+    description = 'Объем текста на каждом слайде (за исключением титульного и запасных) должен соответсвовать критериям.'
+    id = 'slide_text_volume_check'
+
+    def __init__(self, file_info, work_mode="all_slides", min_count_words_on_slide=30,
+                 min_count_paragraphs=2, min_count_words_in_paragraph=10,
+                 max_count_words_on_slide=100, max_count_paragraphs=5,
+                 max_count_words_in_paragraph=50,
+                 slides_with_required_list=["Цель и задачи", "Заключение"],
+                 allow_only_image_or_table=True):
+        """Store the limits used by the check.
+
+        Args:
+            file_info: forwarded unchanged to the base class constructor.
+            work_mode: one of ``WORK_MODE``; selects the inspected slides.
+            min_count_words_on_slide: exclusive lower bound, words per slide.
+            min_count_paragraphs: exclusive lower bound, paragraphs per slide.
+            min_count_words_in_paragraph: exclusive lower bound, words per
+                paragraph.
+            max_count_words_on_slide: exclusive upper bound, words per slide.
+            max_count_paragraphs: exclusive upper bound, paragraphs per slide.
+            max_count_words_in_paragraph: exclusive upper bound, words per
+                paragraph.
+            slides_with_required_list: slide titles that must contain a list.
+            allow_only_image_or_table: if true, slides that have an image or
+                a table are skipped entirely.
+        """
+        super().__init__(file_info)
+        self.work_mode = work_mode
+        self.min_count_words_on_slide = min_count_words_on_slide
+        self.min_count_paragraphs = min_count_paragraphs
+        self.min_count_words_in_paragraph = min_count_words_in_paragraph
+        self.max_count_words_on_slide = max_count_words_on_slide
+        self.max_count_paragraphs = max_count_paragraphs
+        self.max_count_words_in_paragraph = max_count_words_in_paragraph
+        # Copy, so the shared default list is never aliased by an instance.
+        self.slides_with_required_list = list(slides_with_required_list)
+        self.allow_only_image_or_table = allow_only_image_or_table
+
+    def check(self):
+        """Run the criterion and return the verdict built by ``answer``.
+
+        Slides come from ``install_work_mode``; processing stops at the first
+        slide whose title contains "Запасные слайды".
+        """
+        slides = self.install_work_mode()
+        if not slides:
+            return answer(False, 'Презентация пуста или слайды не найдены.')
+
+        slides_info = []
+        for slide in slides:
+            title = slide.get_title() or ''  # tolerate a missing title
+            if "Запасные слайды" in title:
+                break
+            has_image_or_table = len(slide.get_images()) > 0 or len(slide.get_table()) > 0
+            slides_info.append(self.slide_text_analysis(
+                title,
+                slide.get_page_number()[0],
+                slide.get_text(),
+                title in self.slides_with_required_list,
+                has_image_or_table,
+            ))
+
+        report = []
+        for slide_info in slides_info:
+            if self.allow_only_image_or_table and slide_info['has_image_or_table']:
+                continue
+            violations = self._collect_violations(slide_info)
+            if violations:
+                link = self.format_page_link([slide_info['page']])
+                report.append(f'{LINE_BREAK}Слайд {link}:{LINE_BREAK}{violations}')
+
+        if not report:
+            return answer(True, 'Пройдена!')
+
+        # Remind the author of the limits the slides were checked against.
+        report.append(
+            f'{LINE_BREAK}Количество слов на слайде должно быть больше {self.min_count_words_on_slide} и меньше {self.max_count_words_on_slide};{LINE_BREAK}'
+            f'Количество абзацев на слайде должно быть больше {self.min_count_paragraphs} и меньше {self.max_count_paragraphs};{LINE_BREAK}'
+            f'Количество слов в абзаце должно быть больше {self.min_count_words_in_paragraph} и меньше {self.max_count_words_in_paragraph};{LINE_BREAK}'
+        )
+        return answer(False, ''.join(report))
+
+    def _collect_violations(self, slide_info):
+        """Return the violation messages for one analysed slide ('' if none)."""
+        messages = []
+        words_on_slide = slide_info['count_words_on_slide']
+        if not self.min_count_words_on_slide < words_on_slide < self.max_count_words_on_slide:
+            messages.append(f'Количество слов на слайде: {words_on_slide};')
+        paragraph_count = slide_info['count_paragraphs']
+        if not self.min_count_paragraphs < paragraph_count < self.max_count_paragraphs:
+            messages.append(f'Количество абзацев на слайде: {paragraph_count};')
+        for number, words in enumerate(slide_info['paragraphs'], start=1):
+            if not self.min_count_words_in_paragraph < words < self.max_count_words_in_paragraph:
+                messages.append(f'Количество слов в абзаце № {number}: {words};')
+        if slide_info['required_list'] and not slide_info['has_list']:
+            messages.append('На данном слайде наличие списка является обязательным;')
+        return ''.join(message + LINE_BREAK for message in messages)
+
+    def slide_text_analysis(self, title, page, text, required_list, page_with_images_or_tables):
+        """Count paragraphs and words in the text of one slide.
+
+        A paragraph is a non-empty line of ``text`` that is not purely numeric
+        and is not a substring of ``title``.
+
+        Returns:
+            dict with ``page``, ``required_list``, ``paragraphs`` (word count
+            of each paragraph), ``count_paragraphs``, ``count_words_on_slide``,
+            ``has_list`` and ``has_image_or_table``.
+        """
+        title = title or ''
+        stripped_lines = (line.strip() for line in (text or '').split('\n'))
+        words_per_paragraph = [
+            len(line.split())
+            for line in stripped_lines
+            if line and not line.isnumeric() and line not in title
+        ]
+        return {
+            'page': page,
+            'required_list': required_list,
+            'paragraphs': words_per_paragraph,
+            'count_paragraphs': len(words_per_paragraph),
+            'count_words_on_slide': sum(words_per_paragraph),
+            # TODO: list detection is not implemented yet, so this is always
+            # True; switch the default to False once the detection exists.
+            'has_list': True,
+            'has_image_or_table': page_with_images_or_tables,
+        }
+
+    def install_work_mode(self):
+        """Return the slides to inspect for the configured ``work_mode``.
+
+        ``WORK_MODE[0]`` selects every slide except the first one.
+        ``WORK_MODE[1]`` selects the first slide whose title contains the mode
+        name and derives the paragraph limits from the number of tasks in the
+        text that ``get_text_from_slides`` returns for "Цель и задачи". Any
+        other mode, or a missing slide, yields an empty list.
+        """
+        if self.work_mode == WORK_MODE[0]:
+            return self.file.slides[1:]
+        if self.work_mode != WORK_MODE[1]:
+            return []
+
+        target = next(
+            (slide for slide in self.file.slides if self.work_mode in (slide.get_title() or '')),
+            None,
+        )
+        if target is None:
+            # No matching slide: nothing to inspect (avoids an unbound local).
+            return []
+
+        goal_and_tasks = get_text_from_slides(self.file, "Цель и задачи")
+        tasks = Stemming().get_sentences(goal_and_tasks, True)
+        # Drop fragments that are only a single digit (optionally followed by
+        # a dot), the "Задачи:" heading or the "‹#›" marker.
+        ignore = re.compile('[0-9][.]?|Задачи:|‹#›')
+        task_count = sum(1 for task in tasks if not ignore.fullmatch(task))
+        self.min_count_paragraphs = task_count - 1
+        self.max_count_paragraphs = task_count + 3
+        return [target]
\ No newline at end of file