# Source: evilprof/file_handler.py (GitHub repository subnetdusk/evilprof, branch main)
# file_handler.py
import pandas as pd
import os

from config import QUESTION_TYPE_MC, QUESTION_TYPE_OE, QUESTION_TYPE_UNKNOWN


def load_questions_from_excel(uploaded_file, status_callback):
    """
    Load questions/answers from an Excel (.xlsx, .xls) or CSV (.csv) upload.

    Each non-empty row is one question: the first cell is the question text and
    the remaining non-empty cells are its answers. Rows are grouped into blocks
    separated by empty rows; a block's type is set by its first question
    (two or more answers -> multiple choice, otherwise open-ended). Rows whose
    type differs from their block's type are reported as a warning and skipped.

    Args:
        uploaded_file: Object exposing ``name`` and, for CSV input,
            ``getvalue()`` returning bytes and ``seek()``. ``None`` is allowed.
        status_callback: Called as ``status_callback(level, key, **details)``
            for errors ("error") and warnings ("warning") only.

    Returns:
        A tuple ``(all_questions, blocks_summary, error_key)``:
        - all_questions: List of question dicts with keys 'question',
          'answers', 'original_index', 'type' and 'block_id'.
        - blocks_summary: List of dicts {'block_id', 'type', 'count'}, one per
          non-empty block, numbered consecutively from 1.
        - error_key: Error key (str), or None on success. On failure the first
          two items are None.
    """
    if uploaded_file is None:
        return None, None, "UPLOAD_FIRST_WARNING"

    file_name = uploaded_file.name
    df = None

    try:
        _, file_extension = os.path.splitext(file_name)
        file_extension = file_extension.lower()

        if file_extension in ['.xlsx', '.xls']:
            excel_df = pd.read_excel(uploaded_file, header=None)
            df = excel_df.fillna('').astype(str)
        elif file_extension == '.csv':
            # Comma is tried first; semicolon is only attempted if that fails.
            try:
                df = _read_csv_as_text(uploaded_file, ',')
            except Exception as e_comma:
                try:
                    uploaded_file.seek(0)
                    df = _read_csv_as_text(uploaded_file, ';')
                except Exception as e_semicolon:
                    status_callback("error", "FH_CSV_READ_ERROR", filename=file_name, error=f"Comma: {e_comma}, Semicolon: {e_semicolon}")
                    return None, None, "FH_CSV_READ_ERROR"
        else:
            status_callback("error", "FH_UNSUPPORTED_FORMAT", filename=file_name, extension=file_extension)
            return None, None, "FH_UNSUPPORTED_FORMAT"

        if df is None:
            raise ValueError("Failed to read the uploaded file into a DataFrame.")

        all_questions, blocks_summary = _split_into_blocks(df.values.tolist(), status_callback)

        if not all_questions:
            status_callback("error", "FH_NO_VALID_QUESTIONS", filename=file_name)
            return None, None, "FH_NO_VALID_QUESTIONS"

        return all_questions, blocks_summary, None

    except Exception as e:
        # Top-level boundary: any unexpected failure is reported, not raised.
        status_callback("error", "FH_UNEXPECTED_ERROR", filename=file_name, error=str(e))
        return None, None, "FH_UNEXPECTED_ERROR"


def _read_csv_as_text(uploaded_file, sep):
    """Parse the uploaded CSV bytes with separator `sep` into an all-string DataFrame."""
    from io import StringIO

    # 'utf-8-sig' also accepts plain UTF-8 and drops a leading BOM if present.
    content_str = uploaded_file.getvalue().decode('utf-8-sig')
    # Blank lines are kept by the parser (they delimit blocks), so leading ones
    # are removed here: otherwise an empty first line would define the table
    # width and make every following row look too long.
    content_str = content_str.lstrip()
    csv_df = pd.read_csv(
        StringIO(content_str),
        header=None,
        sep=sep,
        skipinitialspace=True,
        lineterminator='\n',
        skip_blank_lines=False,   # blank lines are block separators
        dtype=str,                # keep cell text verbatim ("5", not "5.0")
        keep_default_na=False,    # "NA", "None", "null" are real answers
    )
    return csv_df.fillna('').astype(str)


def _split_into_blocks(rows, status_callback):
    """Group rows (lists of cell values) into typed question blocks; return (questions, summary)."""
    all_questions = []
    blocks_summary = []
    current_block_id = 1
    current_block_questions = []
    current_block_type = None

    # A trailing empty sentinel row closes the last block without a special case.
    for index, row in enumerate(list(rows) + [[]]):
        cells = [str(cell).strip() for cell in row]

        if not any(cells):
            # Empty row: close the open block, if any. The id only advances
            # when a block was actually recorded, so ids stay consecutive even
            # with leading or repeated empty rows.
            if current_block_questions:
                if current_block_type is None:
                    current_block_type = QUESTION_TYPE_UNKNOWN
                blocks_summary.append({
                    'block_id': current_block_id,
                    'type': current_block_type,
                    'count': len(current_block_questions),
                })
                all_questions.extend(current_block_questions)
                current_block_id += 1
            current_block_questions = []
            current_block_type = None
            continue

        question_text = cells[0]
        if not question_text:
            # Rows without a question in the first cell are ignored.
            continue

        answers = [ans for ans in cells[1:] if ans]
        question_type = QUESTION_TYPE_MC if len(answers) >= 2 else QUESTION_TYPE_OE

        if not current_block_questions:
            # The first question of a block decides the block's type.
            current_block_type = question_type
        elif question_type != current_block_type:
            status_callback("warning", "FH_BLOCK_MIXED_TYPES", block_id=current_block_id, expected=current_block_type, found=question_type, row_num=index + 1)
            continue

        current_block_questions.append({
            'question': question_text,
            'answers': answers if current_block_type == QUESTION_TYPE_MC else [],
            'original_index': index,
            'type': current_block_type,
            'block_id': current_block_id,
        })

    return all_questions, blocks_summary