# split.py - Bite2Burn: AI-Powered Calorie Estimation & Personalized Nutrition Assistant
# Splits an HTML food table into one HTML table per food category.

from bs4 import BeautifulSoup

# Lookup table: grouping key -> food category title.
# Keys are the leading letter of a food code, except for group "G", which is
# divided into two code ranges that each get their own descriptive key.
category_mapping = {
    "A": "CEREALS AND MILLETS",
    "B": "GRAIN LEGUMES",
    "C": "GREEN LEAFY VEGETABLES",
    "D": "OTHER VEGETABLES",
    "E": "FRUITS",
    "F": "ROOTS AND TUBERS",
    # Group "G" is keyed by code range rather than by the bare letter.
    "G001 to G018": "CONDIMENTS AND SPICES-FRESH",
    "G019 to G033": "CONDIMENTS AND SPICES-DRY",
    "H": "NUTS AND OIL SEEDS",
    "I": "SUGARS",
    "J": "MUSHROOMS",
    "K": "MISCELLANEOUS FOODS",
    "L": "MILK AND MILK PRODUCTS",
    "M": "EGG AND EGG PRODUCTS",
    "N": "POULTRY",
    "O": "ANIMAL FAT",
    "P": "MARINE FISH",
    "Q": "MARINE SHELLFISH",
    "R": "MARINE MOLLUSKS",
    "S": "FRESHWATER FISH AND SHELLFISH",
}

def _group_key(food_code):
    """Return the grouping key for a stripped, non-empty food code."""
    prefix = food_code[0]
    if prefix != 'G':
        return prefix

    # 'G' is the only letter that is split into numeric sub-ranges.
    try:
        food_code_number = int(food_code[1:])
    except ValueError:
        # Malformed code (e.g. 'G' or 'G12a'): keep the bare letter so the row
        # is still emitted instead of aborting the whole split.
        return prefix

    if 1 <= food_code_number <= 18:
        return 'G001 to G018'
    if 19 <= food_code_number <= 33:
        return 'G019 to G033'
    # Out-of-range numbers stay under the bare 'G' key.
    return prefix


def split_html_by_alphabet(file_path):
    """Split the table rows of an HTML file into one table per food group.

    Rows are grouped by the first character of the text in their first
    ``<td>`` cell (the food code). Codes starting with 'G' are further split
    into 'G001 to G018' and 'G019 to G033' by their numeric part.

    Args:
        file_path: Path of the HTML file to read.

    Returns:
        A list of HTML strings, one ``<table>`` per group, ordered by the
        sorted group key. Each table contains a category title row (looked up
        in ``category_mapping``, "UNKNOWN CATEGORY" when the key is absent),
        a row holding every ``<th>`` found in the document, and the rows of
        that group in document order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Binary mode lets BeautifulSoup detect the document's own encoding
    # instead of decoding with the platform default, which can garble or
    # reject non-ASCII text.
    with open(file_path, 'rb') as file:
        soup = BeautifulSoup(file, 'html.parser')

    # Every <th> in the document is reused as the column header row.
    header_html = ''.join(str(th) for th in soup.find_all('th'))

    # Group rows by key, preserving document order inside each group.
    row_groups = {}
    for row in soup.find_all('tr')[1:]:  # Skipping the header row
        first_td = row.find('td')
        if first_td is None:
            # Rows without data cells (extra header/spacer rows) have no
            # food code to group by.
            continue

        # Strip surrounding whitespace so indentation or newlines inside the
        # cell do not end up as the group key.
        food_code = first_td.get_text().strip()
        if not food_code:
            continue

        row_groups.setdefault(_group_key(food_code), []).append(row)

    # Build one table per group, in sorted key order.
    html_splits = []
    for prefix in sorted(row_groups):
        category_name = category_mapping.get(prefix, "UNKNOWN CATEGORY")
        category_header = f"<tr><th colspan='11'>{category_name}</th></tr>"
        body_html = ''.join(str(row) for row in row_groups[prefix])
        html_splits.append(
            f"<table>{category_header}<tr>{header_html}</tr>{body_html}</table>"
        )

    return html_splits

# Usage
# NOTE: these statements sit at module level, so they run whenever this module
# is imported: the file below is opened and parsed at import time.
# The input location is a hard-coded absolute path on drive D:.
file_path = r'D://AIE COURSE FILES//SEM - 7//FSD//PROJECT//ref.html'
# Result of splitting the file above; kept as a module-level name.
html_splits = split_html_by_alphabet(file_path)

# Display or further process each split as needed
# The triple-quoted string below is a disabled debug loop: it is a bare string
# expression, so nothing in it is executed.
'''for index, split in enumerate(html_splits):
    print(f"HTML Split {index + 1}:\n", split)'''