Update app.py

PhoenixAlpha23 · web-flow · commit 61b19cab3ee5 · 2025-02-03T22:40:30.000+05:30
multilingual version Jan 11
diff --git a/app.py b/app.py
@@ -7,9 +7,7 @@
 
 from utils.image_processing import preprocess_image
 from utils.pdf_processing import process_pdf
-from utils.text_extraction import extract_text
-
-from utils.text_extraction import get_supported_languages
+from utils.text_extraction import extract_text, get_supported_languages
 
 def setup_page_config():
     """Configure Streamlit page settings."""
@@ -30,6 +28,32 @@ def initialize_session_state():
 
 def create_sidebar_options():
     """Create user-friendly OCR processing options."""
+    st.sidebar.header("OCR Settings")
+    
+    # Language Selection
+    st.sidebar.subheader("Language Settings")
+    available_languages = get_supported_languages()
+    default_lang = 'English'
+    
+    # Primary language selection
+    primary_lang = st.sidebar.selectbox(
+        "Primary Language",
+        options=list(available_languages.keys()),
+        index=list(available_languages.keys()).index(default_lang),
+        help="Select the main language of your document"
+    )
+    
+    # Additional languages selection
+    additional_langs = st.sidebar.multiselect(
+        "Additional Languages (Optional)",
+        options=[lang for lang in available_languages.keys() if lang != primary_lang],
+        help="Select additional languages if your document contains multiple languages"
+    )
+    
+    # Combine selected languages
+    selected_langs = [primary_lang] + additional_langs
+    lang_codes = '+'.join([available_languages[lang] for lang in selected_langs])
+
     st.sidebar.header("Image Enhancement Options")
     return {
         'apply_threshold': st.sidebar.checkbox(
@@ -63,9 +87,9 @@ def create_sidebar_options():
                 12: "Word by Word"
             }[x],
             help="Choose how the system should read your document's layout. Automatic[3] is best for most documents."
-        )
+        ),
+        'language': lang_codes  
     }
-
 def display_processed_image(original_image, processed_image):
     """
     Display original and processed images side by side
@@ -74,12 +98,10 @@ def display_processed_image(original_image, processed_image):
         original_image (numpy.ndarray): Original input image
         processed_image (numpy.ndarray): Preprocessed image
     """
-    # Create two columns for display
     col1, col2 = st.columns(2)
     
     with col1:
         st.subheader("Original Image")
-        # Convert OpenCV image (BGR) to RGB for correct color display
         st.image(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), 
                  use_container_width=True)
     
@@ -88,7 +110,6 @@ def display_processed_image(original_image, processed_image):
         st.image(cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB), 
                  use_container_width=True)
 
-
 def process_uploaded_files(uploaded_files, options):
     """
     Modified function to show processed images
@@ -100,12 +121,17 @@ def process_uploaded_files(uploaded_files, options):
     Returns:
         tuple: Lists of all text and individual texts
     """
+    print("Uploaded Files:",uploaded_files)
     all_text = []
     individual_texts = {}
     
     # Progress bar for multiple file processing
     progress_bar = st.progress(0)
     
+    # Make sure uploaded_files is not None and is iterable
+    if not uploaded_files:
+        return all_text, individual_texts
+        
     for i, uploaded_file in enumerate(uploaded_files):
         try:
             # Update progress bar
@@ -123,9 +149,6 @@ def process_uploaded_files(uploaded_files, options):
                 # Show original image before processing
                 st.subheader(f"Processing: {uploaded_file.name}")
                 
-                # Display original image
-                st.image(image, caption="Original Image", use_container_width=True)
-                
                 # Preprocess image
                 processed_image = preprocess_image(image_np, options)
                 
@@ -148,7 +171,7 @@ def process_uploaded_files(uploaded_files, options):
 
 def create_text_downloads(all_text, individual_texts):
     """
-    Create download buttons for extracted texts.
+    Create download buttons for the extracted text(s).
     
     Args:
         all_text (list): Combined extracted texts
@@ -189,12 +212,14 @@ def main():
     # App title and description
     st.title("Text Extraction using Tesseract OCR")
     st.markdown('## Upload multiple images or PDF files to extract text from.')
-    st.write('You can use documents in the following languages\n')
-    # Print available languages
-    print(get_supported_languages())
-    st.write('From the list of Tesseract Page Segmentation Modes (PSM)  on the left,\n you control how Tesseract analyzes and interprets document with varying layouts:')
+    
+    # Display supported languages
+    available_languages = get_supported_languages()
+    st.write('Supported Languages:', ', '.join(available_languages.keys()))
+    
+    st.write('From the list of Tesseract Page Segmentation Modes (PSM) on the left,\n you control how Tesseract analyzes and interprets document with varying layouts:')
     st.write(""" Automatic detection works fine for most documents,\n
-    You can  also Choose a different one based on your document's structure from the list.\n""")
+    You can also Choose a different one based on your document's structure from the list.\n""")
     
     # File uploader
     uploaded_files = st.file_uploader(
@@ -203,7 +228,7 @@ def main():
         type=["png", "jpg", "jpeg", "pdf"]
     )
     
-    # Create OCR options
+    # OCR options
     options = create_sidebar_options()
     
     # Process files when uploaded
@@ -215,9 +240,8 @@ def main():
         if all_text:
             st.text_area("Extracted Text", value="\n".join(all_text), height=300)
             
-            # Create download buttons
+            # Show download buttons for the extracted text
             create_text_downloads(all_text, individual_texts)
 
-# This ensures the app runs automatically when accessed
 if __name__ == "__main__":
     main()