Skip to content

Commit acec57e

Browse files
committed
Update gradio for reload mode
1 parent 60f5bbc commit acec57e

File tree

1 file changed

+59
-43
lines changed

1 file changed

+59
-43
lines changed

gradio/gradio_app.py

+59-43
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
import sys
33

44
if "APP_PATH" in os.environ:
5-
os.chdir(os.environ["APP_PATH"])
6-
# fix sys.path for import
7-
sys.path.append(os.getcwd())
5+
app_path = os.path.abspath(os.environ["APP_PATH"])
6+
if os.getcwd() != app_path:
7+
# fix sys.path for import
8+
os.chdir(app_path)
9+
if app_path not in sys.path:
10+
sys.path.append(app_path)
811

912
import gradio as gr
1013

@@ -27,8 +30,7 @@
2730
def load_models():
2831
return create_model_dict()
2932

30-
def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict):
31-
config_parser = ConfigParser(kwargs)
33+
def convert_pdf(fname: str, config_parser: ConfigParser) -> (str, Dict[str, Any], dict):
3234
config_dict = config_parser.generate_config_dict()
3335
config_dict["pdftext_workers"] = 1
3436
converter = PdfConverter(
@@ -57,9 +59,6 @@ def get_page_image(pdf_file, page_num, dpi=96):
5759
png_image = png.convert("RGB")
5860
return png_image
5961

60-
def get_uploaded_image(in_file):
61-
return Image.open(in_file).convert("RGB")
62-
6362

6463
def img_to_html(img, img_alt):
6564
img_bytes = io.BytesIO()
@@ -80,8 +79,9 @@ def markdown_insert_images(markdown, images):
8079
markdown = markdown.replace(image_markdown, img_to_html(images[image_path], image_alt))
8180
return markdown
8281

83-
84-
model_dict = load_models()
82+
# Load models if not already loaded in reload mode
83+
if 'model_dict' not in globals():
84+
model_dict = load_models()
8585

8686
with gr.Blocks(title="Marker") as demo:
8787
gr.Markdown("""
@@ -94,54 +94,63 @@ def markdown_insert_images(markdown, images):
9494

9595
with gr.Row():
9696
with gr.Column():
97-
in_file = gr.File(label="PDF file or image:", file_types=[".pdf", ".png", ".jpg", ".jpeg", ".gif", ".webp"])
98-
in_num = gr.Slider(label="Page number", minimum=1, maximum=100, value=1, step=1)
99-
in_img = gr.Image(label="Select page of Image", type="pil", sources=None)
97+
in_file = gr.File(label="PDF file:", file_types=[".pdf"])
98+
in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
99+
in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
100100

101101
page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"0-0")
102102
output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
103103

104104
force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
105105
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
106-
trun_marker_btn = gr.Button("Run Marker")
106+
trun_marker_btn = gr.Button("Run Marker", interactive=False)
107107
with gr.Column():
108-
result_md = gr.Markdown(label="Result markdown")
109-
result_json = gr.JSON(label="Result json")
110-
result_html = gr.Markdown(label="Result html")
108+
result_md = gr.Markdown(label="Result markdown", visible=False)
109+
result_json = gr.JSON(label="Result json", visible=False)
110+
result_html = gr.Markdown(label="Result html", visible=False)
111111
debug_img_pdf = gr.Image(label="PDF debug image", visible=False)
112112
debug_img_layout = gr.Image(label="Layout debug image", visible=False)
113113

114114
def show_image(file, num=1):
115-
if file.endswith('.pdf'):
116-
count = count_pdf(file)
117-
img = get_page_image(file, num)
118-
return [
119-
gr.update(visible=True, maximum=count),
120-
gr.update(value=img)]
121-
else:
122-
img = get_uploaded_image(file)
115+
if file is None:
123116
return [
117+
gr.update(visible=False, maximum=1, value=num),
124118
gr.update(visible=False),
125-
gr.update(value=img)]
126-
119+
"0-0"]
120+
count = count_pdf(file)
121+
img = get_page_image(file, num)
122+
return [
123+
gr.update(visible=True, maximum=count),
124+
gr.update(visible=True, value=img),
125+
f"0-{num-1}"]
126+
127+
in_file.clear(
128+
fn=show_image,
129+
inputs=[in_file],
130+
outputs=[in_num, in_img, page_range_txt]
131+
)
127132
in_file.upload(
128133
fn=show_image,
129134
inputs=[in_file],
130-
outputs=[in_num, in_img],
135+
outputs=[in_num, in_img, page_range_txt]
131136
)
132137
in_num.change(
133138
fn=show_image,
134139
inputs=[in_file, in_num],
135-
outputs=[in_num, in_img],
140+
outputs=[in_num, in_img, page_range_txt]
136141
)
137142

138143
def check_page_range(page_range, file):
139144
count = count_pdf(file) if file is not None else 1
140145
if not re.match(r"^(\d+(-\d+)?)?$", page_range):
141146
gr.Warning(f"Invalid format. Please use 0-{count-1}", duration=0)
142-
return gr.update(info=f"format 0-{count-1}"), gr.update(interactive=False)
147+
return [
148+
gr.update(info=f"format 0-{count-1}"),
149+
gr.update(interactive=False)]
143150
else:
144-
return gr.update(info=f"format 0-{count-1}"), gr.update(interactive=True)
151+
return [
152+
gr.update(info=f"format 0-{count-1}"),
153+
gr.update(interactive=True)]
145154
page_range_txt.change(
146155
fn=check_page_range,
147156
inputs=[page_range_txt, in_file],
@@ -150,28 +159,34 @@ def check_page_range(page_range, file):
150159

151160
# Run Marker
152161
def run_marker_img(filename, page_range, force_ocr, output_format, debug):
162+
cli_options = {
163+
"output_format": output_format,
164+
"page_range": page_range,
165+
"force_ocr": force_ocr,
166+
"debug": debug,
167+
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
168+
}
169+
config_parser = ConfigParser(cli_options)
153170
rendered = convert_pdf(
154171
filename,
155-
page_range=page_range,
156-
force_ocr=force_ocr,
157-
output_format=output_format,
158-
output_dir=settings.DEBUG_DATA_FOLDER if debug else None,
159-
debug=debug
172+
config_parser
160173
)
161-
text, ext, images = text_from_rendered(rendered)
162-
163174
gr_debug_pdf = gr.update(visible=False)
164175
gr_debug_lay = gr.update(visible=False)
165176
if debug:
166177
debug_data_path = rendered.metadata.get("debug_data_path")
167178
if debug_data_path:
168-
pdf_image_path = os.path.join(debug_data_path, f"pdf_page_0.png")
179+
page_range = config_parser.generate_config_dict()["page_range"]
180+
first_page = page_range[0] if page_range else 0
181+
182+
pdf_image_path = os.path.join(debug_data_path, f"pdf_page_{first_page}.png")
169183
img = Image.open(pdf_image_path)
170184
gr_debug_pdf = gr.update(visible=True, value=img)
171-
layout_image_path = os.path.join(debug_data_path, f"layout_page_0.png")
185+
layout_image_path = os.path.join(debug_data_path, f"layout_page_{first_page}.png")
172186
img = Image.open(layout_image_path)
173187
gr_debug_lay = gr.update(visible=True, value=img)
174188

189+
text, ext, images = text_from_rendered(rendered)
175190
if output_format == "markdown":
176191
text = markdown_insert_images(text, images)
177192
return [
@@ -197,11 +212,12 @@ def run_marker_img(filename, page_range, force_ocr, output_format, debug):
197212
gr_debug_pdf,
198213
gr_debug_lay
199214
]
200-
215+
201216
trun_marker_btn.click(
202217
fn=run_marker_img,
203218
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb],
204-
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout],
219+
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
205220
)
206221

207-
demo.launch()
222+
if __name__ == "__main__":
223+
demo.launch()

0 commit comments

Comments
 (0)