Skip to content

Commit f30cb0d

Browse files
committed
remove languages
1 parent c881012 commit f30cb0d

4 files changed

Lines changed: 3 additions & 14 deletions

File tree

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
env:
1717
IMAGE_NAME: ghcr.io/${{ github.repository }}/server
18-
IMAGE_TAG: ${{ github.event_name == 'release' && github.event.release.tag_name || github.sha }}
18+
IMAGE_TAG: ${{ github.event_name == 'release' && github.event.release.tag_name || 'latest' }}
1919
outputs:
2020
commit_title: ${{ steps.get_head_commit_title.outputs.title }}
2121
steps:
@@ -41,7 +41,7 @@ jobs:
4141
context: .
4242
file: ./server/Dockerfile
4343
push: true
44-
tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }},${{ env.IMAGE_NAME }}:latest
44+
tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}
4545
cache-from: type=gha
4646
cache-to: type=gha,mode=max
4747

README.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@ Process a document.
7272
**Parameters:**
7373
- `file`: The PDF file to process (required)
7474
- `page_range`: Page range to convert (e.g. "0,5-10,20")
75-
- `languages`: Comma separated list of languages for OCR
7675
- `force_ocr`: Force OCR on all pages (default: false)
7776
- `paginate_output`: Whether to paginate the output (default: false)
7877
- `output_format`: Output format - "markdown", "json", or "html" (default: "markdown")

server/main.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
from server.exceptions import FailedToConvertPDFException
1919
from server.logger import logger
20-
from server.schemas import Languages, OutputFormat, ParseResponse
20+
from server.schemas import OutputFormat, ParseResponse
2121
from server.security import check_api_key
2222

2323
app_data = {}
@@ -52,7 +52,6 @@ def health() -> Response:
5252

5353

5454
page_range = Form(default=None, description="Page range to convert, specify comma separated page numbers or ranges. Example: '0,5-10,20'", example="0,5-10,20") # fmt: off
55-
languages = Form(default=None, description="Comma separated list of languages to use for OCR. Must be either the names or codes from from https://github.com/VikParuchuri/surya/blob/master/surya/recognition/languages.py.", example=None) # fmt: off
5655
force_ocr = Form(default=False, description="Force OCR on all pages of the PDF. Defaults to False. This can lead to worse results if you have good text in your PDFs (which is true in most cases).") # fmt: off
5756
paginate_output = Form(default=False, description="Whether to paginate the output. Defaults to False. If set to True, each page of the output will be separated by a horizontal rule that contains the page number (2 newlines, {PAGE_NUMBER}, 48 - characters, 2 newlines).") # fmt: off
5857
output_format = Form(default="markdown", description="The format to output the text in. Can be 'markdown', 'json', or 'html'. Defaults to 'markdown'.") # fmt: off
@@ -63,7 +62,6 @@ def health() -> Response:
6362
@app.post("/marker/upload", tags=["Marker"], response_model=ParseResponse, dependencies=[Security(check_api_key)])
6463
async def convert_pdf_upload(
6564
page_range: Optional[str] = page_range,
66-
languages: Optional[Languages] = languages,
6765
force_ocr: Optional[bool] = force_ocr,
6866
paginate_output: Optional[bool] = paginate_output,
6967
output_format: Optional[OutputFormat] = output_format,
@@ -88,7 +86,6 @@ async def convert_pdf_upload(
8886
options = {
8987
"filepath": filepath,
9088
"page_range": page_range,
91-
"languages": languages,
9289
"force_ocr": force_ocr,
9390
"paginate_output": paginate_output,
9491
"output_format": output_format,

server/schemas.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,6 @@
22
from typing import Any
33

44
from pydantic import BaseModel
5-
from surya.recognition.languages import CODE_TO_LANGUAGE
6-
7-
LANGUAGES = list(CODE_TO_LANGUAGE.keys()) + list(CODE_TO_LANGUAGE.values())
8-
LANGUAGES = {str(lang).upper(): str(lang) for lang in sorted(set(LANGUAGES))}
9-
10-
Languages = Enum("Language", LANGUAGES, type=str)
11-
125

136
class OutputFormat(str, Enum):
147
markdown = "markdown"

0 commit comments

Comments
 (0)