Skip to content

Commit 646aac3

Browse files
committed
first commit
0 parents  commit 646aac3

File tree

8 files changed

+174
-0
lines changed

8 files changed

+174
-0
lines changed

.github/workflows/docker-build.yml

+33
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
name: Docker Build and Push

on:
  push:
    branches: [ "main" ]  # Trigger on pushes to the main branch
  pull_request:
    branches: [ "main" ]  # Trigger on pull requests targeting the main branch
  # Allow the workflow to be started manually
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Credentials are taken from repository secrets and must never be
      # committed to the workflow file. The step is skipped for pull requests:
      # nothing is pushed there and secrets are not available to forks.
      - name: Login to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          # Pull requests only verify that the image builds; pushes to main
          # and manual runs publish it.
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            ${{ secrets.DOCKERHUB_USERNAME }}/office2md:latest

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
envs/

Dockerfile

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
# Use the official Python image as the base image
FROM python:3.13-alpine

USER root

# Runtime dependency
RUN apk add --no-cache ffmpeg

# Set the working directory
WORKDIR /app

# Install dependencies first so this layer stays cached until
# requirements.txt changes; --no-cache-dir keeps pip's cache out of the image
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the project files into the container
COPY . .

# Set environment variables
ENV PYTHONUNBUFFERED=1

# Expose the port (the application listens on 8000)
EXPOSE 8000

# Run the FastAPI application with uvicorn.
# --reload is a development-only file watcher and is deliberately not used here.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

banner.txt

+8
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
+----------------------------------------+
2+
| |
3+
| 🚀 MarkItDown API Server |
4+
| |
5+
| ✨ Server is running... |
6+
| 🌐 Visit http://localhost:8000/docs |
7+
| |
8+
+----------------------------------------+

files/1734436182.png

151 KB
Loading

files/1734436193.png

151 KB
Loading

main.py

+101
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,101 @@
1+
import os
2+
import time
3+
import asyncio
4+
from fastapi import FastAPI, File, UploadFile, HTTPException, status
5+
from fastapi.middleware.cors import CORSMiddleware
6+
from fastapi.responses import FileResponse
7+
import aiofiles
8+
from markitdown import MarkItDown
9+
from starlette.responses import JSONResponse
10+
from urllib.parse import unquote
11+
from openai import OpenAI
12+
from contextlib import asynccontextmanager
13+
from pathlib import Path
14+
from typing import Dict
15+
16+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Print the startup banner, then hand control to the application.

    The banner text is read from ``banner.txt`` in the same directory as this
    module. A missing banner file is not an error; a fallback line is printed
    instead. No code runs after the ``yield``, so there is no shutdown work.
    """
    # Runs once at startup.
    banner_file = Path(__file__).with_name('banner.txt')
    try:
        print(banner_file.read_text(encoding='utf-8'))
    except FileNotFoundError:
        print("Banner file not found, starting server without banner...")
    yield
27+
app = FastAPI(lifespan=lifespan)

# OpenAI-compatible client handed to MarkItDown for image text extraction.
# The API key is read from the BIGMODEL_API_KEY environment variable instead
# of being committed to the repository. Any key that was ever committed must
# be treated as leaked: revoke it and issue a new one.
client = OpenAI(
    base_url="https://open.bigmodel.cn/api/paas/v4",
    api_key=os.environ.get("BIGMODEL_API_KEY"),
)

# Add CORS middleware
# NOTE(review): a wildcard origin combined with allow_credentials=True lets any
# site issue credentialed requests. Nothing in this file uses cookies or auth,
# so confirm whether credentials are needed and restrict origins if they are.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)
42+
43+
async def delete_files(file_path: str, output_path: str, delay: int):
    """Delete up to two files after waiting ``delay`` seconds.

    Args:
        file_path: Path of the first file to remove. An empty string is skipped.
        output_path: Path of the second file to remove. An empty string is
            skipped.
        delay: Number of seconds to sleep before removing anything.

    A path that no longer exists is ignored, so a file removed by someone else
    while this coroutine was sleeping does not raise inside a background task,
    and a missing first file never prevents removal of the second.
    """
    await asyncio.sleep(delay)
    for path in (file_path, output_path):
        if not path:
            continue
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone: the goal of this function is met for this path.
            pass
49+
50+
# Strong references to pending clean-up tasks. The event loop keeps only weak
# references to tasks, so a task nobody else references may be garbage
# collected before it has run.
_cleanup_tasks = set()


@app.post("/upload/",
    response_model=Dict[str, str],
    status_code=status.HTTP_200_OK,
    summary="上传图片文件",
    description="上传图片文件并提取其中的文本内容",
    responses={
        200: {
            "description": "成功提取文本",
            "content": {
                "application/json": {
                    "example": {
                        "new_filename": "1679012345_1a2b3c4d.jpg",
                        "content": "提取的文本内容"
                    }
                }
            }
        }
    }
)
async def upload_file(
    file: UploadFile = File(..., description="要上传的图片文件,支持常见图片格式")
):
    """Store an uploaded file, extract its text with MarkItDown and return it.

    The upload is written to ``files/`` under a generated name (Unix timestamp,
    a random suffix and the original extension), converted with MarkItDown
    using the module-level ``client``, and scheduled for deletion 10 minutes
    later.

    Args:
        file: The uploaded file.

    Returns:
        A dict with ``new_filename`` (the generated name) and ``content`` (the
        text extracted by MarkItDown).
    """
    import secrets

    # The file is written to ``files/``, so that is the directory that must exist.
    os.makedirs('files', exist_ok=True)

    # ``filename`` can be missing on an upload; fall back to "no extension".
    file_extension = os.path.splitext(file.filename or "")[1]
    timestamp = int(time.time())
    # The random suffix keeps uploads arriving within the same second from
    # overwriting each other.
    new_filename = f"{timestamp}_{secrets.token_hex(4)}{file_extension}"
    file_path = f"files/{new_filename}"

    try:
        async with aiofiles.open(file_path, 'wb') as out_file:
            content = await file.read()
            await out_file.write(content)

        # NOTE(review): requirements.txt does not pin markitdown; confirm the
        # mlm_client / mlm_model / mlm_prompt keyword names match the installed
        # version.
        markitdown = MarkItDown(mlm_client=client, mlm_model="glm-4v-flash")
        # convert() is synchronous; run it in a worker thread so the event loop
        # keeps serving other requests meanwhile.
        result = await asyncio.to_thread(
            markitdown.convert,
            file_path,
            mlm_prompt="提取图片中全部的文本,不需要任何推理和总结,只需要原文",
        )
    finally:
        # Schedule file deletion after 10 minutes (600 seconds), also when the
        # write or the conversion failed, so no upload is left on disk forever.
        cleanup = asyncio.create_task(delete_files(file_path, "", 600))
        _cleanup_tasks.add(cleanup)
        cleanup.add_done_callback(_cleanup_tasks.discard)

    return {
        "new_filename": new_filename,
        "content": result.text_content
    }
95+
96+
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """Render an ``HTTPException`` as a JSON body of the form ``{"message": ...}``.

    The response uses the exception's status code and puts its ``detail``
    under the ``message`` key. ``request`` is part of the handler signature
    but is not used.
    """
    payload = {"message": exc.detail}
    return JSONResponse(content=payload, status_code=exc.status_code)

requirements.txt

+7
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
fastapi
2+
aiofiles
3+
markitdown
4+
uvicorn
5+
openai
6+
socksio
7+
python-multipart

0 commit comments

Comments
 (0)