Skip to content

Commit f4ac711

Browse files
author
Working On It
committed
fix: improve TestParsePdfIntoTxt()
1 parent 96d5ab7 commit f4ac711

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

txt/pdf_alibaba_cloud.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,10 @@ import (
3131

3232
const (
3333
// Alibaba cloud market
34-
appCode = ""
34+
appCode = "311783680cc24ec5a1895872e1129447"
3535
apiHost = "https://generalpdf.market.alicloudapi.com"
3636
apiPath = "/ocrservice/pdf"
37-
pdfDirPath = "papar_QA_dataset\\papers"
37+
pdfDirPath = "C:\\Users\\chuny\\Desktop\\papar_QA_dataset\\papers"
3838
tmpDir = "tmp_splits"
3939
outDir = "outputdir"
4040
)
@@ -192,6 +192,7 @@ func extractTextFromWordBase64(base64Str string) (string, error) {
192192
for _, run := range para.Runs() {
193193
text += run.Text()
194194
}
195+
text += "\n"
195196
}
196197
return text, nil
197198
}

0 commit comments

Comments
 (0)