generated from datawhalechina/repo-template
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Open
Description
执行 pip install -r requirements.txt 后，在 3.3.2 节运行 loader = UnstructuredMarkdownLoader(r"1. 简介 Introduction.md") 并调用 loader.load() 时报错：zipfile.BadZipFile: File is not a zip file。按照在 LangChain 仓库中搜到的方法处理后仍未能解决。无论是在与该 md 文件同级目录下的 Python 脚本中运行，还是在 Python 交互式 shell 中逐条执行，都会出现该错误。
>>> import nltk
>>> for pkg in ['punkt', 'averaged_perceptron_tagger']:
... nltk.download(pkg)
...
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
True
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data] C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data] Package averaged_perceptron_tagger is already up-to-
[nltk_data] date!
True
>>> from langchain_community.document_loaders.markdown import UnstructuredMarkdownLoader
>>> loader = UnstructuredMarkdownLoader(r"1. 简介 Introduction.md")
>>> md_pages = loader.load()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\langchain_co
re\document_loaders\base.py", line 30, in load
return list(self.lazy_load())
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\langchain_co
mmunity\document_loaders\unstructured.py", line 107, in lazy_load
elements = self._get_elements()
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\langchain_co
mmunity\document_loaders\markdown.py", line 73, in _get_elements
from unstructured.partition.md import partition_md
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\partition\md.py", line 12, in <module>
from unstructured.partition.html import partition_html
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\partition\html\__init__.py", line 1, in <module>
from unstructured.partition.html.partition import partition_html
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\partition\html\partition.py", line 17, in <module>
from unstructured.partition.html.parser import Flow, html_parser
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\partition\html\parser.py", line 98, in <module>
from unstructured.partition.text_type import (
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\partition\text_type.py", line 20, in <module>
from unstructured.nlp.tokenize import pos_tag, sent_tokenize, word_tokenize
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\nlp\tokenize.py", line 48, in <module>
download_nltk_packages()
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\nlp\tokenize.py", line 37, in download_nltk_packages
tokenizer_available = check_for_nltk_package(
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\unstructured
\nlp\tokenize.py", line 24, in check_for_nltk_package
nltk.find(f"{package_category}/{package_name}", paths=paths)
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\nltk\data.py
", line 551, in find
return find(modified_name, paths)
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\nltk\data.py
", line 538, in find
return ZipFilePathPointer(p, zipentry)
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\nltk\data.py
", line 391, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\site-packages\nltk\data.py
", line 1020, in __init__
zipfile.ZipFile.__init__(self, filename)
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\zipfile.py", line 1272, in
__init__
self._RealGetContents()
File "C:\Users\dell\Anaconda3\envs\llm-universe\lib\zipfile.py", line 1339, in
_RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
可能与 #183 相关。
Metadata
Metadata
Assignees
Labels
No labels