from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from parsera.engine.model import HuggingFaceModel
from parsera import Parsera
Define the URL and elements to scrape
url = "https://news.ycombinator.com/"
elements = {
"Title": "News title",
"Points": "Number of points",
"Comments": "Number of comments",
}
Initialize model with transformers pipeline
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct", trust_remote_code=True)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=5000)
Initialize HuggingFaceModel
llm = HuggingFaceModel(pipeline=pipe)
Scrapper with HuggingFace model
scrapper = Parsera(model=llm)
result = scrapper.run(url=url, elements=elements)
OSError: [WinError 126] The specified module could not be found. Error loading "C:\apps\python\lib\site-packages\torch\lib\fbgemm.dll"
tried to run the script but getting an torch related error:-