diff --git a/ecommerce-scraper/.env.example b/ecommerce-scraper/.env.example new file mode 100644 index 00000000..0c89a934 --- /dev/null +++ b/ecommerce-scraper/.env.example @@ -0,0 +1,2 @@ +OPENAI_API_KEY="your_openai_api_key" +SCRAPEGRAPH_API_KEY="your_scrapegraph_api_key" \ No newline at end of file diff --git a/ecommerce-scraper/README.md b/ecommerce-scraper/README.md new file mode 100644 index 00000000..5629e276 --- /dev/null +++ b/ecommerce-scraper/README.md @@ -0,0 +1,68 @@ +# E-commerce Scraper + +A Python-based web scraping tool built with CrewAI and ScrapegraphAI ([Scrapegraph](https://scrapegraph.ai/)) that extracts product information from e-commerce websites. Currently configured to scrape keyboard listings from eBay Italy. + +## Features + +- Automated web scraping using CrewAI agents +- Integration with Scrapegraph for reliable data extraction +- Configurable for different product searches +- Environment-based configuration for API keys + +## Prerequisites + +- Python 3.10 or higher (required by CrewAI) +- OpenAI API key +- Scrapegraph API key + +## Installation + +1. Clone the repository: + ```bash + git clone <repository-url> + cd ecommerce-scraper + ``` + +2. Install the required dependencies: + ```bash + pip install crewai crewai-tools scrapegraph-py python-dotenv + ``` + +3. Set up environment variables: + - Copy `.env.example` to `.env` + - Add your API keys to the `.env` file: + ```plaintext + OPENAI_API_KEY="your_openai_api_key" + SCRAPEGRAPH_API_KEY="your_scrapegraph_api_key" + ``` + +## Usage + +Run the scraper: +```bash +python ecommerce_scraper.py +``` + +The script will: +1. Connect to eBay Italy +2. Search for keyboards +3. Extract product information +4. Output the results + +## Customization + +To scrape different products or websites, modify the `website` variable in `ecommerce_scraper.py`: + +```python +website = "https://www.ebay.it/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=your_search_term&_sacat=0" +``` + +Replace `your_search_term` with the URL-encoded product you want to search for (for example, `mechanical+keyboard`). 
"""Scrape keyboard listings from eBay Italy with a CrewAI agent.

A single agent equipped with ``ScrapegraphScrapeTool`` is asked to visit the
search-results page stored in ``website`` and report what it finds.

Run it as a script::

    python ecommerce_scraper.py

API keys are loaded from a ``.env`` file via ``python-dotenv``.
"""

from crewai import Agent, Crew, Process, Task

from crewai_tools import ScrapegraphScrapeTool
from dotenv import load_dotenv

# Search-results page handed to the agent. Edit this value to scrape a
# different product or site.
website = "https://www.ebay.it/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=keyboard&_sacat=0"


def main():
    """Build the single-agent crew, run it once, and print its output.

    Returns:
        Whatever ``crew.kickoff()`` returns, so callers that import this
        module can reuse the result instead of parsing stdout.
    """
    # Load the .env file before the tool is constructed.
    # NOTE(review): presumably ScrapegraphScrapeTool reads SCRAPEGRAPH_API_KEY
    # from the environment when no key is passed -- confirm against tool docs.
    load_dotenv()

    tool = ScrapegraphScrapeTool()

    agent = Agent(
        role="Web Researcher",
        goal="Research and extract accurate information from websites",
        backstory="You are an expert web researcher with experience in extracting and analyzing information from various websites.",
        tools=[tool],
    )

    task = Task(
        name="scraping task",
        description=f"Visit the website {website} and extract detailed information about all the keyboards available.",
        # NOTE(review): the prompt asks for "a file", but nothing here writes
        # one; the result is only returned and printed below. Prompt text is
        # left unchanged on purpose.
        expected_output="A file with the informations extracted from the website.",
        agent=agent,
    )

    crew = Crew(
        agents=[agent],
        tasks=[task],
        # Explicit for readability; per the CrewAI docs this is the default.
        process=Process.sequential,
    )

    # The original script discarded this value, so a run showed no results.
    result = crew.kickoff()
    print(result)
    return result


# Guard the entry point so importing this module does not trigger LLM and
# scraping API calls as a side effect.
if __name__ == "__main__":
    main()