-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbooking-hotel-scraping.js
More file actions
145 lines (119 loc) · 5.01 KB
/
booking-hotel-scraping.js
File metadata and controls
145 lines (119 loc) · 5.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import { chromium } from 'playwright';
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Landing page the scraper opens before filling in the search form.
const BOOKING_URL = 'https://www.booking.com/';

/**
 * STEP 1: Point the script at your Bright Data scraping browser.
 *   - Find the endpoint at: https://brightdata.com/cp/zones
 *   - Create a scraping browser: https://docs.brightdata.com/scraping-automation/scraping-browser/quickstart
 *   - Websocket format: wss://brd-customer-[id]-zone-[zone]:[password]@[domain]:[port]
 *
 * The environment variable wins; when it is unset or empty the placeholder
 * string below is used instead.
 *
 * STEP 2: Run the `node booking-hotel-scraping.js` command in a terminal.
 */
const BROWSER_WS =
  process.env.BRIGHT_DATA_SCRAPING_BROWSER_WEBSOCKET_ENDPOINT ||
  'YOUR_BRIGHT_DATA_SCRAPING_BROWSER_WEBSOCKET_ENDPOINT';

// Search parameters
const SEARCH_LOCATION = 'New York';
// Day offsets are counted from the moment the script starts.
const CHECK_IN_DAYS_FROM_NOW = 1; // check-in tomorrow
const CHECK_OUT_DAYS_FROM_NOW = 2; // check-out the day after tomorrow
/**
 * Produce a new Date shifted by a number of calendar days.
 *
 * The input is copied first, so the caller's value is never modified.
 * Overflow past the end (or start) of a month is handled by `setDate`.
 *
 * @param {Date|number|string} date - Starting point; anything `new Date()` accepts.
 * @param {number} days - Days to add (negative values move backwards).
 * @returns {Date} A fresh Date instance `days` days away from `date`.
 */
function addDays(date, days) {
  const shifted = new Date(date);
  const targetDayOfMonth = shifted.getDate() + days;
  shifted.setDate(targetDayOfMonth);
  return shifted;
}
/**
 * Format a date as `YYYY-MM-DD` for Booking.com's `data-date` attributes.
 *
 * The LOCAL calendar date is used. Going through `toISOString()` would yield
 * the UTC date, which can be a day earlier or later than the local date
 * depending on the machine's timezone and the time of day, so "tomorrow"
 * could silently turn into a different day.
 *
 * @param {Date} date - The date to format.
 * @returns {string} The local calendar date as `YYYY-MM-DD`.
 * @throws {RangeError} If `date` is an invalid Date (same as `toISOString()`).
 */
function formatDate(date) {
  if (Number.isNaN(date.getTime())) {
    throw new RangeError('Invalid time value');
  }
  const year = String(date.getFullYear()).padStart(4, '0');
  const month = String(date.getMonth() + 1).padStart(2, '0'); // getMonth() is 0-based
  const day = String(date.getDate()).padStart(2, '0');
  return `${year}-${month}-${day}`;
}
// Resolve the stay window once at start-up so every step of the run uses the
// same pair of dates, both derived from a single "now" snapshot.
const today = new Date();
const [checkInDate, checkOutDate] = [
  CHECK_IN_DAYS_FROM_NOW,
  CHECK_OUT_DAYS_FROM_NOW,
].map((offsetDays) => formatDate(addDays(today, offsetDays)));
/**
 * Run the whole hotel search: connect to the remote browser, open
 * Booking.com, dismiss the sign-in popup, submit the search form, then print
 * the scraped hotels as a table.
 *
 * The browser connection is always closed, even when a step fails, so a
 * failed run does not leave the remote browser session open.
 *
 * @returns {Promise<void>} Resolves once results are printed and the browser is closed.
 * @throws Propagates any error raised while connecting, navigating or scraping.
 */
async function searchHotels() {
  console.log("🔍 Starting hotel search process...");
  console.log(`📍 Searching for hotels in: ${SEARCH_LOCATION}`);
  console.log(`📅 Check-in date: ${checkInDate}`);
  console.log(`📅 Check-out date: ${checkOutDate}`);

  // Connect to browser
  console.log("🌐 Connecting to browser...");
  const browser = await chromium.connectOverCDP(BROWSER_WS);
  console.log("✅ Successfully connected to browser");

  try {
    // Create a new context and page
    const context = await browser.newContext();
    const page = await context.newPage();

    // Open Booking.com
    console.log("🌐 Opening Booking.com...");
    await page.goto(BOOKING_URL, { waitUntil: "domcontentloaded", timeout: 60000 });
    console.log("✅ Successfully loaded Booking.com");

    // Handle popup if it appears
    await handlePopup(page);

    // Fill search form and submit
    console.log("📝 Filling search form...");
    await fillSearchForm(page);
    console.log("✅ Search form submitted successfully");

    // Get and display results
    console.log("🔍 Searching for available hotels...");
    const results = await getHotelResults(page);

    // Display results in a table
    console.log("\n📊 Search Results:");
    console.log("==================");
    const tableData = results.map((hotel, index) => ({
      '#': index + 1,
      'Hotel Name': hotel.name,
      'Price': hotel.price,
      'Rating': hotel.rating,
    }));
    console.table(tableData);
    console.log(`\n✅ Found ${results.length} hotels`);
  } finally {
    // Runs on success AND failure so the remote session is never leaked.
    console.log("👋 Closing browser...");
    await browser.close();
    console.log("✅ Browser closed successfully");
  }
}
/**
 * Dismiss the sign-in popup if it shows up; never blocks the search.
 *
 * This step is best-effort: no failure here is rethrown. A timeout while
 * waiting for the dismiss button means the popup did not appear. Any other
 * failure (for example the click itself failing) is logged with its message
 * instead of being misreported as "no popup".
 *
 * @param {object} page - Playwright page to look for the popup on.
 * @param {object} [options]
 * @param {number} [options.timeout=25000] - How long to wait for the popup, in milliseconds.
 * @returns {Promise<void>}
 */
async function handlePopup(page, { timeout = 25000 } = {}) {
  try {
    console.log("⚠️ Checking for popup...");
    const closeButton = await page.waitForSelector('[aria-label="Dismiss sign-in info."]', { timeout });
    await closeButton.click();
    console.log("✅ Popup closed successfully");
  } catch (e) {
    // Playwright reports an expired wait with an error named "TimeoutError".
    if (e?.name === 'TimeoutError') {
      console.log("ℹ️ No popup appeared - continuing with search");
    } else {
      console.warn(`⚠️ Could not dismiss popup (${e?.message ?? e}) - continuing with search`);
    }
  }
}
/**
 * Fill in the Booking.com search form (destination and stay dates) and
 * submit it, returning once the results navigation reaches DOMContentLoaded.
 *
 * Reads the module-level SEARCH_LOCATION, checkInDate and checkOutDate.
 *
 * @param {object} page - Playwright page currently showing the search form.
 * @returns {Promise<void>}
 * @throws Propagates any Playwright error (missing element, timeout, failed click).
 */
async function fillSearchForm(page) {
  const destinationInput = '[data-testid="destination-container"] input';

  // Fill location
  console.log("📍 Entering search location...");
  await page.waitForSelector(destinationInput);
  await page.fill(destinationInput, SEARCH_LOCATION);
  console.log("✅ Location entered successfully");

  // Select dates
  console.log("📅 Selecting dates...");
  await page.click('[data-testid="searchbox-dates-container"]');
  await page.waitForSelector('[data-testid="searchbox-datepicker-calendar"]');
  await page.click(`[data-date="${checkInDate}"]`);
  await page.click(`[data-date="${checkOutDate}"]`);
  console.log("✅ Dates selected successfully");

  // Submit search
  console.log("🔍 Submitting search...");
  // The navigation wait is started before the click so the navigation cannot
  // be missed. Awaiting both through Promise.all means that if the click
  // fails, the pending navigation wait is still observed instead of turning
  // into an unhandled rejection later.
  await Promise.all([
    page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
    page.click('button[type="submit"]'),
  ]);
  console.log("✅ Search submitted successfully");
}
/**
 * Collect name, price and rating text from every property card on the page.
 *
 * A field whose element is missing, or whose text is empty, is reported as 'N/A'.
 *
 * @param {object} page - Playwright page showing the search results.
 * @returns {Promise<Array<{name: string, price: string, rating: string}>>}
 *   One entry per property card found; empty when none match.
 */
async function getHotelResults(page) {
  console.log("🏨 Extracting hotel information...");
  const hotels = await page.$$eval('[data-testid="property-card"]', (cards) => {
    // Defined inside the callback because the callback is what gets evaluated
    // against the matched cards.
    const textOrFallback = (card, selector) =>
      card.querySelector(selector)?.innerText || 'N/A';

    return cards.map((card) => {
      const name = textOrFallback(card, '[data-testid="title"]');
      const price = textOrFallback(card, '[data-testid="price-and-discounted-price"]');
      const rating = textOrFallback(card, '[data-testid="review-score"]');
      return { name, price, rating };
    });
  });
  return hotels;
}
// Start the search. The returned promise is handled explicitly so a failed
// run is reported clearly and signalled through a non-zero exit code.
searchHotels().catch((error) => {
  console.error("❌ Hotel search failed:", error);
  process.exitCode = 1;
});