Skip to content
This repository has been archived by the owner. It is now read-only.

Commit 1d9702b

Browse files
authored
Add more logging to know what is happening d… (#9)
Add more logging to know what is happening during execution
1 parent 98e00bd commit 1d9702b

12 files changed

+65
-9
lines changed

Diff for: src/Scraper/Application/Configurator.php

+17-3
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ public function configureFromDataset($scrapedDataset): Collection
5656
$type = $scrapedDataset[0]['type'];
5757
$currentConfiguration = $this->configuration->findByType($type);
5858

59-
$result = [];
60-
foreach ($scrapedDataset as $scrapedData) {
59+
$result = [];
60+
$totalDatasets = count($scrapedDataset);
61+
foreach ($scrapedDataset as $key => $scrapedData) {
62+
Log::info("Finding config {$key}/{$totalDatasets}");
6163
if ($crawler = $this->getCrawler($scrapedData)) {
6264
$result[] = $this->findConfigByScrapedData($scrapedData, $crawler, $currentConfiguration);
6365
}
@@ -72,10 +74,15 @@ public function configureFromDataset($scrapedDataset): Collection
7274

7375
private function getCrawler($scrapedData)
7476
{
77+
Log::info("Request {$scrapedData['url']}");
7578
$crawler = $this->client->request('GET', $scrapedData['url']);
7679

7780
$httpCode = $this->client->getInternalResponse()->getStatus();
7881
if ($httpCode !== 200) {
82+
Log::notice(
83+
"Response status ({$httpCode}) invalid, so proceeding to delete the scraped data.",
84+
compact('scrapedData')
85+
);
7986
$scrapedData->delete();
8087

8188
return null;
@@ -101,18 +108,21 @@ private function findConfigByScrapedData($scrapedData, $crawler, $currentConfigu
101108

102109
foreach ($scrapedData['data'] as $field => $value) {
103110
try {
111+
Log::info("Searching xpath for field {$field}");
104112
$result[$field] = $this->getOldXpath($currentConfiguration, $field, $crawler);
105113
if (!$result[$field]) {
114+
Log::debug('Trying to find a new xpath.');
106115
$result[$field] = $this->xpathBuilder->find(
107116
$crawler->getNode(0),
108117
$value
109118
);
110119
}
111120
$this->variantGenerator->addConfig($field, $result[$field]);
121+
Log::info('Added found xpath to the config');
112122
} catch (\UnexpectedValueException $e) {
113123
$this->variantGenerator->fieldNotFound();
114124
$value = is_array($value) ? json_encode($value) : $value;
115-
Log::warning("Field '{$field}' with value '{$value}' not found for '{$crawler->getUri()}'.");
125+
Log::notice("Field '{$field}' with value '{$value}' not found for '{$crawler->getUri()}'.");
116126
}
117127
}
118128

@@ -130,14 +140,18 @@ private function findConfigByScrapedData($scrapedData, $crawler, $currentConfigu
130140

131141
/**
 * Try to reuse an xpath that is already configured for the given field.
 *
 * The configuration entry whose 'name' equals $field is looked up and its
 * 'xpaths' are probed in order against the crawler; the first one that
 * selects at least one node is returned.
 *
 * @param mixed $currentConfiguration Object queried through firstWhere('name', $field).
 * @param mixed $field                Field name used to pick the configuration entry.
 * @param mixed $crawler              Object exposing filterXPath($xpath)->count().
 *
 * @return mixed The first configured xpath that matches, or false when none does
 *               (including when the field has no configured xpaths).
 */
private function getOldXpath($currentConfiguration, $field, $crawler)
{
    Log::debug('Checking old Xpaths');

    $fieldConfig = $currentConfiguration->firstWhere('name', $field);

    // A missing entry or a missing 'xpaths' key simply means there is nothing to probe.
    $knownXpaths = $fieldConfig['xpaths'] ?? [];

    foreach ($knownXpaths as $xpath) {
        Log::debug("Checking xpath {$xpath}");

        if (!$crawler->filterXPath($xpath)->count()) {
            continue;
        }

        return $xpath;
    }

    Log::debug('Old xpath not found');

    return false;
}
143157

Diff for: src/Scraper/Application/XpathBuilder.php

+11-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
namespace Softonic\LaravelIntelligentScraper\Scraper\Application;
44

5+
use Illuminate\Support\Facades\Log;
6+
57
class XpathBuilder
68
{
79
/**
@@ -25,6 +27,8 @@ public function find($documentElement, $values)
2527
? [$values]
2628
: $values;
2729

30+
Log::debug('Trying to find a xpath for the given values', compact('values'));
31+
2832
$nodes = [];
2933
foreach ($values as $value) {
3034
$nodes[] = $this->findNode($documentElement, $value);
@@ -108,14 +112,20 @@ private function getNodeWithValue($nodes, $isFoundCallback)
108112

109113
/**
 * Build a single xpath expression that addresses the given nodes.
 *
 * Each node is turned into a list of path elements (getPathElements) that is
 * then optimized (optimizeElements). When more than one node is given, the
 * lists are reduced through getCommonElements; otherwise the single list is
 * used as is. The resulting elements are reversed and joined with '/'.
 *
 * NOTE(review): an empty $nodes array is not handled here ($elements[0] would
 * be undefined) — callers are presumably expected to pass at least one node.
 *
 * @param array $nodes Nodes for which the xpath must be calculated.
 *
 * @return string The generated xpath.
 */
private function getXPath(array $nodes)
{
    Log::debug('Calculating xpath for the given nodes.');
    $elements = [];
    foreach ($nodes as $node) {
        $elements[] = $this->optimizeElements($node, $this->getPathElements($node));
    }

    Log::debug('Getting common elements between xpaths.');
    $finalElements = (count($elements) > 1) ? $this->getCommonElements($elements) : $elements[0];

    // This step used to log the "common elements" message a second time
    // (copy-paste), which made the trace misleading; describe the actual step.
    Log::debug('Building final xpath from the selected elements.');
    $finalXpath = implode('/', array_reverse($finalElements));
    Log::debug("Xpath generated: {$finalXpath}.");

    return $finalXpath;
}
120130

121131
private function optimizeElements($node, $elements, $childNode = null, $index = 0)

Diff for: src/Scraper/Application/XpathFinder.php

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Softonic\LaravelIntelligentScraper\Scraper\Application;
44

55
use Goutte\Client as GoutteClient;
6+
use Illuminate\Support\Facades\Log;
67
use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\MissingXpathValueException;
78

89
class XpathFinder
@@ -25,19 +26,25 @@ public function __construct(GoutteClient $client, VariantGenerator $variantGener
2526

2627
public function extract(string $url, $configs): array
2728
{
29+
Log::info("Requesting $url");
2830
$crawler = $this->client->request('GET', $url);
2931
$httpCode = $this->client->getInternalResponse()->getStatus();
3032
if ($httpCode !== 200) {
33+
Log::info('Invalid response http status', ['status' => $httpCode]);
3134
throw new \UnexpectedValueException("Response error from '{$url}' with '{$httpCode}' http code");
3235
}
3336

37+
Log::info('Response Received. Starting crawler.');
3438
$result = [];
3539
foreach ($configs as $config) {
40+
Log::info("Searching field {$config['name']}.");
3641
$subcrawler = collect();
3742
foreach ($config['xpaths'] as $xpath) {
43+
Log::debug("Checking xpath {$xpath}");
3844
$subcrawler = $crawler->filterXPath($xpath);
3945

4046
if ($subcrawler->count()) {
47+
Log::debug("Found xpath {$xpath}");
4148
$this->variantGenerator->addConfig($config['name'], $xpath);
4249
break;
4350
}
@@ -55,7 +62,9 @@ public function extract(string $url, $configs): array
5562
});
5663
}
5764

65+
Log::info('Calculating variant.');
5866
$result['variant'] = $this->variantGenerator->getId($config['type']);
67+
Log::info('Variant calculated.');
5968

6069
return $result;
6170
}

Diff for: src/Scraper/Listeners/ConfigureScraper.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public function handle(InvalidConfiguration $invalidConfiguration)
5555
$this->extractData($scrapeRequest, $config);
5656
$config->map->save();
5757
} catch (MissingXpathValueException $e) {
58-
$this->logger->warning(
58+
$this->logger->notice(
5959
"Configuration not available for '$scrapeRequest->url' and type '$scrapeRequest->type', error: {$e->getMessage()}."
6060
);
6161
event(new ScrapeFailed($invalidConfiguration->scrapeRequest));
@@ -72,7 +72,7 @@ public function handle(InvalidConfiguration $invalidConfiguration)
7272
*/
7373
private function extractData(ScrapeRequest $scrapeRequest, $config): void
7474
{
75-
$this->logger->debug("Extracting data from $scrapeRequest->url for type '$scrapeRequest->type'");
75+
$this->logger->info("Extracting data from $scrapeRequest->url for type '$scrapeRequest->type'");
7676

7777
list('data' => $data, 'variant' => $variant) = $this->xpathFinder->extract($scrapeRequest->url, $config);
7878
event(new Scraped($scrapeRequest, $data, $variant));

Diff for: src/Scraper/Listeners/Scrape.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public function handle(ScrapeRequest $scrapeRequest)
4444
$config = $this->loadConfiguration($scrapeRequest);
4545
$this->extractData($scrapeRequest, $config);
4646
} catch (MissingXpathValueException $e) {
47-
$this->logger->warning(
47+
$this->logger->notice(
4848
"Invalid Configuration for '$scrapeRequest->url' and type '$scrapeRequest->type', error: {$e->getMessage()}."
4949
);
5050

@@ -59,7 +59,7 @@ public function handle(ScrapeRequest $scrapeRequest)
5959
*/
6060
private function loadConfiguration(ScrapeRequest $scrapeRequest)
6161
{
62-
$this->logger->debug("Loading scrapping configuration for type '$scrapeRequest->type'");
62+
$this->logger->info("Loading scrapping configuration for type '$scrapeRequest->type'");
6363

6464
$config = $this->configuration->findByType($scrapeRequest->type);
6565
if ($config->isEmpty()) {
@@ -75,7 +75,7 @@ private function loadConfiguration(ScrapeRequest $scrapeRequest)
7575
*/
7676
private function extractData(ScrapeRequest $scrapeRequest, $config): void
7777
{
78-
$this->logger->debug("Extracting data from $scrapeRequest->url for type '$scrapeRequest->type'");
78+
$this->logger->info("Extracting data from $scrapeRequest->url for type '$scrapeRequest->type'");
7979

8080
list('data' => $data, 'variant' => $variant) = $this->xpathFinder->extract($scrapeRequest->url, $config);
8181
event(new Scraped($scrapeRequest, $data, $variant));

Diff for: src/Scraper/Listeners/UpdateDataset.php

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Softonic\LaravelIntelligentScraper\Scraper\Listeners;
44

55
use Illuminate\Contracts\Queue\ShouldQueue;
6+
use Illuminate\Support\Facades\Log;
67
use Softonic\LaravelIntelligentScraper\Scraper\Events\Scraped;
78
use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset;
89

@@ -23,6 +24,7 @@ public function handle(Scraped $event)
2324

2425
private function addDataset(Scraped $event)
2526
{
27+
Log::info('Adding new information to dataset', ['request' => $event->scrapeRequest]);
2628
$scraperDatasets = ScrapedDataset::withType($event->scrapeRequest->type)
2729
->withVariant($event->variant);
2830

@@ -42,6 +44,7 @@ private function addDataset(Scraped $event)
4244

4345
private function updateDataset(ScrapedDataset $dataset, Scraped $event)
4446
{
47+
Log::info('Updating new information to dataset', ['request' => $event->scrapeRequest]);
4548
$dataset->data = $event->data;
4649

4750
$dataset->save();

Diff for: tests/Unit/Scraper/Application/ConfiguratorTest.php

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ public function setUp()
5151
$this->configuration = \Mockery::mock(Configuration::class);
5252
$this->variantGenerator = \Mockery::mock(VariantGenerator::class);
5353

54+
Log::spy();
55+
5456
$this->configurator = new Configurator(
5557
$this->client,
5658
$this->xpathBuilder,

Diff for: tests/Unit/Scraper/Application/XpathBuilderTest.php

+3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Softonic\LaravelIntelligentScraper\Scraper\Application;
44

5+
use Illuminate\Support\Facades\Log;
56
use Tests\TestCase;
67

78
class XpathBuilderTest extends TestCase
@@ -23,6 +24,8 @@ public function setUp()
2324
$dom = new \DOMDocument('1.0', 'UTF-8');
2425
$dom->loadHTML($this->getHtml());
2526

27+
Log::spy();
28+
2629
$this->domElement = $dom->documentElement;
2730
$this->xpathBuilder = new XpathBuilder('/^random-.*$/');
2831
}

Diff for: tests/Unit/Scraper/Application/XpathFinderTest.php

+8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use Goutte\Client;
66
use Illuminate\Foundation\Testing\DatabaseMigrations;
7+
use Illuminate\Support\Facades\Log;
78
use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\MissingXpathValueException;
89
use Softonic\LaravelIntelligentScraper\Scraper\Models\Configuration;
910
use Tests\TestCase;
@@ -12,6 +13,13 @@ class XpathFinderTest extends TestCase
1213
{
1314
use DatabaseMigrations;
1415

16+
/**
 * Prepare the test case: run the parent set up and then replace the Log
 * facade with a spy, so log calls made by the code under test are accepted
 * without needing explicit expectations.
 */
public function setUp()
{
    parent::setUp();

    Log::spy();
}
22+
1523
/**
1624
* @test
1725
*/

Diff for: tests/Unit/Scraper/Listeners/ConfigureScraperTest.php

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ public function setUp()
4444
{
4545
parent::setUp();
4646

47+
Log::spy();
48+
4749
$this->config = \Mockery::mock(Configuration::class);
4850
$this->xpathFinder = \Mockery::mock(XpathFinder::class);
4951
$this->url = 'http://test.c/123456';

Diff for: tests/Unit/Scraper/Listeners/ScrapeTest.php

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ public function setUp()
4747
{
4848
parent::setUp();
4949

50+
Log::spy();
51+
5052
$this->config = \Mockery::mock(Configuration::class);
5153
$this->xpathFinder = \Mockery::mock(XpathFinder::class);
5254
$this->url = 'http://test.c/123456';

Diff for: tests/Unit/Scraper/Listeners/UpdateDatasetTest.php

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Softonic\LaravelIntelligentScraper\Scraper\Listeners;
44

55
use Illuminate\Foundation\Testing\DatabaseMigrations;
6+
use Illuminate\Support\Facades\Log;
67
use Softonic\LaravelIntelligentScraper\Scraper\Events\Scraped;
78
use Softonic\LaravelIntelligentScraper\Scraper\Events\ScrapeRequest;
89
use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset;
@@ -20,6 +21,8 @@ public function setUp()
2021
{
2122
parent::setUp();
2223

24+
Log::spy();
25+
2326
$this->updateDataset = new UpdateDataset();
2427
}
2528

0 commit comments

Comments
 (0)