Skip to content
This repository has been archived by the owner. It is now read-only.

Commit 1242893

Browse files
authored
Feature/fix configurator (#6)
Allow reusing the old config when re-configuring
1 parent 30430cd commit 1242893

File tree

2 files changed

+232
-83
lines changed

2 files changed

+232
-83
lines changed

Diff for: src/Scraper/Application/Configurator.php

+41-11
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,21 @@ class Configurator
2727
*/
2828
private $variantGenerator;
2929

30-
public function __construct(Client $client, XpathBuilder $xpathBuilder, VariantGenerator $variantGenerator)
31-
{
30+
/**
31+
* @var \Softonic\LaravelIntelligentScraper\Scraper\Repositories\Configuration
32+
*/
33+
private $configuration;
34+
35+
/**
 * Store the collaborators this configurator relies on.
 *
 * NOTE(review): $configuration is the third positional argument, ahead of
 * $variantGenerator, so any caller that builds this class positionally
 * must pass it in that slot.
 *
 * @param Client           $client           Kept on $this->client.
 * @param XpathBuilder     $xpathBuilder     Kept on $this->xpathBuilder; its find() is called to
 *                                           compute an xpath for a scraped value.
 * @param \Softonic\LaravelIntelligentScraper\Scraper\Repositories\Configuration $configuration
 *                                           Kept on $this->configuration; its findByType() is called
 *                                           to load the configuration already stored for a type.
 * @param VariantGenerator $variantGenerator Kept on $this->variantGenerator; receives addConfig()
 *                                           and fieldNotFound() calls while fields are resolved.
 */
public function __construct(
36+
Client $client,
37+
XpathBuilder $xpathBuilder,
38+
\Softonic\LaravelIntelligentScraper\Scraper\Repositories\Configuration $configuration,
39+
VariantGenerator $variantGenerator
40+
) {
3241
$this->client = $client;
3342
$this->xpathBuilder = $xpathBuilder;
3443
$this->variantGenerator = $variantGenerator;
44+
$this->configuration = $configuration;
3545
}
3646

3747
/**
@@ -41,14 +51,17 @@ public function __construct(Client $client, XpathBuilder $xpathBuilder, VariantG
4151
*/
4252
public function configureFromDataset($scrapedDataset): Collection
4353
{
54+
$type = $scrapedDataset[0]['type'];
55+
$currentConfiguration = $this->configuration->findByType($type);
56+
4457
$result = [];
4558
foreach ($scrapedDataset as $scrapedData) {
4659
if ($crawler = $this->getCrawler($scrapedData)) {
47-
$result[] = $this->findConfigByScrapedData($scrapedData, $crawler);
60+
$result[] = $this->findConfigByScrapedData($scrapedData, $crawler, $currentConfiguration);
4861
}
4962
}
5063

51-
$finalConfig = $this->mergeConfiguration($result, $scrapedDataset[0]['type']);
64+
$finalConfig = $this->mergeConfiguration($result, $type);
5265

5366
$this->checkConfiguration($scrapedDataset[0]['data'], $finalConfig);
5467

@@ -74,21 +87,25 @@ private function getCrawler($scrapedData)
7487
*
7588
* If the data is not valid anymore, it is deleted from dataset.
7689
*
77-
* @param ScrapedDataset $scrapedData
78-
* @param Crawler $crawler
90+
* @param ScrapedDataset $scrapedData
91+
* @param Crawler $crawler
92+
* @param Collection|Configuration[] $currentConfiguration
7993
*
8094
* @return array
8195
*/
82-
private function findConfigByScrapedData($scrapedData, $crawler)
96+
private function findConfigByScrapedData($scrapedData, $crawler, $currentConfiguration)
8397
{
8498
$result = [];
8599

86100
foreach ($scrapedData['data'] as $field => $value) {
87101
try {
88-
$result[$field] = $this->xpathBuilder->find(
89-
$crawler->getNode(0),
90-
$value
91-
);
102+
$result[$field] = $this->getOldXpath($currentConfiguration, $field, $crawler);
103+
if (!$result[$field]) {
104+
$result[$field] = $this->xpathBuilder->find(
105+
$crawler->getNode(0),
106+
$value
107+
);
108+
}
92109
$this->variantGenerator->addConfig($field, $result[$field]);
93110
} catch (\UnexpectedValueException $e) {
94111
$this->variantGenerator->fieldNotFound();
@@ -102,6 +119,19 @@ private function findConfigByScrapedData($scrapedData, $crawler)
102119
return $result;
103120
}
104121

122+
/**
 * Return the first already-configured xpath for a field that still matches the given document.
 *
 * The entry for the field is looked up by its 'name' key, and each of its 'xpaths' is tried
 * in order against the crawler. The first one that matches is returned.
 *
 * @param mixed      $currentConfiguration Object exposing firstWhere(); presumably a Collection of
 *                                         Configuration entries for the dataset type (TODO confirm
 *                                         against the repository's findByType()).
 * @param int|string $field                Field name compared against each entry's 'name'.
 * @param mixed      $crawler              Object exposing filterXPath(); presumably the Crawler
 *                                         built for the scraped page (TODO confirm).
 *
 * @return mixed The matching xpath taken from the entry's 'xpaths' list, or false when no stored
 *               xpath matches (including when there is no entry or no 'xpaths' for the field).
 */
private function getOldXpath($currentConfiguration, $field, $crawler)
123+
{
124+
$config = $currentConfiguration->firstWhere('name', $field);
125+
// When $config is null or has no 'xpaths' key, the ?? [] fallback makes this loop a no-op.
foreach ($config['xpaths'] ?? [] as $xpath) {
126+
// The count is only used as a truthiness check: a zero (falsy) count means nothing matched.
$isFound = $crawler->filterXPath($xpath)->count();
127+
if ($isFound) {
128+
return $xpath;
129+
}
130+
}
131+
132+
// No stored xpath applies to this document.
return false;
133+
}
134+
105135
/**
106136
* Merge configuration.
107137
*

0 commit comments

Comments
 (0)