From 809fa0b89e7771c49d18309074512b6cfa0cf196 Mon Sep 17 00:00:00 2001 From: MalcolmVonMoJ Date: Wed, 27 Sep 2023 11:36:06 +0100 Subject: [PATCH 1/2] Added prison list to parser --- inc/OleeoFeedParser.php | 37 + inc/PrisonLocations.php | 1649 +++++++++++++++++++++++++++++++++++++++ run.php | 1 + 3 files changed, 1687 insertions(+) create mode 100644 inc/PrisonLocations.php diff --git a/inc/OleeoFeedParser.php b/inc/OleeoFeedParser.php index 828629e..1ebaca0 100644 --- a/inc/OleeoFeedParser.php +++ b/inc/OleeoFeedParser.php @@ -130,6 +130,33 @@ public function fixJobTitleTypos($title) { return $title; } + /** + * Extracts prison name from address or title and returns official name (help in PrisonLocations.php) + */ + public function getPrisonNames($title, $address) { + $address = implode(";",$address); //Convert array to string + $address = str_replace(" ", " ", $address); //Addresses seem to often have double spaces in them + $locations = getPrisonLocationData(); + $list = []; + foreach ($locations as $location) { + $name = $location["name"]; + if (strpos($title,$name) !== false) { + $list[] = $name; + } elseif (strripos($address,$name) !== false) { + $list[] = $name; + } else { + foreach ($location["name_variations"] as $alias) { + if (strpos($title,$alias) !== false) { + $list[] = $name; + } elseif (strripos($address,$alias) !== false) { + $list[] = $name; + } + } + } + } + return array_unique($list); + } + /** * Converts XML File to JSON FIle * @param string $sourceFile Source XML File to be parsed @@ -400,6 +427,11 @@ function validateOptionalFieldsbySpan($job, $jobContent){ } } + if (array_key_exists('addresses', $job)) { + $job_prison_names = $this->getPrisonNames($job['title'],$job['addresses']); + if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; + } + return $job; } @@ -424,6 +456,11 @@ function validateOptionalFieldsbyNewLine($job, $jobContent){ } } + if (array_key_exists('addresses', $job)) { + $job_prison_names = $this->getPrisonNames($job['title'],[]); + if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; + } + $fields = (string) $jobContent->div; $fieldsArray = explode("\n", $fields); diff --git a/inc/PrisonLocations.php b/inc/PrisonLocations.php new file mode 100644 index 0000000..4061758 --- /dev/null +++ b/inc/PrisonLocations.php @@ -0,0 +1,1649 @@ + Date: Mon, 2 Oct 2023 16:41:21 +0100 Subject: [PATCH 2/2] corrected if statements --- inc/OleeoFeedParser.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/inc/OleeoFeedParser.php b/inc/OleeoFeedParser.php index 1ebaca0..601c0f1 100644 --- a/inc/OleeoFeedParser.php +++ b/inc/OleeoFeedParser.php @@ -430,6 +430,9 @@ function validateOptionalFieldsbySpan($job, $jobContent){ if (array_key_exists('addresses', $job)) { $job_prison_names = $this->getPrisonNames($job['title'],$job['addresses']); if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; + } else { + $job_prison_names = $this->getPrisonNames($job['title'],[]); + if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; } return $job; @@ -456,10 +459,8 @@ function validateOptionalFieldsbyNewLine($job, $jobContent){ } } - if (array_key_exists('addresses', $job)) { - $job_prison_names = $this->getPrisonNames($job['title'],[]); - if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; - } + $job_prison_names = $this->getPrisonNames($job['title'],[]); + if (count($job_prison_names)) $job['prisonNames'] = $job_prison_names; $fields = (string) $jobContent->div;