Open

Description
Bridge request
Bridge for Nikkei Asia.
General information
-
Host URI for the bridge (i.e. https://asia.nikkei.com): -
Which information would you like to see?
Articles
-
How should the information be displayed/formatted?
-
Which of the following parameters do you expect?
- Title
- URI (link to the original article)
- Author
- Timestamp
- Content (the content of the article)
- Enclosures (pictures, videos, etc...)
- Categories (categories, tags, etc...)
Options
- Limit number of returned items
- Default limit: 20
- Load full articles
- Cache articles (articles are stored in a local cache on first request): yes
- Cache timeout (max = 24 hours): 1 hour
- Balance requests (RSS-Bridge uses cached versions to reduce bandwidth usage)
- Timeout (default = 5 minutes, max = 24 hours): 5 minutes
Additional notes
- Fetch from the RSS feed
- Fetch article full content
- Remove ads
<?php
/**
 * Bridge that turns the Nikkei Asia RSS feed into feed items with full article content.
 *
 * For every feed entry (up to MAX_CONTENTS) the article body is requested from the
 * site's article-access endpoint, stripped of advertisement nodes, and stored as
 * the item's content.
 */
class NikkeiBridge extends BridgeAbstract
{
    const NAME = 'Nikkei Bridge';
    const URI = 'https://asia.nikkei.com';
    const DESCRIPTION = 'Fetches the latest articles from the Nikkei Asia';
    const MAINTAINER = 'notme';
    const CACHE_TIMEOUT = 3600;
    const MAX_CONTENTS = 20;

    /**
     * Download the RSS feed and append one entry to $this->items per feed item.
     *
     * Calls returnServerError() when the feed cannot be downloaded or parsed.
     * At most MAX_CONTENTS items are collected.
     */
    public function collectData()
    {
        $rssFeedUrl = 'https://asia.nikkei.com/rss/feed/nar';

        $rssContent = file_get_contents($rssFeedUrl);
        if ($rssContent === false || trim($rssContent) === '') {
            returnServerError('Could not request ' . $rssFeedUrl);
        }

        // Collect XML parse errors internally instead of emitting PHP warnings,
        // then restore whatever libxml error mode was active before.
        $previousErrorMode = libxml_use_internal_errors(true);
        $rss = simplexml_load_string($rssContent);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        if ($rss === false) {
            returnServerError('Could not parse RSS feed from ' . $rssFeedUrl);
        }

        // Items sit directly under the root in RSS 1.0 (RDF) documents and under
        // <channel> in RSS 2.0 documents; accept either layout.
        $elements = $rss->item;
        if (count($elements) === 0 && isset($rss->channel->item)) {
            $elements = $rss->channel->item;
        }

        $count = 0;
        foreach ($elements as $element) {
            if ($count >= self::MAX_CONTENTS) {
                break;
            }
            $count++;

            // Dublin Core children (dc:date, dc:creator), if the feed provides any.
            $dublinCore = $element->children('http://purl.org/dc/elements/1.1/');

            $item = [];
            $item['title'] = trim((string)$element->title);
            $item['uri'] = trim((string)$element->link);

            // Prefer <pubDate>, fall back to <dc:date>. Leave the key unset rather
            // than storing false when neither yields a parsable date.
            $rawDate = trim((string)$element->pubDate);
            if ($rawDate === '') {
                $rawDate = trim((string)$dublinCore->date);
            }
            $timestamp = ($rawDate === '') ? false : strtotime($rawDate);
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $author = trim((string)$dublinCore->creator);
            if ($author !== '') {
                $item['author'] = $author;
            }

            // Fetch the article content; fall back to a placeholder on failure.
            $articleContent = $this->fetchArticleContent($item['uri']);
            if ($articleContent) {
                $item['content'] = $articleContent;
            } else {
                $item['content'] = 'Content could not be retrieved';
            }

            $this->items[] = $item;
        }
    }

    /**
     * Fetch the body of one article and return it as ad-free HTML.
     *
     * The path component of $url is base64-encoded and appended to the
     * article-access endpoint; the JSON response's "body" field holds the HTML.
     *
     * @param string $url Absolute URL of the article.
     * @return string|null Cleaned HTML fragment, or null when the URL has no path,
     *                     the request fails, or the response has no usable body.
     */
    private function fetchArticleContent($url)
    {
        // parse_url() returns false/null when the URL is malformed or has no path.
        $path = parse_url((string)$url, PHP_URL_PATH);
        if (!is_string($path) || $path === '') {
            error_log('Could not extract path from ' . $url);
            return null;
        }

        $apiUrl = 'https://asia.nikkei.com/__service/v1/piano/article_access/' . base64_encode($path);

        $apiResponse = file_get_contents($apiUrl);
        if ($apiResponse === false || $apiResponse === '') {
            error_log('Could not request ' . $apiUrl);
            return null;
        }

        $apiResponseData = json_decode($apiResponse, true);
        if (
            !is_array($apiResponseData)
            || !isset($apiResponseData['body'])
            || !is_string($apiResponseData['body'])
            || trim($apiResponseData['body']) === ''
        ) {
            error_log('Invalid API response for ' . $apiUrl);
            return null;
        }

        return $this->removeAds($apiResponseData['body']);
    }

    /**
     * Strip every element whose class attribute contains "o-ads" from an HTML fragment.
     *
     * @param string $html UTF-8 encoded HTML fragment.
     * @return string The fragment without ad elements. If the markup cannot be
     *                parsed at all, the input is returned unchanged.
     */
    private function removeAds($html)
    {
        $previousErrorMode = libxml_use_internal_errors(true);

        $dom = new DOMDocument();
        // Without an explicit encoding hint libxml treats the input as ISO-8859-1
        // and garbles multi-byte UTF-8 characters. The prefix also guarantees a
        // non-empty argument, which loadHTML() requires on PHP 8.
        $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html);

        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        if (!$loaded) {
            return $html;
        }

        $xpath = new DOMXPath($dom);
        foreach ($xpath->query('//*[contains(@class, "o-ads")]') as $adsNode) {
            if ($adsNode->parentNode !== null) {
                $adsNode->parentNode->removeChild($adsNode);
            }
        }

        // Serialise only the children of <body> so the result is a fragment rather
        // than a complete document with DOCTYPE/<html>/<body> wrappers.
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return $dom->saveHTML();
        }

        $cleanedHtml = '';
        foreach ($body->childNodes as $child) {
            $cleanedHtml .= $dom->saveHTML($child);
        }

        return $cleanedHtml;
    }
}