Skip to content

Commit b45847e

Browse files
committed
Merge branch 'fix_abstract_submission-3_5_0-i1182' into 'main'
Format abstracts and biographies for Thoth (3.5.0). See merge request softwares-pkp/plugins_ojs/thoth-omp-plugin!109.
2 parents 91f72da + 7be2f1c commit b45847e

6 files changed

Lines changed: 371 additions & 28 deletions

File tree

classes/factories/ThothAbstractFactory.php

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
namespace APP\plugins\generic\thoth\classes\factories;
1818

19+
use APP\plugins\generic\thoth\classes\formatters\ThothMarkupFormatter;
1920
use APP\plugins\generic\thoth\classes\i18n\ThothLocaleCode;
2021
use ThothApi\GraphQL\Models\AbstractText as ThothAbstract;
2122

@@ -35,6 +36,7 @@ private function create($entity, string $workId, ?string $preferredLocale = null
3536
{
3637
$canonicalLocale = $this->getCanonicalLocale($entity, $preferredLocale);
3738
$abstracts = $this->getLocalizedValues($entity, 'abstract', $canonicalLocale);
39+
$markupFormatter = new ThothMarkupFormatter();
3840
$thothAbstracts = [];
3941

4042
foreach ($abstracts as $locale => $abstract) {
@@ -47,7 +49,7 @@ private function create($entity, string $workId, ?string $preferredLocale = null
4749
$thothAbstracts[$this->getLocaleKey($localeCode)] = new ThothAbstract([
4850
'workId' => $workId,
4951
'localeCode' => $localeCode,
50-
'content' => $this->wrapInParagraph($abstract),
52+
'content' => $markupFormatter->format($abstract),
5153
'canonical' => $locale === $canonicalLocale,
5254
'abstractType' => 'LONG',
5355
]);
@@ -109,14 +111,4 @@ private function logUnsupportedLocale(string $entityType, ?string $locale): void
109111
$normalizedLocaleCode
110112
));
111113
}
112-
113-
private function wrapInParagraph(string $content): string
114-
{
115-
$content = trim($content);
116-
if (preg_match('/^<p\b[^>]*>.*<\/p>$/is', $content) === 1) {
117-
return $content;
118-
}
119-
120-
return sprintf('<p>%s</p>', $content);
121-
}
122114
}

classes/factories/ThothBiographyFactory.php

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
namespace APP\plugins\generic\thoth\classes\factories;
1818

19+
use APP\plugins\generic\thoth\classes\formatters\ThothMarkupFormatter;
1920
use APP\plugins\generic\thoth\classes\i18n\ThothLocaleCode;
2021
use ThothApi\GraphQL\Models\Biography as ThothBiography;
2122

@@ -25,6 +26,7 @@ public function createFromAuthor($author, string $contributionId, ?string $prefe
2526
{
2627
$canonicalLocale = $this->getCanonicalLocale($author, $preferredLocale);
2728
$biographies = $this->getLocalizedValues($author, 'biography', $canonicalLocale);
29+
$markupFormatter = new ThothMarkupFormatter();
2830
$thothBiographies = [];
2931

3032
foreach ($biographies as $locale => $biography) {
@@ -37,7 +39,7 @@ public function createFromAuthor($author, string $contributionId, ?string $prefe
3739
$thothBiographies[$this->getLocaleKey($localeCode)] = new ThothBiography([
3840
'contributionId' => $contributionId,
3941
'localeCode' => $localeCode,
40-
'content' => $this->wrapInParagraph($biography),
42+
'content' => $markupFormatter->format($biography),
4143
'canonical' => $locale === $canonicalLocale,
4244
]);
4345
}
@@ -98,14 +100,4 @@ private function logUnsupportedLocale(string $entityType, ?string $locale): void
98100
$normalizedLocaleCode
99101
));
100102
}
101-
102-
private function wrapInParagraph(string $content): string
103-
{
104-
$content = trim($content);
105-
if (preg_match('/^<p\b[^>]*>.*<\/p>$/is', $content) === 1) {
106-
return $content;
107-
}
108-
109-
return sprintf('<p>%s</p>', $content);
110-
}
111103
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
<?php
2+
3+
/**
4+
* @file plugins/generic/thoth/classes/formatters/ThothMarkupFormatter.php
5+
*
6+
* Copyright (c) 2024-2026 Lepidus Tecnologia
7+
* Copyright (c) 2024-2026 Thoth
8+
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
9+
*
10+
* @class ThothMarkupFormatter
11+
*
12+
* @ingroup plugins_generic_thoth
13+
*
14+
* @brief Formats HTML markup for Thoth text fields
15+
*/
16+
17+
namespace APP\plugins\generic\thoth\classes\formatters;
18+
19+
class ThothMarkupFormatter
{
    /**
     * Restructure an HTML fragment into a flat sequence of paragraph and list blocks.
     *
     * Content that contains no `<br>`, no `<ul>`/`<ol>` and no `<div>` carrying a
     * "value" class is returned trimmed but otherwise untouched. Otherwise the
     * fragment is parsed and re-emitted so that:
     *  - every `<br>` that is a direct child of the processed container or of a
     *    `<p>` ends the current paragraph;
     *  - `<ul>`/`<ol>` elements at those levels become standalone blocks;
     *  - any `<br>` still present in the result (e.g. nested in inline markup or
     *    list items) is replaced by a single space.
     *
     * @param string $content HTML fragment to format
     *
     * @return string The formatted markup, or the trimmed input when no
     *                formatting is needed or the fragment cannot be parsed
     */
    public function format(string $content): string
    {
        $content = trim($content);
        if (!$this->needsStructuralFormatting($content)) {
            return $content;
        }

        $document = new \DOMDocument('1.0', 'UTF-8');
        $previousUseInternalErrors = libxml_use_internal_errors(true);
        try {
            // The fragment is wrapped in a <div> so it has a single root; the XML
            // encoding prefix is intended to make the HTML parser read it as UTF-8.
            $loaded = $document->loadHTML(
                '<?xml encoding="UTF-8"><div>' . $content . '</div>',
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            // Always restore the caller's libxml error handling, even if parsing throws.
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }

        if (!$loaded) {
            return $content;
        }

        // Prefer the inner <div class="value"> when present; otherwise fall back to
        // the first <div> in the document, which is the synthetic wrapper added above.
        $wrapper = $this->getValueWrapper($document) ?? $document->getElementsByTagName('div')->item(0);
        if ($wrapper === null) {
            return $content;
        }

        $blocks = [];
        $inlineContent = '';
        // Snapshot the live child list before walking it.
        foreach (iterator_to_array($wrapper->childNodes) as $node) {
            $this->appendMarkupNode($document, $node, $blocks, $inlineContent);
        }
        $this->flushParagraph($blocks, $inlineContent);

        return $this->removeBreaks(implode('', $blocks));
    }

    /**
     * Tell whether the content contains markup that format() has to restructure.
     *
     * @return bool True when the content has a `<div>` whose class attribute
     *              contains the word "value", a `<br>` tag, or a `<ul>`/`<ol>` tag
     */
    private function needsStructuralFormatting(string $content): bool
    {
        return preg_match('/<div\b[^>]*class=["\'][^"\']*\bvalue\b/i', $content) === 1
            || preg_match('/<br\b/i', $content) === 1
            || preg_match('/<\/?(ul|ol)\b/i', $content) === 1;
    }

    /**
     * Find the first `<div>` whose class list contains the exact token "value".
     *
     * Only `<div>` elements are considered, mirroring the detection done in
     * needsStructuralFormatting(). Matching any element would let an inline
     * element such as `<span class="value">` be mistaken for the wrapper, which
     * would discard all content outside of it.
     *
     * @return \DOMElement|null The wrapper element, or null when there is none
     */
    private function getValueWrapper(\DOMDocument $document): ?\DOMElement
    {
        $xpath = new \DOMXPath($document);
        $nodes = $xpath->query('//div[contains(concat(" ", normalize-space(@class), " "), " value ")]');
        $node = $nodes !== false ? $nodes->item(0) : null;

        return $node instanceof \DOMElement ? $node : null;
    }

    /**
     * Route a single node either into the finished blocks or into the pending paragraph.
     *
     * @param \DOMDocument $document      Document owning the node, used for serialization
     * @param \DOMNode     $node          Node to process
     * @param array        $blocks        Finished block-level HTML strings (appended to)
     * @param string       $inlineContent Inline HTML accumulated for the paragraph in progress
     */
    private function appendMarkupNode(
        \DOMDocument $document,
        \DOMNode $node,
        array &$blocks,
        string &$inlineContent
    ): void {
        if ($node instanceof \DOMElement) {
            $tagName = strtolower($node->tagName);

            // A line break closes the paragraph in progress and is itself dropped.
            if ($tagName === 'br') {
                $this->flushParagraph($blocks, $inlineContent);
                return;
            }

            // Paragraphs are rebuilt from their children so breaks and lists inside split them.
            if ($tagName === 'p') {
                $this->flushParagraph($blocks, $inlineContent);
                $this->appendParagraphNode($document, $node, $blocks);
                return;
            }

            // Lists are emitted verbatim as their own block, outside any paragraph.
            if (in_array($tagName, ['ul', 'ol'], true)) {
                $this->flushParagraph($blocks, $inlineContent);
                $blocks[] = trim($document->saveHTML($node));
                return;
            }
        }

        // Everything else (text and any other element) joins the pending paragraph.
        $inlineContent .= $document->saveHTML($node);
    }

    /**
     * Re-emit the children of a `<p>` element as one or more blocks.
     *
     * The original `<p>` element itself (including its attributes) is not
     * serialized; new paragraphs are produced by flushParagraph().
     */
    private function appendParagraphNode(\DOMDocument $document, \DOMElement $paragraph, array &$blocks): void
    {
        $inlineContent = '';
        foreach (iterator_to_array($paragraph->childNodes) as $node) {
            $this->appendMarkupNode($document, $node, $blocks, $inlineContent);
        }
        $this->flushParagraph($blocks, $inlineContent);
    }

    /**
     * Close the paragraph in progress: append it to the blocks when it is not
     * empty after trimming, then reset the accumulator.
     */
    private function flushParagraph(array &$blocks, string &$inlineContent): void
    {
        $content = trim($inlineContent);
        if ($content !== '') {
            $blocks[] = sprintf('<p>%s</p>', $content);
        }

        $inlineContent = '';
    }

    /**
     * Replace every remaining `<br>` tag with a single space.
     *
     * Falls back to the unmodified content if the regular expression fails.
     */
    private function removeBreaks(string $content): string
    {
        return preg_replace('/<br\b[^>]*>/i', ' ', $content) ?? $content;
    }
}

tests/classes/factories/ThothAbstractFactoryTest.php

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
use APP\plugins\generic\thoth\classes\factories\ThothAbstractFactory;
66
use PKP\tests\PKPTestCase;
77

8+
require_once(__DIR__ . '/../../../vendor/autoload.php');
9+
810
class ThothAbstractFactoryTest extends PKPTestCase
911
{
10-
public function testCreateFromPublicationWrapsAbstractWithoutParagraph(): void
12+
public function testCreateFromPublicationSendsAbstractWithoutParagraphUnchanged(): void
1113
{
1214
$publication = new class () {
1315
public function getData($key)
@@ -24,7 +26,7 @@ public function getData($key)
2426
$factory = new ThothAbstractFactory();
2527
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
2628

27-
$this->assertSame('<p>English abstract</p>', $thothAbstracts['EN_US']->getContent());
29+
$this->assertSame('English abstract', $thothAbstracts['EN_US']->getContent());
2830
}
2931

3032
public function testCreateFromPublicationPreservesAbstractAlreadyWrappedInParagraph(): void
@@ -46,4 +48,119 @@ public function getData($key)
4648

4749
$this->assertSame('<p>English abstract</p>', $thothAbstracts['EN_US']->getContent());
4850
}
51+
52+
public function testCreateFromPublicationMovesListsOutsideParagraphs(): void
53+
{
54+
$publication = new class () {
55+
public function getData($key)
56+
{
57+
$values = [
58+
'locale' => 'en_US',
59+
'abstract' => ['en_US' => 'Intro<ul><li>First item</li></ul>Outro'],
60+
];
61+
62+
return $values[$key] ?? null;
63+
}
64+
};
65+
66+
$factory = new ThothAbstractFactory();
67+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
68+
69+
$this->assertSame(
70+
'<p>Intro</p><ul><li>First item</li></ul><p>Outro</p>',
71+
$thothAbstracts['EN_US']->getContent()
72+
);
73+
}
74+
75+
public function testCreateFromPublicationMovesNestedListsOutsideParagraphs(): void
76+
{
77+
$publication = new class () {
78+
public function getData($key)
79+
{
80+
$values = [
81+
'locale' => 'en_US',
82+
'abstract' => ['en_US' => '<p>Intro<ul><li>First item</li></ul>Outro</p>'],
83+
];
84+
85+
return $values[$key] ?? null;
86+
}
87+
};
88+
89+
$factory = new ThothAbstractFactory();
90+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
91+
92+
$this->assertSame(
93+
'<p>Intro</p><ul><li>First item</li></ul><p>Outro</p>',
94+
$thothAbstracts['EN_US']->getContent()
95+
);
96+
}
97+
98+
public function testCreateFromPublicationConvertsBreaksToParagraphs(): void
99+
{
100+
$publication = new class () {
101+
public function getData($key)
102+
{
103+
$values = [
104+
'locale' => 'en_US',
105+
'abstract' => ['en_US' => '<p>First line<br />Second line</p>'],
106+
];
107+
108+
return $values[$key] ?? null;
109+
}
110+
};
111+
112+
$factory = new ThothAbstractFactory();
113+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
114+
115+
$this->assertSame('<p>First line</p><p>Second line</p>', $thothAbstracts['EN_US']->getContent());
116+
}
117+
118+
public function testCreateFromPublicationRemovesBreaksInsideInlineMarkup(): void
119+
{
120+
$publication = new class () {
121+
public function getData($key)
122+
{
123+
$values = [
124+
'locale' => 'en_US',
125+
'abstract' => ['en_US' => '<p><strong>First<br />Second</strong></p>'],
126+
];
127+
128+
return $values[$key] ?? null;
129+
}
130+
};
131+
132+
$factory = new ThothAbstractFactory();
133+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
134+
135+
$this->assertSame('<p><strong>First Second</strong></p>', $thothAbstracts['EN_US']->getContent());
136+
}
137+
138+
public function testCreateFromPublicationRemovesOmpPresentationWrapper(): void
139+
{
140+
$publication = new class () {
141+
public function getData($key)
142+
{
143+
$values = [
144+
'locale' => 'en_US',
145+
'abstract' => [
146+
'en_US' => '<h2 class="label">Synopsis</h2><div class="value">'
147+
. '<p>Publisher<br />Address<br />Country</p>'
148+
. '<p><strong>Open</strong> <a href="https://example.com">platform</a></p>'
149+
. '</div>',
150+
],
151+
];
152+
153+
return $values[$key] ?? null;
154+
}
155+
};
156+
157+
$factory = new ThothAbstractFactory();
158+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
159+
160+
$this->assertSame(
161+
'<p>Publisher</p><p>Address</p><p>Country</p>'
162+
. '<p><strong>Open</strong> <a href="https://example.com">platform</a></p>',
163+
$thothAbstracts['EN_US']->getContent()
164+
);
165+
}
49166
}

0 commit comments

Comments
 (0)