Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions OfficeIMO.Drawing/OfficeImageReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ private static bool TryReadPng(byte[] data, out OfficeImageInfo info) {
int offset = 8;
while (offset + 12 <= data.Length) {
int length = ReadInt32BigEndian(data, offset);
if (length < 0 || offset + 12 + length > data.Length) {
long chunkEnd = (long)offset + 12L + length;
if (length < 0 || chunkEnd > data.Length) {
break;
}

Expand All @@ -150,7 +151,7 @@ private static bool TryReadPng(byte[] data, out OfficeImageInfo info) {
break;
}

offset += 12 + length;
offset = (int)chunkEnd;
}

info = new OfficeImageInfo(OfficeImageFormat.Png, width, height, dpiX, dpiY);
Expand Down
2 changes: 1 addition & 1 deletion OfficeIMO.Html.Pdf/HtmlPdfResourcePolicySummary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static HtmlPdfResourcePolicySummary From(HtmlPdfSaveOptions options) {
return summary;
}

WordHtml.HtmlToWordOptions wordOptions = options.WordHtmlOptions ?? WordHtml.HtmlToWordOptions.CreateOfficeIMOProfile();
WordHtml.HtmlToWordOptions wordOptions = options.WordHtmlOptions ?? new WordHtml.HtmlToWordOptions();
summary.UsesWordHtmlPolicy = true;
summary.AllowDocumentStylesheetLinks = wordOptions.AllowDocumentStylesheetLinks;
summary.AllowedStylesheetUriSchemes = CopySorted(wordOptions.AllowedStylesheetUriSchemes);
Expand Down
143 changes: 141 additions & 2 deletions OfficeIMO.Html/Rtf/Internal/RtfHtmlReader.Fields.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ private bool TryStartField(IElement token) {
return false;
}

if (!IsHyperlinkFieldAllowed(token, instruction)) {
return false;
}

RtfField field = EnsureInlineParagraph().AddField(instruction ?? string.Empty);
ReadHyperlinkFieldData(token, field);
ReadFormFieldData(token, field);
Expand Down Expand Up @@ -50,6 +54,140 @@ private static bool IsFieldMarker(string? marker) {
string.Equals(marker, "start", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Decides whether a field element may be imported, based on the hyperlink targets found in
/// its instruction text and in the element's attributes.
/// </summary>
/// <param name="token">Element carrying the field markup and its attributes.</param>
/// <param name="instruction">Raw field instruction text; may be null or blank.</param>
/// <returns>
/// <c>false</c> when the instruction cannot be reduced to at most one hyperlink target or when
/// any target fails <see cref="AreHyperlinkFieldTargetsAllowed"/>; otherwise <c>true</c>.
/// </returns>
private bool IsHyperlinkFieldAllowed(IElement token, string? instruction) {
    // Without an instruction only the attribute-supplied targets need checking.
    if (string.IsNullOrWhiteSpace(instruction)) {
        return AreHyperlinkFieldTargetsAllowed(token, null);
    }

    bool parsed = TryReadHyperlinkInstructionTargets(instruction!, out IReadOnlyList<string> instructionTargets);
    if (!parsed) {
        return false;
    }

    var field = new RtfField(instruction!);
    string? instructionTarget;
    if (instructionTargets.Count > 0) {
        instructionTarget = instructionTargets[0];
    } else {
        // The tokenizer found no target; fall back to whatever the field model itself exposes.
        instructionTarget = field.HyperlinkField?.Target?.ToString();
    }

    return AreHyperlinkFieldTargetsAllowed(token, instructionTarget);
}

/// <summary>
/// Extracts the non-switch arguments of a HYPERLINK field instruction.
/// </summary>
/// <param name="instruction">Raw field instruction text.</param>
/// <param name="targets">
/// Receives the collected target candidates. Stays empty when the instruction is not a
/// HYPERLINK instruction or when the method returns <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> when the instruction is not a HYPERLINK instruction or yields at most one target;
/// <c>false</c> (after recording a diagnostic) when more than one target is present.
/// </returns>
private bool TryReadHyperlinkInstructionTargets(string instruction, out IReadOnlyList<string> targets) {
    targets = Array.Empty<string>();

    IReadOnlyList<string> parts = TokenizeRtfFieldInstruction(instruction);
    bool isHyperlink = parts.Count > 0 &&
        string.Equals(parts[0], "HYPERLINK", StringComparison.OrdinalIgnoreCase);
    if (!isHyperlink) {
        return true;
    }

    var candidates = new List<string>();
    int position = 1;
    while (position < parts.Count) {
        string part = parts[position++];
        if (part.Length == 0) {
            continue;
        }

        if (part[0] != '\\') {
            candidates.Add(part);
            continue;
        }

        // A switch that takes a value also swallows the following token, if there is one.
        if (RtfHyperlinkSwitchConsumesValue(part) && position < parts.Count) {
            position++;
        }
    }

    if (candidates.Count <= 1) {
        targets = candidates;
        return true;
    }

    _options.AddDiagnostic(
        "RtfHtmlFieldHyperlinkRejected",
        "RTF hyperlink field instruction contains multiple targets.",
        "data-officeimo-rtf-field-instruction");
    return false;
}

/// <summary>
/// Tells whether a HYPERLINK field switch is followed by a field argument.
/// </summary>
/// <remarks>
/// Only <c>\l</c> (location), <c>\o</c> (ScreenTip text) and <c>\t</c> (target frame) take an
/// argument. <c>\m</c> (image-map coordinates) and <c>\n</c> (open in new window) are standalone
/// flags, so the token after them must not be skipped: it may be the hyperlink target itself
/// and still has to be validated by the caller.
/// </remarks>
/// <param name="token">Switch token including its leading backslash, compared case-insensitively.</param>
/// <returns><c>true</c> when the switch takes a following argument; otherwise <c>false</c>.</returns>
private static bool RtfHyperlinkSwitchConsumesValue(string token) =>
    string.Equals(token, "\\l", StringComparison.OrdinalIgnoreCase) ||
    string.Equals(token, "\\o", StringComparison.OrdinalIgnoreCase) ||
    string.Equals(token, "\\t", StringComparison.OrdinalIgnoreCase);

/// <summary>
/// Splits a field instruction into whitespace-separated tokens, keeping double-quoted runs
/// together as a single token with the quotes removed.
/// </summary>
/// <remarks>
/// A quote only opens a quoted token when it is the first character of a token. An unterminated
/// quoted token extends to the end of the instruction. Empty quoted tokens are preserved.
/// </remarks>
/// <param name="instruction">Raw field instruction text.</param>
/// <returns>The tokens in their original order.</returns>
private static IReadOnlyList<string> TokenizeRtfFieldInstruction(string instruction) {
    var result = new List<string>();
    int length = instruction.Length;
    int position = 0;

    while (true) {
        // Skip the separator run in front of the next token.
        while (position < length && char.IsWhiteSpace(instruction[position])) {
            position++;
        }

        if (position >= length) {
            return result;
        }

        if (instruction[position] != '"') {
            // Bare token: runs up to the next whitespace character.
            int tokenStart = position;
            do {
                position++;
            } while (position < length && !char.IsWhiteSpace(instruction[position]));

            result.Add(instruction.Substring(tokenStart, position - tokenStart));
            continue;
        }

        // Quoted token: everything up to the closing quote, or to the end when none exists.
        int contentStart = position + 1;
        int closingQuote = instruction.IndexOf('"', contentStart);
        if (closingQuote < 0) {
            result.Add(instruction.Substring(contentStart));
            position = length;
        } else {
            result.Add(instruction.Substring(contentStart, closingQuote - contentStart));
            position = closingQuote + 1;
        }
    }
}

/// <summary>
/// Checks every hyperlink target associated with a field element: the instruction target,
/// the <c>data-officeimo-rtf-field-hyperlink</c> attribute and the <c>href</c> attribute.
/// </summary>
/// <param name="token">Element whose attributes are inspected.</param>
/// <param name="instructionTarget">Target taken from the field instruction, or null.</param>
/// <returns><c>true</c> only when no target is rejected. Checking stops at the first rejection.</returns>
private bool AreHyperlinkFieldTargetsAllowed(IElement token, string? instructionTarget) {
    string? explicitTarget = GetAttribute(token, "data-officeimo-rtf-field-hyperlink");
    string? href = GetAttribute(token, "href");

    // Evaluated left to right with short-circuiting. An href for which IsFragmentHref
    // returns true is accepted without a policy check.
    return IsHyperlinkFieldTargetAllowed(instructionTarget, "data-officeimo-rtf-field-instruction")
        && IsHyperlinkFieldTargetAllowed(explicitTarget, "data-officeimo-rtf-field-hyperlink")
        && (IsFragmentHref(href) || IsHyperlinkFieldTargetAllowed(href, "href"));
}

/// <summary>
/// Checks a single hyperlink target by resolving it through
/// <c>HtmlUrlPolicyEvaluator.ResolveUrl</c> with the reader's base URI and URL policy.
/// </summary>
/// <param name="target">Candidate target; null or blank values are accepted as "no target".</param>
/// <param name="source">Name of the attribute the target came from, passed on to the diagnostic.</param>
/// <returns>
/// <c>true</c> when the target is blank or resolves to a non-blank URL; <c>false</c> after
/// recording a diagnostic when resolution yields a blank result.
/// </returns>
private bool IsHyperlinkFieldTargetAllowed(string? target, string source) {
    if (string.IsNullOrWhiteSpace(target)) {
        return true;
    }

    string resolved = HtmlUrlPolicyEvaluator.ResolveUrl(target, _baseUri, _options.UrlPolicy);
    bool rejected = string.IsNullOrWhiteSpace(resolved);
    if (rejected) {
        _options.AddDiagnostic(
            "RtfHtmlFieldHyperlinkRejected",
            "RTF hyperlink field target was rejected by the configured URL policy.",
            source);
    }

    return !rejected;
}

private void ReadFormFieldData(IElement token, RtfField field) {
if (!HasFormFieldData(token)) {
return;
Expand Down Expand Up @@ -80,11 +218,12 @@ private static bool HasFormFieldData(IElement token) {
GetAttribute(token, "data-officeimo-rtf-form-dropdown-items") != null;
}

private static void ReadHyperlinkFieldData(IElement token, RtfField field) {
private void ReadHyperlinkFieldData(IElement token, RtfField field) {
string? explicitTarget = GetAttribute(token, "data-officeimo-rtf-field-hyperlink");
string? href = GetAttribute(token, "href");
string? target = explicitTarget ?? (IsFragmentHref(href) ? null : href);
if (!string.IsNullOrWhiteSpace(target) && Uri.TryCreate(target, UriKind.RelativeOrAbsolute, out Uri? uri)) {
Uri? uri = ReadUriValue(target);
if (uri != null) {
field.Hyperlink = uri;
}

Expand Down
5 changes: 4 additions & 1 deletion OfficeIMO.Html/Rtf/Internal/RtfHtmlReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,10 @@ private RtfParagraph EnsureParagraph() {
}

private Uri? ReadUri(IElement token, string name) {
string? value = GetAttribute(token, name);
return ReadUriValue(GetAttribute(token, name));
}

private Uri? ReadUriValue(string? value) {
if (string.IsNullOrWhiteSpace(value)) {
return null;
}
Expand Down
11 changes: 6 additions & 5 deletions OfficeIMO.Markdown.Pdf/MarkdownPdfConverterExtensions.Images.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ namespace OfficeIMO.Markdown.Pdf;
/// </summary>
public static partial class MarkdownPdfConverterExtensions {
private static void RenderImageBlock(PdfCore.PdfDocument pdf, ImageBlock image, MarkdownPdfSaveOptions options) {
if (!options.IncludeLocalImages) {
RenderImagePlaceholder(pdf, image);
return;
}

if (!TryReadImageBytes(image.Path, options, out byte[] bytes, out string sourceName, out string warningCode, out string warningMessage)) {
AddWarning(options, warningCode, image.Path, warningMessage);
RenderImagePlaceholder(pdf, image);
Expand Down Expand Up @@ -61,6 +56,12 @@ private static bool TryReadImageBytes(string path, MarkdownPdfSaveOptions option
return TryReadRemoteImageBytes(remoteUri!, options, out bytes, out sourceName, out warningCode, out warningMessage);
}

if (!options.IncludeLocalImages) {
warningCode = "LocalImageDisabled";
warningMessage = "Local Markdown images are disabled by default. Set MarkdownPdfSaveOptions.IncludeLocalImages to true for trusted documents.";
return false;
}

string? resolvedPath = ResolveImagePath(path, options.BaseDirectory);
if (resolvedPath == null) {
return false;
Expand Down
4 changes: 2 additions & 2 deletions OfficeIMO.Markdown.Pdf/MarkdownPdfSaveOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public MarkdownPdfVisualTheme? VisualTheme {
/// <summary>Base directory used to resolve relative local image paths.</summary>
public string? BaseDirectory { get; set; }

/// <summary>When true, supported local image files are embedded as PDF images.</summary>
public bool IncludeLocalImages { get; set; } = true;
/// <summary>When true, supported local image files are embedded as PDF images. Defaults to false for untrusted Markdown.</summary>
public bool IncludeLocalImages { get; set; }

/// <summary>When true, supported base64 data URI images are embedded as PDF images.</summary>
public bool IncludeDataUriImages { get; set; } = true;
Expand Down
29 changes: 25 additions & 4 deletions OfficeIMO.Pdf/Core/PdfDocument.Blocks.ImageValidation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
namespace OfficeIMO.Pdf;

public sealed partial class PdfDocument {
private const string SupportedImageMessage =
"PdfDocument.Image currently supports JPEG and grayscale/grayscale-alpha/indexed-color/RGB/RGBA PNG image bytes only, including Adam7-interlaced PNGs and supported 16-bit grayscale/grayscale-alpha/RGB/RGBA PNG payloads.";

/// <summary>
/// Checks whether image bytes can be embedded by the first-party PDF writer.
/// </summary>
Expand Down Expand Up @@ -33,22 +36,40 @@ internal static OfficeImageInfo ValidateImageBytes(byte[] data) {
return info;
}

throw new NotSupportedException(
"PdfDocument.Image currently supports JPEG and grayscale/grayscale-alpha/indexed-color/RGB/RGBA PNG image bytes only, including Adam7-interlaced PNGs and supported 16-bit grayscale/grayscale-alpha/RGB/RGBA PNG payloads. " +
unsupportedReason);
throw new NotSupportedException(SupportedImageMessage + " " + unsupportedReason);
} else {
throw new NotSupportedException(
$"PdfDocument.Image currently supports JPEG and grayscale/grayscale-alpha/indexed-color/RGB/RGBA PNG image bytes only, including Adam7-interlaced PNGs and supported 16-bit grayscale/grayscale-alpha/RGB/RGBA PNG payloads. Detected {info.Format} ({info.MimeType}).");
$"{SupportedImageMessage} Detected {info.Format} ({info.MimeType}).");
}
}

if (LooksLikePng(data)) {
string? unsupportedReason;
if (PdfWriter.TryGetPngImageData(data, out var image, out unsupportedReason)) {
return new OfficeImageInfo(OfficeImageFormat.Png, image.PixelWidth, image.PixelHeight);
}

throw new NotSupportedException(SupportedImageMessage + " " + unsupportedReason);
}

if (!LooksLikeJpeg(data)) {
System.Diagnostics.Trace.TraceWarning("PdfDocument.Image: Provided bytes do not appear to be JPEG encoded.");
}

return new OfficeImageInfo(OfficeImageFormat.Unknown, 0, 0);
}

/// <summary>
/// Checks whether the buffer begins with the eight-byte PNG file signature.
/// </summary>
/// <param name="data">Image bytes to inspect.</param>
/// <returns><c>true</c> when the first eight bytes equal 137 80 78 71 13 10 26 10.</returns>
private static bool LooksLikePng(byte[] data) {
    byte[] signature = { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
    if (data.Length < signature.Length) {
        return false;
    }

    for (int i = 0; i < signature.Length; i++) {
        if (data[i] != signature[i]) {
            return false;
        }
    }

    return true;
}

private static bool LooksLikeJpeg(byte[] data) {
if (data.Length < 4)
return false;
Expand Down
21 changes: 16 additions & 5 deletions OfficeIMO.Pdf/Reading/Core/PdfReadDocument.XmpMetadata.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Xml;
using System.Xml.Linq;
using OfficeIMO.Pdf.Filters;

Expand All @@ -6,6 +7,8 @@ namespace OfficeIMO.Pdf;
public sealed partial class PdfReadDocument {
private const string DublinCoreNamespaceUri = "http://purl.org/dc/elements/1.1/";
private const string PdfAIdentificationNamespaceUri = "http://www.aiim.org/pdfa/ns/id/";
/// <summary>Maximum decoded XMP metadata size parsed as XML.</summary>
public const int MaxXmpMetadataBytes = 4_000_000;

/// <summary>Catalog XMP metadata stream discovered from /Metadata.</summary>
public PdfXmpMetadataInfo? XmpMetadata { get; }
Expand All @@ -22,15 +25,16 @@ public sealed partial class PdfReadDocument {
return null;
}

byte[] decoded = StreamDecoder.Decode(stream.Dictionary, stream.Data, _objects);
string? rawXml = DecodeMetadataText(decoded);
XDocument? document = TryParseXml(rawXml);
bool decodedWithinLimit = StreamDecoder.TryDecode(stream.Dictionary, stream.Data, MaxXmpMetadataBytes, out byte[] decoded, _objects);
string? rawXml = decodedWithinLimit ? DecodeMetadataText(decoded) : null;
int decodedSizeBytes = decodedWithinLimit ? decoded.Length : MaxXmpMetadataBytes + 1;
XDocument? document = rawXml is null ? null : TryParseXml(rawXml);
return new PdfXmpMetadataInfo(
objectNumber,
TryReadName(stream.Dictionary, "Subtype"),
TryReadStreamFilter(stream),
stream.Data.Length,
decoded.Length,
decodedSizeBytes,
StreamDecoder.GetUnsupportedFilters(stream.Dictionary, _objects).AsReadOnly(),
rawXml,
document is not null,
Expand Down Expand Up @@ -82,7 +86,14 @@ public sealed partial class PdfReadDocument {
}

try {
return XDocument.Parse(rawXml!, LoadOptions.None);
var settings = new XmlReaderSettings {
DtdProcessing = DtdProcessing.Prohibit,
MaxCharactersInDocument = MaxXmpMetadataBytes,
XmlResolver = null
};
using var stringReader = new StringReader(rawXml!);
using XmlReader reader = XmlReader.Create(stringReader, settings);
return XDocument.Load(reader, LoadOptions.None);
} catch (Exception ex) when (ex is System.Xml.XmlException || ex is InvalidOperationException) {
return null;
}
Expand Down
17 changes: 15 additions & 2 deletions OfficeIMO.Pdf/Reading/Core/ResourceResolver.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
namespace OfficeIMO.Pdf;

internal static class ResourceResolver {
private const int MaxCidWidthEntries = 65536;
private const int MaxCidWidthRangeEntries = 4096;

public static Dictionary<string, PdfFontResource> GetFontsForPage(PdfDictionary page, Dictionary<int, PdfIndirectObject> objects) {
var fonts = new Dictionary<string, PdfFontResource>(System.StringComparer.Ordinal);
var dict = GetInheritedDictionary(page, "Resources", objects);
Expand Down Expand Up @@ -103,14 +106,24 @@ private static bool TryBuildCidWidthMap(PdfDictionary type0Font, Dictionary<int,
if (i >= wArr.Items.Count) break;
var next = wArr.Items[i];
if (next is PdfArray list) {
for (int j = 0; j < list.Items.Count; j++) {
int count = System.Math.Min(list.Items.Count, MaxCidWidthEntries - dict.Count);
for (int j = 0; j < count; j++) {
if (list.Items[j] is PdfNumber wn) dict[startCid + j] = wn.Value; else dict[startCid + j] = dw;
}
} else if (next is PdfNumber endCidNum) {
int endCid = (int)endCidNum.Value; i++;
if (i >= wArr.Items.Count) break;
var wNum = wArr.Items[i] as PdfNumber; double wv = wNum?.Value ?? dw;
for (int cid = startCid; cid <= endCid; cid++) dict[cid] = wv;
int rangeLength = endCid >= startCid ? endCid - startCid + 1 : 0;
if (rangeLength <= 0) continue;

int count = System.Math.Min(rangeLength, MaxCidWidthRangeEntries);
count = System.Math.Min(count, MaxCidWidthEntries - dict.Count);
for (int offset = 0; offset < count; offset++) dict[startCid + offset] = wv;
}

if (dict.Count >= MaxCidWidthEntries) {
break;
}
}
}
Expand Down
Loading
Loading