From ee9aeae3546c0504f5bb9f0785228f7e376d5524 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Mon, 8 Jun 2026 13:08:31 -0400 Subject: [PATCH] fix: preserve BOM for UTF-8 encoded files --- DprintPluginRoslyn.Tests/WorkspaceTests.cs | 31 +++++++++++++++++++ .../Formatters/CSharpCodeFormatter.cs | 6 ++-- .../Formatters/CodeFormatters.cs | 4 ++- .../Formatters/ICodeFormatter.cs | 2 +- .../Formatters/VisualBasicCodeFormatter.cs | 6 ++-- .../Utils/SourceTextExtensions.cs | 16 ++++++++++ test/src/lib.rs | 28 +++++++++++++++++ 7 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 DprintPluginRoslyn/Utils/SourceTextExtensions.cs diff --git a/DprintPluginRoslyn.Tests/WorkspaceTests.cs b/DprintPluginRoslyn.Tests/WorkspaceTests.cs index 350dfba..990f324 100644 --- a/DprintPluginRoslyn.Tests/WorkspaceTests.cs +++ b/DprintPluginRoslyn.Tests/WorkspaceTests.cs @@ -1,7 +1,9 @@ using Dprint.Plugins.Roslyn.Configuration; using NUnit.Framework; using System.Collections.Generic; +using System.Linq; using System.Text; +using System.Threading; namespace Dprint.Plugins.Roslyn; @@ -45,4 +47,33 @@ public void Issue3_SetIndentWidth() var diagnostics = workspace.GetDiagnostics(2); Assert.That(diagnostics.Count, Is.EqualTo(0)); } + + [Test] + public void FormatCode_WithBom_PreservesBomWhenContentChanges() + { + var workspace = new Workspace(); + workspace.SetConfig(1, new(), new()); + var bom = Encoding.UTF8.GetPreamble(); + var content = Encoding.UTF8.GetBytes("namespace Test { }\n"); + var input = bom.Concat(content).ToArray(); + + var result = workspace.GetFormatters(1).FormatCode("file.cs", input, null, CancellationToken.None); + + Assert.That(result, Is.Not.Null); + Assert.That(result!.Take(3).ToArray(), Is.EqualTo(bom)); + } + + [Test] + public void FormatCode_WithBom_ReturnsNullWhenUnchanged() + { + var workspace = new Workspace(); + workspace.SetConfig(1, new(), new()); + var bom = Encoding.UTF8.GetPreamble(); + var content = Encoding.UTF8.GetBytes("namespace Test { }\n"); + var input = bom.Concat(content).ToArray(); + + var result = workspace.GetFormatters(1).FormatCode("file.cs", input, null, CancellationToken.None); + + Assert.That(result, Is.Null); + } } diff --git a/DprintPluginRoslyn/Formatters/CSharpCodeFormatter.cs b/DprintPluginRoslyn/Formatters/CSharpCodeFormatter.cs index 5d638eb..ce556d5 100644 --- a/DprintPluginRoslyn/Formatters/CSharpCodeFormatter.cs +++ b/DprintPluginRoslyn/Formatters/CSharpCodeFormatter.cs @@ -29,7 +29,7 @@ public bool ShouldFormat(string filePath) return filePath.EndsWith(".cs", StringComparison.OrdinalIgnoreCase); } - public byte[] FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token) + public SourceText FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token) { SyntaxNode formattedNode; @@ -39,8 +39,8 @@ public byte[] FormatText(SourceText text, TextSpan? range, OptionSet options, Ca formattedNode = Formatter.Format(root, range.Value, _workspace, options, token); else formattedNode = Formatter.Format(root, _workspace, options, token); - var result = formattedNode.GetText(text.Encoding, text.ChecksumAlgorithm); - return (result.Encoding ?? System.Text.Encoding.UTF8).GetBytes(result.ToString()); + + return formattedNode.GetText(text.Encoding, text.ChecksumAlgorithm); } public void ResolveConfiguration(ConfigurationResolutionContext context) diff --git a/DprintPluginRoslyn/Formatters/CodeFormatters.cs b/DprintPluginRoslyn/Formatters/CodeFormatters.cs index 1e08e3e..9537005 100644 --- a/DprintPluginRoslyn/Formatters/CodeFormatters.cs +++ b/DprintPluginRoslyn/Formatters/CodeFormatters.cs @@ -1,4 +1,5 @@ using Dprint.Plugins.Roslyn.Communication; +using Dprint.Plugins.Roslyn.Utils; using Microsoft.CodeAnalysis; using Microsoft.CodeAnalysis.Options; using Microsoft.CodeAnalysis.Text; @@ -31,7 +32,8 @@ public CodeFormatters(ICodeFormatter[] codeFormatters, OptionSet options) encoding: null // Let it auto-detect ); var result = formatter.FormatText(sourceText, range, _options, token); - return result.SequenceEqual(code) ? null : result; + var bytes = result.GetBytes(); + return bytes.SequenceEqual(code) ? null : bytes; } public Dictionary GetResolvedConfig() diff --git a/DprintPluginRoslyn/Formatters/ICodeFormatter.cs b/DprintPluginRoslyn/Formatters/ICodeFormatter.cs index bd66889..dc58763 100644 --- a/DprintPluginRoslyn/Formatters/ICodeFormatter.cs +++ b/DprintPluginRoslyn/Formatters/ICodeFormatter.cs @@ -10,7 +10,7 @@ public interface ICodeFormatter { string RoslynLanguageName { get; } bool ShouldFormat(string filePath); - byte[] FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token); + SourceText FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token); void ResolveConfiguration(ConfigurationResolutionContext context); IEnumerable<(string, object)> GetResolvedConfig(OptionSet options); } diff --git a/DprintPluginRoslyn/Formatters/VisualBasicCodeFormatter.cs b/DprintPluginRoslyn/Formatters/VisualBasicCodeFormatter.cs index 27fcd1f..0e485cc 100644 --- a/DprintPluginRoslyn/Formatters/VisualBasicCodeFormatter.cs +++ b/DprintPluginRoslyn/Formatters/VisualBasicCodeFormatter.cs @@ -27,7 +27,7 @@ public bool ShouldFormat(string filePath) return filePath.EndsWith(".vb", StringComparison.OrdinalIgnoreCase); } - public byte[] FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token) + public SourceText FormatText(SourceText text, TextSpan? range, OptionSet options, CancellationToken token) { SyntaxNode formattedNode; @@ -37,8 +37,8 @@ public byte[] FormatText(SourceText text, TextSpan? range, OptionSet options, Ca formattedNode = Formatter.Format(root, range.Value, _workspace, options, token); else formattedNode = Formatter.Format(root, _workspace, options, token); - var result = formattedNode.GetText(text.Encoding, text.ChecksumAlgorithm); - return (result.Encoding ?? System.Text.Encoding.UTF8).GetBytes(result.ToString()); + + return formattedNode.GetText(text.Encoding, text.ChecksumAlgorithm); } public void ResolveConfiguration(ConfigurationResolutionContext context) diff --git a/DprintPluginRoslyn/Utils/SourceTextExtensions.cs b/DprintPluginRoslyn/Utils/SourceTextExtensions.cs new file mode 100644 index 0000000..d529671 --- /dev/null +++ b/DprintPluginRoslyn/Utils/SourceTextExtensions.cs @@ -0,0 +1,16 @@ +using System.IO; +using Microsoft.CodeAnalysis.Text; + +namespace Dprint.Plugins.Roslyn.Utils; + +public static class SourceTextExtensions +{ + public static byte[] GetBytes(this SourceText sourceText) + { + var ms = new MemoryStream(); + using var sw = new StreamWriter(ms, sourceText.Encoding); + sw.Write(sourceText.ToString()); + sw.Flush(); + return ms.ToArray(); + } +} diff --git a/test/src/lib.rs b/test/src/lib.rs index 1166eb9..4276370 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -78,6 +78,34 @@ mod test { }) .await; assert_eq!(result.unwrap(), Some("namespace Test {\n class Test { }\n}\n".to_string(),)); + + // BOM preservation: content needs formatting + let result = communicator + .format_text(ProcessPluginCommunicatorFormatRequest { + file_path: PathBuf::from("file.cs"), + file_text: "\u{FEFF}namespace Test { }\n".to_string(), + range: None, + config_id, + override_config: Default::default(), + on_host_format: Rc::new(|_| unreachable!()), + token: token.clone(), + }) + .await; + assert_eq!(result.unwrap(), Some("\u{FEFF}namespace Test { }\n".to_string())); + + // BOM preservation: already formatted, should be unchanged + let result = communicator + .format_text(ProcessPluginCommunicatorFormatRequest { + file_path: PathBuf::from("file.cs"), + file_text: "\u{FEFF}namespace Test { }\n".to_string(), + range: None, + config_id, + override_config: Default::default(), + on_host_format: Rc::new(|_| unreachable!()), + token: token.clone(), + }) + .await; + assert_eq!(result.unwrap(), None); } let mut handles = Vec::new();