Skip to content

Commit 1d1c7c3

Browse files
authored
Merge pull request #136 from aboccag/single-character-ocr
Add per-character confidence results in OCR output
2 parents 05ea890 + 32a4954 commit 1d1c7c3

6 files changed

Lines changed: 121 additions & 7 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
namespace Sdcb.PaddleOCR;
2+
3+
/// <summary>
/// A record struct representing a single character recognition result from an OCR operation.
/// </summary>
/// <remarks>
/// Declared <c>readonly</c> so it matches <see cref="PaddleOcrRecognizerResult"/>: every member is
/// get/init-only, so instances are immutable once constructed and can be shared without defensive copies.
/// </remarks>
public readonly record struct OcrRecognizerResultSingleChar
{
    /// <summary>
    /// A single character recognized from the image.
    /// </summary>
    public string Character { get; init; }

    /// <summary>
    /// The confidence score of this individual character (not of the whole recognized text).
    /// </summary>
    public float Score { get; init; }

    /// <summary>
    /// The index position of this character within the recognized text.
    /// </summary>
    public int Index { get; init; }

    /// <summary>
    /// Initializes a new instance of the <see cref="OcrRecognizerResultSingleChar"/> record struct.
    /// </summary>
    /// <param name="character">The recognized character.</param>
    /// <param name="score">The confidence score of the character recognition.</param>
    /// <param name="index">The index position of this character within the recognized text.</param>
    public OcrRecognizerResultSingleChar(string character, float score, int index)
    {
        Character = character;
        Score = score;
        Index = index;
    }
}

src/Sdcb.PaddleOCR/PaddleOcrAll.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ public PaddleOcrResult Run(Mat src, int recognizeBatchSize = 0)
146146
try
147147
{
148148
return new PaddleOcrResult(Recognizer.Run(mats, recognizeBatchSize)
149-
.Select((result, i) => new PaddleOcrResultRegion(rects[i], result.Text, result.Score))
149+
.Select((result, i) => new PaddleOcrResultRegion(rects[i], result.Text, result.Score, result.SingleChars))
150150
.ToArray());
151151
}
152152
finally

src/Sdcb.PaddleOCR/PaddleOcrRecognizer.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Linq;
66
using System.Runtime.InteropServices;
77
using System.Text;
8+
using System.Collections.Generic;
89

910
namespace Sdcb.PaddleOCR;
1011

@@ -174,6 +175,8 @@ private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
174175
StringBuilder sb = new();
175176
int lastIndex = 0;
176177
float score = 0;
178+
List<OcrRecognizerResultSingleChar> singleChars = new();
179+
int charIndex = 0;
177180
for (int n = 0; n < charCount; ++n)
178181
{
179182
using Mat mat = Mat.FromPixelData(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float));
@@ -183,12 +186,20 @@ private PaddleOcrRecognizerResult[] RunMulti(Mat[] srcs)
183186
if (maxIdx[1] > 0 && (!(n > 0 && maxIdx[1] == lastIndex)))
184187
{
185188
score += (float)maxVal;
186-
sb.Append(Model.GetLabelByIndex(maxIdx[1]));
189+
string character = Model.GetLabelByIndex(maxIdx[1]);
190+
sb.Append(character);
191+
192+
singleChars.Add(new OcrRecognizerResultSingleChar(
193+
character,
194+
(float)maxVal,
195+
charIndex
196+
));
197+
charIndex++;
187198
}
188199
lastIndex = maxIdx[1];
189200
}
190201

191-
return new PaddleOcrRecognizerResult(sb.ToString(), score / sb.Length);
202+
return new PaddleOcrRecognizerResult(sb.ToString(), score / sb.Length, singleChars);
192203
})
193204
.ToArray();
194205
}

src/Sdcb.PaddleOCR/PaddleOcrRecognizerResult.cs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
namespace Sdcb.PaddleOCR;
1+
using System.Collections.Generic;
2+
3+
namespace Sdcb.PaddleOCR;
24

35
/// <summary>
46
/// A struct representing the result of an image recognition operation using Paddle OCR.
@@ -14,15 +16,22 @@ public readonly record struct PaddleOcrRecognizerResult
1416
/// The confidence score of the text recognition.
1517
/// </summary>
1618
public float Score { get; init; }
19+
20+
/// <summary>
21+
/// A read-only list of single character recognition results.
22+
/// </summary>
23+
public IReadOnlyList<OcrRecognizerResultSingleChar> SingleChars { get; init; }
1724

1825
/// <summary>
1926
/// Initializes a new instance of the <see cref="PaddleOcrRecognizerResult"/> struct.
2027
/// </summary>
2128
/// <param name="text">The recognized text from the image.</param>
2229
/// <param name="score">The confidence score of the text recognition.</param>
23-
public PaddleOcrRecognizerResult(string text, float score)
30+
/// <param name="singleChars">A list of single character recognition results.</param>
31+
public PaddleOcrRecognizerResult(string text, float score, IReadOnlyList<OcrRecognizerResultSingleChar> singleChars)
2432
{
2533
Text = text;
2634
Score = score;
35+
SingleChars = singleChars ?? new List<OcrRecognizerResultSingleChar>();
2736
}
28-
}
37+
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
using OpenCvSharp;
2+
using System.Collections.Generic;
23

34
namespace Sdcb.PaddleOCR;
45

56
/// <summary>
67
/// Represents a region detected in an OCR result using Paddle OCR.
78
/// </summary>
8-
public record struct PaddleOcrResultRegion(RotatedRect Rect, string Text, float Score);
9+
/// <param name="Rect">The rotated rectangle of the detected region.</param>
/// <param name="Text">The text recognized inside the region.</param>
/// <param name="Score">The confidence score of the text recognition.</param>
/// <param name="SingleChars">
/// The per-character recognition results for the region. Typed as a read-only list so the
/// recognizer's <c>IReadOnlyList</c> result can be passed through without a copy or a cast.
/// </param>
public record struct PaddleOcrResultRegion(RotatedRect Rect, string Text, float Score, IReadOnlyList<OcrRecognizerResultSingleChar> SingleChars);
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
using OpenCvSharp;
2+
using Sdcb.PaddleOCR.Models;
3+
using Sdcb.PaddleOCR.Models.Local;
4+
using System.Runtime.InteropServices;
5+
using Xunit;
6+
using Xunit.Abstractions;
7+
8+
namespace Sdcb.PaddleOCR.Tests;
9+
10+
/// <summary>
/// End-to-end check of the per-character results attached to each recognized OCR region.
/// </summary>
public class SingleCharTest(ITestOutputHelper testOutputHelper)
{
    /// <summary>
    /// Runs the full OCR pipeline on the sample image and verifies, for every region, that the
    /// per-character entries are present, sequentially indexed, positively scored, and that
    /// concatenating them reproduces the region text.
    /// </summary>
    [Fact]
    public void SingleCharRecognitionTest()
    {
        testOutputHelper.WriteLine(
            $"Running SingleChar test on {RuntimeInformation.OSDescription} ({RuntimeInformation.OSArchitecture})");

        FullOcrModel ocrModel = LocalFullModels.ChineseV5;
        byte[] imageBytes = File.ReadAllBytes(@"./samples/vsext.png");

        using PaddleOcrAll engine = new(ocrModel)
        {
            AllowRotateDetection = true,
            Enable180Classification = false,
        };

        using Mat image = Cv2.ImDecode(imageBytes, ImreadModes.Color);
        PaddleOcrResult ocrResult = engine.Run(image);
        testOutputHelper.WriteLine("Detected all texts: \n" + ocrResult.Text);

        Assert.NotEmpty(ocrResult.Regions);

        foreach (PaddleOcrResultRegion region in ocrResult.Regions)
        {
            testOutputHelper.WriteLine($"Text: {region.Text}, Score: {region.Score}");
            testOutputHelper.WriteLine($"SingleChars count: {region.SingleChars.Count}");

            // Every recognized region is expected to carry at least one character entry.
            Assert.NotEmpty(region.SingleChars);

            // Walk the entries in order; each one must report its own position.
            int expectedIndex = 0;
            foreach (OcrRecognizerResultSingleChar entry in region.SingleChars)
            {
                Assert.Equal(expectedIndex, entry.Index);
                Assert.NotNull(entry.Character);
                Assert.True(entry.Score > 0, $"Character '{entry.Character}' should have a positive score");

                testOutputHelper.WriteLine(
                    $"  Char[{entry.Index}]: '{entry.Character}', Score: {entry.Score:F3}");

                expectedIndex++;
            }

            // Gluing the characters back together must give exactly the region text.
            string rebuilt = string.Concat(region.SingleChars.Select(entry => entry.Character));
            Assert.Equal(region.Text, rebuilt);
        }
    }
}

0 commit comments

Comments
 (0)