From 3d6b00d24e4e65ac60d81b4e30de08bbd02a4ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juh=C3=A1sz=20P=C3=A9ter?= Date: Sat, 25 Jun 2022 21:11:14 +0200 Subject: [PATCH] caesar bruteforce benchmarks --- .github/workflows/cli.yml | 2 + .github/workflows/nuget.yml | 3 + README.md | 18 +- docs/performance-improvements.md | 38 +++- .../Program.cs | 2 +- .../v1v2/CaesarBruteforceBenchmarks.cs | 121 +++++++++++++ .../v1v2/FrequencyAnalysisBenchmarks.cs | 72 +------- ...lativeLetterFrequenciesScorerBenchmarks.cs | 168 ++++++++++++++++++ .../CaesarBruteforceTests.cs | 15 ++ 9 files changed, 353 insertions(+), 86 deletions(-) create mode 100644 src/Science.Cryptography.Ciphers.Benchmarks/v1v2/CaesarBruteforceBenchmarks.cs create mode 100644 src/Science.Cryptography.Ciphers.Benchmarks/v1v2/RelativeLetterFrequenciesScorerBenchmarks.cs create mode 100644 tests/Science.Cryptography.Ciphers.Analysis.Tests/CaesarBruteforceTests.cs diff --git a/.github/workflows/cli.yml b/.github/workflows/cli.yml index 6b46381..af9eb6b 100644 --- a/.github/workflows/cli.yml +++ b/.github/workflows/cli.yml @@ -4,6 +4,8 @@ on: push: branches: - main + paths-ignore: + - docs/** pull_request: branches: - main diff --git a/.github/workflows/nuget.yml b/.github/workflows/nuget.yml index a67d09b..083e20b 100644 --- a/.github/workflows/nuget.yml +++ b/.github/workflows/nuget.yml @@ -4,6 +4,8 @@ on: push: branches: - main + paths-ignore: + - docs/** pull_request: branches: - main @@ -70,3 +72,4 @@ jobs: - name: NuGet Push run: nuget push src/Science.Cryptography.*/bin/Release/Science.Cryptography.*.nupkg -Source https://api.nuget.org/v3/index.json + if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' \ No newline at end of file diff --git a/README.md b/README.md index a1b56c1..3b559e4 100644 --- a/README.md +++ b/README.md @@ -74,14 +74,10 @@ The full list of assets: - [List of languages](docs/assets.md#list-of-languages) - [List of encodings](docs/assets.md#list-of-encodings) -## 
Accepting PRs -* Enigma -* Hill -* Permutation -* Rail fence -* Myszkowski Transposition -* Nihilist -* Solitaire -* Trifid -* Any other missing cipher -* Unit tests \ No newline at end of file +## Contribution + - Add any [missing cipher](https://github.com/Peter-Juhasz/Science.Cryptography.Ciphers/issues?q=is%3Aissue+is%3Aopen+label%3Acipher) + - Prefer performance (no heap allocations, SIMD operations, ...) + - *Feel free to add any not listed* + - Performance improvements + - Post benchmark and its results as evidence to show change in efficiency + - Unit tests \ No newline at end of file diff --git a/docs/performance-improvements.md b/docs/performance-improvements.md index e2f26b9..e5bc1c0 100644 --- a/docs/performance-improvements.md +++ b/docs/performance-improvements.md @@ -25,28 +25,39 @@ Payload and key length: 64/64, 64/32, 43/32, 64/32 characters (*bestcase* represents the scenario where the payload is an exact multiple of key size) -Measured speed up: **27x** regular case, **81x** best case +Measured speed up: **27x** regular case, **81x** best case, memory allocation reduction from **400 bytes to zero**. ## Atbash cipher In version 2, a fast path was added for ASCII encoding: | | Method | Mean | Error | StdDev | Allocated | |---|---------------- |------------:|----------:|----------:|----------:| -|**v2**| Atbash | 3,974.15 ns | 10.977 ns | 10.268 ns | - | -|**v2**| Atbash_Ascii | 47.36 ns | 0.256 ns | 0.239 ns | - | +|**v2**| General | 3,974.15 ns | 10.977 ns | 10.268 ns | - | +|**v2**| Ascii | 47.36 ns | 0.256 ns | 0.239 ns | - | Payload length: 43 characters Measured speed up: **84x** +## Frequency analysis +The old v1 implementation was based on a very simple, but expensive functional LINQ implementation. 
In the new version, memory allocation was greatly reduced: + +| | Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated | +|------|--------- |-----------:|--------:|--------:|-------:|-------:|----------:| +|v1 | General | 2,331.2 ns | 9.72 ns | 9.10 ns | 0.7935 | 0.0076 | 4,992 B | +|**v2**| General | 512.9 ns | 2.11 ns | 1.76 ns | 0.2499 | - | 1,568 B | +|**v2**| Ascii_Optimized | 235.0 ns | 0.72 ns | 0.63 ns | - | - | - | + +Measured speed up: **10x**, memory allocation reduction from **5 KB to zero** (ascii). + ## Relative letter frequencies scorer The old v1 implementation was based on a very simple functional LINQ implementation. In the new version, memory allocation was greatly reduced: | | Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated | |------|--------- |---------:|----------:|----------:|-------:|-------:|----------:| |v1 | General_Linq | 3.968 us | 0.0223 us | 0.0209 us | 1.0147 | 0.0076 | 6,368 B | -|**v2**| General_Optimized | 1.464 us | 0.0042 us | 0.0035 us | 0.0210 | - | 136 B | -|**v2**| General_Ascii | 1.495 us | 0.0056 us | 0.0052 us | 0.0210 | - | 136 B | +|**v2**| General | 1.464 us | 0.0042 us | 0.0035 us | 0.0210 | - | 136 B | +|**v2**| Ascii_Optimized | 1.495 us | 0.0056 us | 0.0052 us | 0.0210 | - | 136 B | Measured speed up: **2.7x**, memory allocation reduction: **47x** @@ -57,13 +68,24 @@ In the new version, buffers can be shared, reading uses non-allocating enumerati | | Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated | |------|----------- |-----------:|---------:|---------:|-------:|-------:|----------:| -|v1 | Old | 5,530.7 ns | 27.76 ns | 25.97 ns | 1.7319 | 0.0458 | 10,888 B | -|**v2**| New | 1,124.4 ns | 2.14 ns | 2.01 ns | 0.3719 | 0.0019 | 2,336 B | -|**v2**| New_Ascii_2Grams | 306.5 ns | 1.45 ns | 1.36 ns | - | - | - | +|v1 | General | 5,530.7 ns | 27.76 ns | 25.97 ns | 1.7319 | 0.0458 | 10,888 B | +|**v2**| General | 1,124.4 ns | 2.14 ns | 2.01 ns | 0.3719 | 0.0019 | 2,336 B | +|**v2**| Ascii_2Grams 
| 306.5 ns | 1.45 ns | 1.36 ns | - | - | - | Measured speed up: **18x**, memory allocation reduction from **18 KB to zero** (ascii). +## Caesar brute-force + +| | Method | Mean | Error | StdDev | Gen 0 | Allocated | +|------|------------- |---------:|---------:|---------:|-------:|----------:| +|v1 | General | 41.41 us | 0.244 us | 0.217 us | 1.2817 | 8 KB | +|**v2**| General | 81.51 us | 0.180 us | 0.150 us | 0.9766 | 6 KB | +|**v2**| General_Optimized | 80.76 us | 0.262 us | 0.245 us | 0.8545 | 5 KB | + +Measured speed up: *none, v2 is about 2x slower than v1* (41 us vs. 81 us), memory allocation reduced to **62.5%** of v1 (8 KB to 5 KB) + ## Appendix +Performance was measured on the following setup: ``` BenchmarkDotNet=v0.13.1, OS=Windows 10.0.22000 diff --git a/src/Science.Cryptography.Ciphers.Benchmarks/Program.cs b/src/Science.Cryptography.Ciphers.Benchmarks/Program.cs index e7361b4..69b6cd5 100644 --- a/src/Science.Cryptography.Ciphers.Benchmarks/Program.cs +++ b/src/Science.Cryptography.Ciphers.Benchmarks/Program.cs @@ -1,3 +1,3 @@ using BenchmarkDotNet.Running; -BenchmarkRunner.Run(); +BenchmarkRunner.Run(); diff --git a/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/CaesarBruteforceBenchmarks.cs b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/CaesarBruteforceBenchmarks.cs new file mode 100644 index 0000000..f677e28 --- /dev/null +++ b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/CaesarBruteforceBenchmarks.cs @@ -0,0 +1,121 @@ +using BenchmarkDotNet.Attributes; + +using System.Collections.Generic; +using System.Linq; +using System; +using System.Collections; +using Science.Cryptography.Ciphers.Analysis; +using Science.Cryptography.Ciphers; +using System.Composition; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +[MemoryDiagnoser] +public class V1V2CaesarBruteforceBenchmarks +{ + private static readonly string[] _buffer = new string[26]; + private static readonly Science.Cryptography.Ciphers.ShiftCipher _cipher = new(WellKnownAlphabets.English); + private static readonly string Text 
= "the quick brown fox jumps over the lazy dog"; + + + [Benchmark] + public void V1() + { + Analyze(Text); + } + + [Benchmark] + public void V2() + { + CaesarBruteforce.Analyze(Text, WellKnownAlphabets.English); + } + + [Benchmark] + public void V2_Optimized() + { + CaesarBruteforce.Analyze(Text, _cipher, _buffer); + } + + #region V1 + private static IReadOnlyDictionary Analyze(string text, string charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ") + { + if (text == null) + throw new ArgumentNullException(nameof(text)); + + if (charset == null) + throw new ArgumentNullException(nameof(charset)); + + + var cipher = new ShiftCipher(charset); + + return Enumerable.Range(0, charset.Length) + .ToDictionary(k => k, k => cipher.Encrypt(text, k)) + ; + } + + private class ShiftCipher + { + public ShiftCipher(string charset) + { + if (charset == null) + throw new ArgumentNullException(nameof(charset)); + + this.Charset = charset; + } + + public string Charset { get; set; } + + protected string Crypt(string text, int key) + { + char[] result = new char[text.Length]; + + for (int i = 0; i < text.Length; i++) + { + int idx = IndexOfIgnoreCase(Charset, text[i]); + + result[i] = idx != -1 + ? At(this.Charset, idx + key).ToSameCaseAs(text[i]) + : text[i] + ; + } + + return new String(result); + } + + public string Encrypt(string plaintext, int key) + { + return this.Crypt(plaintext, key); + } + + public string Decrypt(string ciphertext, int key) + { + return this.Crypt(ciphertext, -key); + } + + + public static int IndexOfIgnoreCase(string source, char subject) + { + Char toCompare = subject.ToUpper(); + + for (int i = 0; i < source.Length; i++) + { + if (source[i].ToUpper() == toCompare) + return i; + } + + return -1; + } + + public static char At(string source, int index) + { + return source[Mod(index, source.Length)]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Mod(int a, int b) + { + return a >= 0 ? 
a % b : (b + a) % b; + } + } + #endregion +} \ No newline at end of file diff --git a/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/FrequencyAnalysisBenchmarks.cs b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/FrequencyAnalysisBenchmarks.cs index b181c85..b7e0880 100644 --- a/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/FrequencyAnalysisBenchmarks.cs +++ b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/FrequencyAnalysisBenchmarks.cs @@ -9,8 +9,7 @@ [MemoryDiagnoser] public class V1V2FrequencyAnalysisBenchmarks { - private static readonly ISpeculativePlaintextScorer General = new RelativeLetterFrequenciesSpeculativePlaintextScorer(Languages.English.RelativeFrequenciesOfLetters).GetForPartition(); - private static readonly ISpeculativePlaintextScorer Ascii = new AsciiRelativeLetterFrequenciesSpeculativePlaintextScorer(Languages.English.RelativeFrequenciesOfLetters).GetForPartition(); + private static readonly Dictionary _buffer = new Dictionary(); private static readonly string Text = "the quick brown fox jumps over the lazy dog"; @@ -18,26 +17,22 @@ public class V1V2FrequencyAnalysisBenchmarks [Benchmark] public void V1() { - Classify(Text); + Analyze(Text); } [Benchmark] public void V2() { - General.Score(Text); + FrequencyAnalysis.Analyze(Text); } [Benchmark] public void V2_Ascii() { - Ascii.Score(Text); - } - - public double Classify(string speculativePlaintext) - { - return Compare(Languages.English.RelativeFrequenciesOfLetters, Analyze(speculativePlaintext).AsRelativeFrequencies()); + FrequencyAnalysis.AnalyzeAsciiLetters(Text, _buffer); } + #region V1 private static AbsoluteCharacterFrequencies Analyze(string text) { if (text == null) @@ -50,14 +45,6 @@ private static AbsoluteCharacterFrequencies Analyze(string text) ); } - private static double Compare(IReadOnlyDictionary reference, IReadOnlyDictionary subject) - { - return 1 - ( - from r in reference - select Math.Abs(r.Value - (subject.ContainsKey(r.Key) ? 
subject[r.Key] : 0)) - ).Sum(); - } - private class AbsoluteCharacterFrequencies : IReadOnlyDictionary { public AbsoluteCharacterFrequencies(IReadOnlyDictionary frequencies) @@ -117,52 +104,5 @@ public IReadOnlyDictionary AsRelativeFrequencies() IEnumerator IEnumerable.GetEnumerator() => _frequencies.GetEnumerator(); #endregion } - private class RelativeCharacterFrequencies : IReadOnlyDictionary - { - public RelativeCharacterFrequencies(IReadOnlyDictionary frequencies) - { - if (frequencies == null) - throw new ArgumentNullException(nameof(frequencies)); - - _frequencies = frequencies; - } - - private readonly IReadOnlyDictionary _frequencies; - - /// - /// Gets the occurrences of a given . - /// - /// - /// - public double this[char character] - { - get - { - double frequency = 0; - _frequencies.TryGetValue(character, out frequency); - return frequency; - } - } - - - public IReadOnlyDictionary ToDictionary() => _frequencies; - - - #region IReadOnlyDictionary - IEnumerable IReadOnlyDictionary.Keys => _frequencies.Keys; - IEnumerable IReadOnlyDictionary.Values => _frequencies.Values; - - int IReadOnlyCollection>.Count => _frequencies.Count; - - double IReadOnlyDictionary.this[char key] => this[key]; - - bool IReadOnlyDictionary.ContainsKey(char key) => _frequencies.ContainsKey(key); - - bool IReadOnlyDictionary.TryGetValue(char key, out double value) => _frequencies.TryGetValue(key, out value); - - IEnumerator> IEnumerable>.GetEnumerator() => _frequencies.GetEnumerator(); - - IEnumerator IEnumerable.GetEnumerator() => _frequencies.GetEnumerator(); - #endregion - } + #endregion } \ No newline at end of file diff --git a/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/RelativeLetterFrequenciesScorerBenchmarks.cs b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/RelativeLetterFrequenciesScorerBenchmarks.cs new file mode 100644 index 0000000..fc783d5 --- /dev/null +++ b/src/Science.Cryptography.Ciphers.Benchmarks/v1v2/RelativeLetterFrequenciesScorerBenchmarks.cs @@ 
-0,0 +1,168 @@ +using BenchmarkDotNet.Attributes; + +using System.Collections.Generic; +using System.Linq; +using System; +using System.Collections; +using Science.Cryptography.Ciphers.Analysis; + +[MemoryDiagnoser] +public class V1V2RelativeLetterFrequenciesScorerBenchmarks +{ + private static readonly ISpeculativePlaintextScorer General = new RelativeLetterFrequenciesSpeculativePlaintextScorer(Languages.English.RelativeFrequenciesOfLetters).GetForPartition(); + private static readonly ISpeculativePlaintextScorer Ascii = new AsciiRelativeLetterFrequenciesSpeculativePlaintextScorer(Languages.English.RelativeFrequenciesOfLetters).GetForPartition(); + + private static readonly string Text = "the quick brown fox jumps over the lazy dog"; + + + [Benchmark] + public void V1() + { + Classify(Text); + } + + [Benchmark] + public void V2() + { + General.Score(Text); + } + + [Benchmark] + public void V2_Ascii() + { + Ascii.Score(Text); + } + + public double Classify(string speculativePlaintext) + { + return Compare(Languages.English.RelativeFrequenciesOfLetters, Analyze(speculativePlaintext).AsRelativeFrequencies()); + } + + private static AbsoluteCharacterFrequencies Analyze(string text) + { + if (text == null) + throw new ArgumentNullException(nameof(text)); + + return new AbsoluteCharacterFrequencies( + text + .GroupBy(c => c) + .ToDictionary(g => g.Key, g => g.Count()) + ); + } + + private static double Compare(IReadOnlyDictionary reference, IReadOnlyDictionary subject) + { + return 1 - ( + from r in reference + select Math.Abs(r.Value - (subject.ContainsKey(r.Key) ? subject[r.Key] : 0)) + ).Sum(); + } + + private class AbsoluteCharacterFrequencies : IReadOnlyDictionary + { + public AbsoluteCharacterFrequencies(IReadOnlyDictionary frequencies) + { + if (frequencies == null) + throw new ArgumentNullException(nameof(frequencies)); + + _frequencies = frequencies; + } + + private readonly IReadOnlyDictionary _frequencies; + + /// + /// Gets the occurrences of a given . 
+ /// + /// + /// + public int this[char character] + { + get + { + int frequency = 0; + _frequencies.TryGetValue(character, out frequency); + return frequency; + } + } + + + public IReadOnlyDictionary AsRelativeFrequencies() + { + var sum = (double)_frequencies.Sum(f => f.Value); + return new RelativeCharacterFrequencies( + _frequencies.ToDictionary( + kv => kv.Key, + kv => kv.Value / sum + ) + ); + } + + + public IReadOnlyDictionary ToDictionary() => _frequencies; + + #region IReadOnlyDictionary + IEnumerable IReadOnlyDictionary.Keys => _frequencies.Keys; + IEnumerable IReadOnlyDictionary.Values => _frequencies.Values; + + int IReadOnlyCollection>.Count => _frequencies.Count; + + int IReadOnlyDictionary.this[char key] => this[key]; + + bool IReadOnlyDictionary.ContainsKey(char key) => _frequencies.ContainsKey(key); + + bool IReadOnlyDictionary.TryGetValue(char key, out int value) => _frequencies.TryGetValue(key, out value); + + IEnumerator> IEnumerable>.GetEnumerator() => _frequencies.GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => _frequencies.GetEnumerator(); + #endregion + } + private class RelativeCharacterFrequencies : IReadOnlyDictionary + { + public RelativeCharacterFrequencies(IReadOnlyDictionary frequencies) + { + if (frequencies == null) + throw new ArgumentNullException(nameof(frequencies)); + + _frequencies = frequencies; + } + + private readonly IReadOnlyDictionary _frequencies; + + /// + /// Gets the occurrences of a given . 
+ /// + /// + /// + public double this[char character] + { + get + { + double frequency = 0; + _frequencies.TryGetValue(character, out frequency); + return frequency; + } + } + + + public IReadOnlyDictionary ToDictionary() => _frequencies; + + + #region IReadOnlyDictionary + IEnumerable IReadOnlyDictionary.Keys => _frequencies.Keys; + IEnumerable IReadOnlyDictionary.Values => _frequencies.Values; + + int IReadOnlyCollection>.Count => _frequencies.Count; + + double IReadOnlyDictionary.this[char key] => this[key]; + + bool IReadOnlyDictionary.ContainsKey(char key) => _frequencies.ContainsKey(key); + + bool IReadOnlyDictionary.TryGetValue(char key, out double value) => _frequencies.TryGetValue(key, out value); + + IEnumerator> IEnumerable>.GetEnumerator() => _frequencies.GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => _frequencies.GetEnumerator(); + #endregion + } +} \ No newline at end of file diff --git a/tests/Science.Cryptography.Ciphers.Analysis.Tests/CaesarBruteforceTests.cs b/tests/Science.Cryptography.Ciphers.Analysis.Tests/CaesarBruteforceTests.cs new file mode 100644 index 0000000..6d6d6b6 --- /dev/null +++ b/tests/Science.Cryptography.Ciphers.Analysis.Tests/CaesarBruteforceTests.cs @@ -0,0 +1,15 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Science.Cryptography.Ciphers.Analysis.Tests; + +[TestClass] +public class CaesarBruteforceTests +{ + [TestMethod] + public void NumberOfElements() + { + const string text = "the quick brown fox jumps over the lazy dog"; + var result = CaesarBruteforce.Analyze(text, WellKnownAlphabets.English); + Assert.AreEqual(26, result.Count); + } +}