Skip to content

Commit 0e7b70c

Browse files
author
Jack Dermody
committed
small refactors
1 parent 5ea42f0 commit 0e7b70c

File tree

19 files changed

+204
-58
lines changed

19 files changed

+204
-58
lines changed

BrightData.UnitTests/DirectedGraphTests.cs

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,18 +14,20 @@ public class DirectedGraphTests
1414
[Fact]
1515
public void SimpleTest()
1616
{
17-
var builder = new DirectedGraphBuilder<GraphNodeIndex>();
18-
builder.Add(new(100));
19-
builder.Add(new(200));
20-
builder.AddEdge(200, 100);
17+
var builder = new SparseGraphBuilder<GraphNodeIndex>();
18+
var firstNodeIndex = builder.Add(new(100));
19+
var secondNodeIndex = builder.Add(new(200));
20+
builder.AddEdge(secondNodeIndex, firstNodeIndex);
2121

2222
var graph = builder.Build();
2323
graph.Size.Should().Be(2);
24-
graph.TryGetValue(100, out _).Should().BeTrue();
25-
graph.TryGetValue(200, out _).Should().BeTrue();
26-
graph.TryGetValue(300, out _).Should().BeFalse();
27-
graph.EnumerateConnectedNodes(200).Should().HaveCount(1);
28-
graph.EnumerateConnectedNodes(100).Should().BeEmpty();
24+
graph.TryGetValue(firstNodeIndex, out var val).Should().BeTrue();
25+
val.Value.Index.Should().Be(100);
26+
graph.TryGetValue(secondNodeIndex, out val).Should().BeTrue();
27+
val.Value.Index.Should().Be(200);
28+
graph.TryGetValue(3, out _).Should().BeFalse();
29+
graph.EnumerateDirectlyConnectedNodes(secondNodeIndex).Should().HaveCount(1);
30+
graph.EnumerateDirectlyConnectedNodes(firstNodeIndex).Should().BeEmpty();
2931
}
3032
}
3133
}

BrightData/BrightData.xml

Lines changed: 42 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9030,6 +9030,21 @@
90309030
<param name="name">New column name</param>
90319031
<returns></returns>
90329032
</member>
9033+
<member name="M:BrightData.IBuildGraphs`1.Add(`0)">
9034+
<summary>
9035+
Add a new node
9036+
</summary>
9037+
<param name="node"></param>
9038+
<returns></returns>
9039+
</member>
9040+
<member name="M:BrightData.IBuildGraphs`1.AddEdge(System.UInt32,System.UInt32)">
9041+
<summary>
9042+
Adds an edge between two nodes
9043+
</summary>
9044+
<param name="fromNodeIndex"></param>
9045+
<param name="toNodeIndex"></param>
9046+
<returns></returns>
9047+
</member>
90339048
<member name="T:BrightData.IGraphNode">
90349049
<summary>
90359050
A graph node
@@ -13776,6 +13791,13 @@
1377613791
<param name="bitIndex"></param>
1377713792
<exception cref="T:System.ArgumentException"></exception>
1377813793
</member>
13794+
<member name="P:BrightData.Types.BitVector.Item(System.UInt32)">
13795+
<summary>
13796+
Gets or sets an item in the vector
13797+
</summary>
13798+
<param name="bitIndex"></param>
13799+
<exception cref="T:System.ArgumentException"></exception>
13800+
</member>
1377913801
<member name="M:BrightData.Types.BitVector.SetBits(System.Range)">
1378013802
<summary>
1378113803
Sets a range of bits
@@ -13814,6 +13836,9 @@
1381413836
<member name="M:BrightData.Types.BitVector.ToString">
1381513837
<inheritdoc />
1381613838
</member>
13839+
<member name="P:BrightData.Types.BitVector.DataAsBytes">
13840+
<inheritdoc />
13841+
</member>
1381713842
<member name="T:BrightData.Types.FixedSizeSortedAscending1Array`2">
1381813843
<summary>
1381913844
Fixed size sorted array of values and weights (max 1 elements)
@@ -19318,6 +19343,18 @@
1931819343
<member name="M:BrightData.Types.FixedSizeSortedDescending32Array`2.ToString">
1931919344
<inheritdoc />
1932019345
</member>
19346+
<member name="P:BrightData.Types.Graph.DenseGraph`1.DataAsBytes">
19347+
<inheritdoc />
19348+
</member>
19349+
<member name="P:BrightData.Types.Graph.DenseGraph`1.Size">
19350+
<inheritdoc />
19351+
</member>
19352+
<member name="M:BrightData.Types.Graph.DenseGraphBuilder`1.Add(`0)">
19353+
<inheritdoc />
19354+
</member>
19355+
<member name="M:BrightData.Types.Graph.DenseGraphBuilder`1.AddEdge(System.UInt32,System.UInt32)">
19356+
<inheritdoc />
19357+
</member>
1932119358
<member name="T:BrightData.Types.Graph.FixedSizeWeightedDynamicGraph`3">
1932219359
<summary>
1932319360
A fixed size weighted graph
@@ -19618,19 +19655,10 @@
1961819655
<typeparam name="T"></typeparam>
1961919656
</member>
1962019657
<member name="M:BrightData.Types.Graph.SparseGraphBuilder`1.Add(`0)">
19621-
<summary>
19622-
Add a new node
19623-
</summary>
19624-
<param name="node"></param>
19625-
<returns></returns>
19658+
<inheritdoc />
1962619659
</member>
1962719660
<member name="M:BrightData.Types.Graph.SparseGraphBuilder`1.AddEdge(System.UInt32,System.UInt32)">
19628-
<summary>
19629-
Adds an edge between two nodes
19630-
</summary>
19631-
<param name="fromNodeIndex"></param>
19632-
<param name="toNodeIndex"></param>
19633-
<returns></returns>
19661+
<inheritdoc />
1963419662
</member>
1963519663
<member name="M:BrightData.Types.Graph.SparseGraphBuilder`1.Build">
1963619664
<summary>
@@ -20067,6 +20095,9 @@
2006720095
<member name="M:BrightData.Types.ReadOnlyBitVector.ToString">
2006820096
<inheritdoc />
2006920097
</member>
20098+
<member name="P:BrightData.Types.ReadOnlyBitVector.DataAsBytes">
20099+
<inheritdoc />
20100+
</member>
2007020101
<member name="T:BrightData.Types.SingleTypeSpanTuple">
2007120102
<summary>
2007220103
A tuple of spans - each with the same type

BrightData/Buffer/Operations/IndexedCopyOperation.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ public async Task Execute(INotifyOperationProgress? notify = null, string? msg =
2727

2828
Guid? id = null;
2929
if (notify is not null) {
30-
id = new Guid();
30+
id = Guid.NewGuid();
3131
notify.OnStartOperation(id.Value, msg);
3232
}
3333

BrightData/Converter/GenericConverter.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ namespace BrightData.Converter
88
/// </summary>
99
/// <typeparam name="T"></typeparam>
1010
/// <param name="fallback"></param>
11-
internal class GenericConverter<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)]T>([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)]T? fallback = default)
11+
internal class GenericConverter<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)]T>([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)]T? fallback = null)
1212
where T : struct
1313
{
1414
readonly TypeConverter _converter = TypeDescriptor.GetConverter(typeof(T));

BrightData/DataTable/ColumnOrientedDataTable.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -187,11 +187,11 @@ protected async Task<ReadOnlyMemory<WeightedIndexList>> GetWeightedIndexLists(Re
187187
return Copy(block, (in DataRangeColumnType item) => new WeightedIndexList(data.Slice((int)item.StartIndex, (int)item.Size)));
188188
}
189189

190-
public async Task<ReadOnlyMemory<float>> GetTensorData() => _tensors ??= await GetBlock<float>(_header.TensorOffset, _header.TensorSizeBytes);
191-
public async Task<ReadOnlyMemory<byte>> GetBinaryData() => _binaryData ??= await GetBlock<byte>(_header.BinaryDataOffset, _header.BinaryDataSizeBytes);
192-
public async Task<ReadOnlyMemory<uint>> GetIndices() => _indices ??= await GetBlock<uint>(_header.IndexOffset, _header.IndexSizeBytes);
193-
public async Task<ReadOnlyMemory<WeightedIndexList.Item>> GetWeightedIndices() => _weightedIndices ??= await GetBlock<WeightedIndexList.Item>(_header.WeightedIndexOffset, _header.WeightedIndexSizeBytes);
194-
public async Task<List<string>> GetStringData() => _strings ??= await ReadStrings(_header.StringOffset, _header.StringSizeBytes);
190+
public async ValueTask<ReadOnlyMemory<float>> GetTensorData() => _tensors ??= await GetBlock<float>(_header.TensorOffset, _header.TensorSizeBytes);
191+
public async ValueTask<ReadOnlyMemory<byte>> GetBinaryData() => _binaryData ??= await GetBlock<byte>(_header.BinaryDataOffset, _header.BinaryDataSizeBytes);
192+
public async ValueTask<ReadOnlyMemory<uint>> GetIndices() => _indices ??= await GetBlock<uint>(_header.IndexOffset, _header.IndexSizeBytes);
193+
public async ValueTask<ReadOnlyMemory<WeightedIndexList.Item>> GetWeightedIndices() => _weightedIndices ??= await GetBlock<WeightedIndexList.Item>(_header.WeightedIndexOffset, _header.WeightedIndexSizeBytes);
194+
public async ValueTask<List<string>> GetStringData() => _strings ??= await ReadStrings(_header.StringOffset, _header.StringSizeBytes);
195195

196196
public uint RowCount => _header.RowCount;
197197
public uint ColumnCount => _header.ColumnCount;

BrightData/ExtensionMethods.IndexList.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ public static WeightedIndexListWithLabel<T>[] Normalize<T>(this Span<WeightedInd
146146
var index = 0;
147147
foreach(ref var item in data) {
148148
ret[index++] = item with { Data = WeightedIndexList.Create(
149-
item.Data.Indices.Select(wi => new WeightedIndexList.Item(wi.Index, wi.Weight / maxWeight)).ToArray()
149+
item.Data.Indices.Select(wi => wi with { Weight = wi.Weight / maxWeight }).ToArray()
150150
)};
151151
}
152152

BrightData/ExtensionMethods.Span.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ Func<T, T> transformer
196196
else {
197197
var xp = xfp;
198198
var zp = zfp;
199-
for (uint i = 0; i < size; i++)
199+
for (var i = 0; i < size; i++)
200200
*zp++ = transformer(*xp++);
201201
}
202202
}

BrightData/Helper/StringTables/StringTable.cs

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,11 @@ public class StringTable(ReadOnlyMemory<OffsetAndSize> stringTable, ReadOnlyMemo
1616
readonly ReadOnlyMemory<OffsetAndSize> _stringTable = stringTable;
1717
readonly ReadOnlyMemory<byte> _stringData = stringData;
1818

19+
/// <summary>
20+
/// Creates a string table from bytes
21+
/// </summary>
22+
/// <param name="data"></param>
23+
/// <returns></returns>
1924
public static StringTable Create(ReadOnlyMemory<byte> data)
2025
{
2126
var blocks = data.GetTupleFromBlockHeader<OffsetAndSize, byte>();
@@ -25,6 +30,11 @@ public static StringTable Create(ReadOnlyMemory<byte> data)
2530
);
2631
}
2732

33+
/// <summary>
34+
/// Creates a string table from a file
35+
/// </summary>
36+
/// <param name="filePath"></param>
37+
/// <returns></returns>
2838
public static async Task<StringTable> Create(string filePath)
2939
{
3040
return Create(await File.ReadAllBytesAsync(filePath));
@@ -65,13 +75,15 @@ public string[] GetAll(int maxStringSize = 1024)
6575
/// <returns></returns>
6676
/// <exception cref="Exception"></exception>
6777
/// <exception cref="NotImplementedException"></exception>
68-
public async Task<IIndexStrings> GetStringIndexer(StringIndexType type = StringIndexType.Dictionary, int maxStringSize = 1024)
78+
public Task<IIndexStrings> GetStringIndexer(StringIndexType type = StringIndexType.Dictionary, int maxStringSize = 1024)
6979
{
7080
var span = _stringTable.Span;
7181
var dataSpan = _stringData.Span;
82+
IIndexStrings ret;
7283
switch (type) {
7384
case StringIndexType.Dictionary: {
74-
return new FrozenDictionaryStringIndexer(span, dataSpan, maxStringSize);
85+
ret = new FrozenDictionaryStringIndexer(span, dataSpan, maxStringSize);
86+
break;
7587
}
7688
case StringIndexType.Trie: {
7789
using var buffer = SpanOwner<char>.Allocate(maxStringSize);
@@ -82,11 +94,14 @@ public async Task<IIndexStrings> GetStringIndexer(StringIndexType type = StringI
8294
var bufferSize = Encoding.UTF8.GetChars(utf8, bufferSpan);
8395
trieBuilder.Add(bufferSpan[..bufferSize], i);
8496
}
85-
return new TrieStringIndexer(trieBuilder.Build(), this);
97+
ret = new TrieStringIndexer(trieBuilder.Build(), this);
98+
break;
8699
}
87100
default:
88101
throw new NotImplementedException(type.ToString());
89102
}
103+
104+
return Task.FromResult(ret);
90105
}
91106

92107
/// <summary>
@@ -95,7 +110,7 @@ public async Task<IIndexStrings> GetStringIndexer(StringIndexType type = StringI
95110
/// <param name="tokenizer"></param>
96111
/// <param name="maxStringSize"></param>
97112
/// <returns></returns>
98-
public async Task<IIndexStrings> GetStringIndexer(Func<ReadOnlySpan<char>, ReadOnlySpan<int>> tokenizer, int maxStringSize = 1024)
113+
public Task<IIndexStrings> GetStringIndexer(Func<ReadOnlySpan<char>, ReadOnlySpan<int>> tokenizer, int maxStringSize = 1024)
99114
{
100115
// build the tokenized trie
101116
using var buffer = SpanOwner<char>.Allocate(maxStringSize);
@@ -109,7 +124,8 @@ public async Task<IIndexStrings> GetStringIndexer(Func<ReadOnlySpan<char>, ReadO
109124
trieBuilder.Add(tokenizer(bufferSpan[..bufferSize]), i);
110125
}
111126

112-
return new TokenizedTrieStringIndexer(trieBuilder.Build(), this, tokenizer);
127+
IIndexStrings ret = new TokenizedTrieStringIndexer(trieBuilder.Build(), this, tokenizer);
128+
return Task.FromResult(ret);
113129
}
114130
}
115131
}

BrightData/Helper/StringTables/UniqueStringTrie.cs

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,11 @@ public UniqueIndexedStringTrie(ReadOnlyMemory<NodeData> data)
203203
_data = data;
204204
}
205205

206+
/// <summary>
207+
/// Loads a string trie from a file
208+
/// </summary>
209+
/// <param name="filePath"></param>
210+
/// <returns></returns>
206211
public static async Task<UniqueIndexedStringTrie<T>> Load(string filePath)
207212
{
208213
var data = await File.ReadAllBytesAsync(filePath);
@@ -252,10 +257,15 @@ public bool TryGetIndex(ReadOnlySpan<T> str, out uint ret)
252257
return true;
253258
}
254259

255-
ret = default;
260+
ret = 0;
256261
return false;
257262
}
258263

264+
/// <summary>
265+
/// Searches the trie for a prefix
266+
/// </summary>
267+
/// <param name="prefix"></param>
268+
/// <returns>The indices that match the prefix</returns>
259269
public IEnumerable<uint> Search(ReadOnlySpan<T> prefix)
260270
{
261271
var isValid = true;

BrightData/Interfaces.DataTable.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ public interface ITensorDataProvider
319319
/// Returns the entire block of tensor data
320320
/// </summary>
321321
/// <returns></returns>
322-
Task<ReadOnlyMemory<float>> GetTensorData();
322+
ValueTask<ReadOnlyMemory<float>> GetTensorData();
323323

324324
/// <summary>
325325
/// Sets the tensor mapping functions

0 commit comments

Comments
 (0)