Skip to content

Commit e5ae477

Browse files
committed
improve sorting speed for complex types
1 parent 33aa88d commit e5ae477

File tree

5 files changed

+163
-5
lines changed

5 files changed

+163
-5
lines changed

src/ParquetViewer.Engine/Helpers.cs

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
namespace ParquetViewer.Engine
2+
{
3+
/// <summary>
/// Comparison helpers shared by the engine's value types.
/// </summary>
internal static class Helpers
{
    /// <summary>
    /// Three-way comparison of two loosely typed values.
    /// </summary>
    /// <param name="value">Left-hand value; <c>null</c> is treated as <see cref="DBNull.Value"/>.</param>
    /// <param name="otherValue">Right-hand value; <c>null</c> is treated as <see cref="DBNull.Value"/>.</param>
    /// <returns>
    /// 0 when both values are missing, -1 when only <paramref name="value"/> is missing and
    /// 1 when only <paramref name="otherValue"/> is missing. Otherwise the result of
    /// <see cref="IComparable.CompareTo(object)"/> when both values are comparable and share the
    /// exact same runtime type, or a comparison of their string representations when they do not.
    /// </returns>
    public static int CompareTo(object? value, object? otherValue)
    {
        // Normalise null to DBNull.Value so both "missing" representations behave the same.
        value ??= DBNull.Value;
        otherValue ??= DBNull.Value;

        bool valueIsMissing = ReferenceEquals(value, DBNull.Value);
        bool otherIsMissing = ReferenceEquals(otherValue, DBNull.Value);

        if (valueIsMissing && otherIsMissing)
            return 0;

        if (otherIsMissing)
            return 1;

        if (valueIsMissing)
            return -1;

        // IComparable implementations are only trusted with an argument of their own exact type.
        if (value is IComparable comparableValue && otherValue is IComparable otherComparableValue
            && value.GetType().Equals(otherValue.GetType()))
        {
            return comparableValue.CompareTo(otherComparableValue);
        }

        // Fallback: order by string representation. string.Compare tolerates a ToString() that
        // returns null (instead of throwing) and names the culture-sensitive comparison that
        // string.CompareTo(string) performs implicitly.
        return string.Compare(value.ToString(), otherValue.ToString(), StringComparison.CurrentCulture);
    }
}
30+
}

src/ParquetViewer.Engine/Types/ByteArrayValue.cs

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
namespace ParquetViewer.Engine.Types
22
{
3-
public class ByteArrayValue
3+
public class ByteArrayValue : IComparable<ByteArrayValue>, IComparable
44
{
55
public string Name { get; }
66
public byte[] Data { get; }
@@ -12,5 +12,25 @@ public ByteArrayValue(string name, byte[] data)
1212
}
1313

1414
/// <summary>
/// Returns the text produced by <see cref="BitConverter.ToString(byte[])"/> for <see cref="Data"/>.
/// </summary>
public override string ToString()
{
    return BitConverter.ToString(Data);
}
15+
16+
/// <summary>
/// Orders this value relative to <paramref name="other"/> by comparing the underlying bytes.
/// </summary>
/// <param name="other">Value to compare against; may be null.</param>
/// <returns>
/// 1 when <paramref name="other"/> (or its data) is null, -1 when only this instance has no
/// data, otherwise the result of comparing the two byte sequences.
/// </returns>
public int CompareTo(ByteArrayValue? other)
{
    // A missing right-hand side is checked first, so it wins even when this side is empty too.
    if (other?.Data is not byte[] otherBytes)
    {
        return 1;
    }

    if (this.Data is not byte[] ownBytes)
    {
        return -1;
    }

    return ByteArraysEqual(ownBytes, otherBytes);
}
25+
26+
// NOTE: despite its name this is a three-way comparison, not an equality test. It returns the
// relative order of the two byte sequences as reported by SequenceCompareTo (0 when they match).
private static int ByteArraysEqual(ReadOnlySpan<byte> a1, ReadOnlySpan<byte> a2)
{
    return MemoryExtensions.SequenceCompareTo(a1, a2);
}
27+
28+
/// <summary>
/// Non-generic comparison entry point. Forwards to the typed overload when
/// <paramref name="obj"/> is a <see cref="ByteArrayValue"/>.
/// </summary>
/// <returns>The typed comparison result, or 1 for null and for any other type.</returns>
public int CompareTo(object? obj) =>
    obj is ByteArrayValue byteArray ? CompareTo(byteArray) : 1;
1535
}
1636
}

src/ParquetViewer.Engine/Types/ListValue.cs

+33-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
namespace ParquetViewer.Engine.Types
55
{
6-
public class ListValue
6+
public class ListValue : IComparable<ListValue>, IComparable
77
{
88
public IList Data { get; }
99
public Type? Type { get; private set; }
@@ -57,5 +57,37 @@ public override string ToString()
5757
sb.Append(']');
5858
return sb.ToString();
5959
}
60+
61+
/// <summary>
/// Compares this list with <paramref name="other"/> element by element, in order.
/// </summary>
/// <param name="other">List to compare against; may be null.</param>
/// <returns>
/// 1 when <paramref name="other"/> (or its data) is null, -1 when only this list has no data.
/// Otherwise the result for the first pair of elements that differ according to
/// <c>Helpers.CompareTo</c>. When one list is a prefix of the other, the list with more
/// values is 'less than' (negative for the longer list, positive for the shorter one).
/// Returns 0 only when both lists have the same length and every element compares equal.
/// </returns>
public int CompareTo(ListValue? other)
{
    if (other?.Data is null)
        return 1;
    else if (this.Data is null)
        return -1;

    int count = Data.Count;
    int otherCount = other.Data.Count;
    int sharedCount = count < otherCount ? count : otherCount;

    for (var i = 0; i < sharedCount; i++)
    {
        int comparison = Helpers.CompareTo(Data[i], other.Data[i]);
        if (comparison != 0)
            return comparison;
    }

    // Every shared position matched, so the lengths decide. The list with more values is
    // 'less than' in sort order. Previously only the "this list is longer" case was handled
    // and the reverse case fell through to 0, so a.CompareTo(b) and b.CompareTo(a) could
    // disagree, which violates the IComparable contract that sorting relies on.
    return otherCount.CompareTo(count);
}
84+
85+
/// <summary>
/// Non-generic comparison entry point. Forwards to the typed overload when
/// <paramref name="obj"/> is a <see cref="ListValue"/>.
/// </summary>
/// <returns>The typed comparison result, or 1 for null and for any other type.</returns>
public int CompareTo(object? obj) =>
    obj is ListValue list ? CompareTo(list) : 1;
6092
}
6193
}

src/ParquetViewer.Engine/Types/MapValue.cs

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
namespace ParquetViewer.Engine.Types
22
{
3-
public class MapValue
3+
public class MapValue : IComparable<MapValue>, IComparable
44
{
55
public object Key { get; } = DBNull.Value;
66
public Type KeyType { get; }
@@ -46,5 +46,30 @@ public override string ToString()
4646

4747
return $"({key},{value})";
4848
}
49+
50+
/// <summary>
51+
/// Sorts by Key first, then Value.
52+
/// </summary>
53+
/// <param name="other">Entry to compare against; may be null.</param>
/// <returns>
/// 1 when <paramref name="other"/> is null; otherwise the key comparison, falling back to the
/// value comparison only when the keys compare equal.
/// </returns>
public int CompareTo(MapValue? other)
{
    if (other is null)
    {
        return 1;
    }

    // The value is only consulted to break a tie between equal keys.
    int keyOrder = Helpers.CompareTo(Key, other.Key);
    return keyOrder != 0
        ? keyOrder
        : Helpers.CompareTo(Value, other.Value);
}
64+
65+
/// <summary>
/// Non-generic comparison entry point. Forwards to the typed overload when
/// <paramref name="obj"/> is a <see cref="MapValue"/>.
/// </summary>
/// <returns>The typed comparison result, or 1 for null and for any other type.</returns>
public int CompareTo(object? obj) =>
    obj is MapValue mapValue ? CompareTo(mapValue) : 1;
72+
73+
4974
}
5075
}

src/ParquetViewer.Engine/Types/StructValue.cs

+53-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
namespace ParquetViewer.Engine.Types
66
{
7-
public class StructValue
7+
public class StructValue : IComparable<StructValue>, IComparable
88
{
99
public string Name { get; }
1010

11-
//we are always guaranteed to have exactly one row in 'Data' since we don't allow nested structs right now
11+
//we are always guaranteed to have exactly one row in 'Data' as that is how we handle Structs
1212
public DataRow Data { get; }
1313

1414
public StructValue(string name, DataRow data)
@@ -113,5 +113,56 @@ private static void WriteValue(Utf8JsonWriter jsonWriter, object value, bool tru
113113
/// </summary>
114114
private static bool IsNumber(Type type)
{
    // A type counts as a number when any interface it implements is a closed form of INumber<>.
    foreach (Type candidate in type.GetInterfaces())
    {
        if (candidate.IsGenericType && candidate.GetGenericTypeDefinition() == typeof(INumber<>))
        {
            return true;
        }
    }

    return false;
}
116+
117+
// Cache for GetFieldNames(); stays null until the names are first requested.
private IReadOnlyCollection<string>? _columnNames = null;

/// <summary>
/// Returns the column names of the table that <see cref="Data"/> belongs to, computing
/// them on first use and reusing the cached collection afterwards.
/// </summary>
private IReadOnlyCollection<string> GetFieldNames()
{
    if (_columnNames is null)
    {
        var names = new List<string>();
        foreach (DataColumn column in GetColumns(Data))
        {
            names.Add(column.ColumnName);
        }

        _columnNames = names.AsReadOnly();
    }

    return _columnNames;
}
120+
121+
/// <summary>
122+
/// Sorts by field names first, then by values
123+
/// </summary>
124+
/// <param name="other">Struct to compare against; may be null.</param>
/// <returns>
/// 1 when <paramref name="other"/> is null. A struct without data or fields sorts before one
/// that has fields, and two such structs compare equal. Otherwise the comparison of the
/// '|'-joined field names, and when those match, the result for the first field whose values
/// differ according to <c>Helpers.CompareTo</c>; 0 when nothing differs.
/// </returns>
public int CompareTo(StructValue? other)
{
    if (other is null)
        return 1;

    bool otherIsEmpty = other.Data is null || other.GetFieldNames().Count == 0;
    bool thisIsEmpty = Data is null || GetFieldNames().Count == 0;

    // Two empty structs used to each report themselves as greater than the other, which
    // breaks the symmetry IComparable requires; they now compare as equal.
    if (otherIsEmpty && thisIsEmpty)
        return 0;

    if (otherIsEmpty)
        return 1;

    if (thisIsEmpty)
        return -1;

    var otherColumnNames = string.Join("|", other.GetFieldNames());
    var columnNames = string.Join("|", this.GetFieldNames());

    // Same culture-sensitive ordering as string.CompareTo(string), spelled out explicitly.
    int schemaComparison = string.Compare(columnNames, otherColumnNames, StringComparison.CurrentCulture);
    if (schemaComparison != 0)
        return schemaComparison;

    // Identical joined names do not guarantee identical field counts (a field name may itself
    // contain '|'), so settle that before indexing both rows with the same positions.
    int fieldCount = GetFieldNames().Count;
    int countComparison = fieldCount.CompareTo(other.GetFieldNames().Count);
    if (countComparison != 0)
        return countComparison;

    for (var i = 0; i < fieldCount; i++)
    {
        var otherValue = other.Data[i];
        var value = Data[i];
        int comparison = Helpers.CompareTo(value, otherValue);
        if (comparison != 0)
            return comparison;
    }

    return 0; //Both structs appear equal
}
151+
152+
/// <summary>
/// Lazily yields each column of the table that <paramref name="dataRow"/> belongs to, typed
/// as <see cref="DataColumn"/>.
/// </summary>
private static IEnumerable<DataColumn> GetColumns(DataRow dataRow)
{
    DataColumnCollection columns = dataRow.Table.Columns;
    foreach (object entry in columns)
    {
        yield return (DataColumn)entry;
    }
}
159+
160+
/// <summary>
/// Non-generic comparison entry point. Forwards to the typed overload when
/// <paramref name="obj"/> is a <see cref="StructValue"/>.
/// </summary>
/// <returns>The typed comparison result, or 1 for null and for any other type.</returns>
public int CompareTo(object? obj) =>
    obj is StructValue @struct ? CompareTo(@struct) : 1;
116167
}
117168
}

0 commit comments

Comments
 (0)