Skip to content

Commit 3604580

Browse files
authored
Fix bug that causes Merge to fail with empty DataFrames (#7573)
* Fix bug that causes Merge to fail with empty DataFrames
* Improve readability of Clone method
1 parent 25b977e commit 3604580

File tree

3 files changed

+299
-1
lines changed

3 files changed

+299
-1
lines changed

src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,12 +460,18 @@ private List<ReadOnlyDataFrameBuffer<byte>> CloneNullBitMapBuffers()
460460
public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndices, Type type, bool invertMapIndices = false)
461461
where U : unmanaged
462462
{
463+
PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);
464+
465+
if (Buffers.Count == 0)
466+
{
467+
return ret;
468+
}
469+
463470
ReadOnlySpan<T> thisSpan = Buffers[0].ReadOnlySpan;
464471
ReadOnlySpan<byte> thisNullBitMapSpan = NullBitMapBuffers[0].ReadOnlySpan;
465472
long minRange = 0;
466473
long maxRange = DataFrameBuffer<T>.MaxCapacity;
467474
long maxCapacity = maxRange;
468-
PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);
469475
for (int b = 0; b < mapIndices.Buffers.Count; b++)
470476
{
471477
int index = b;
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Xunit;
6+
7+
namespace Microsoft.Data.Analysis.Tests
8+
{
9+
/// <summary>
/// Test-only assertion helpers for comparing <see cref="DataFrame"/> instances.
/// </summary>
public static class DataFrameAssert
{
    /// <summary>
    /// Asserts that <paramref name="actual"/> has the same number of columns and rows as
    /// <paramref name="expected"/>, and that columns at the same position agree on name,
    /// runtime type and every cell value.
    /// </summary>
    /// <param name="expected">The data frame describing the desired result.</param>
    /// <param name="actual">The data frame produced by the code under test.</param>
    public static void Equal(DataFrame expected, DataFrame actual)
    {
        Assert.Equal(expected.Columns.Count, actual.Columns.Count);
        Assert.Equal(expected.Rows.Count, actual.Rows.Count);

        for (int columnIndex = 0; columnIndex < expected.Columns.Count; columnIndex++)
        {
            var wantedColumn = expected.Columns[columnIndex];
            var foundColumn = actual.Columns[columnIndex];

            Assert.Equal(wantedColumn.Name, foundColumn.Name);
            Assert.Equal(wantedColumn.GetType(), foundColumn.GetType());

            for (int rowIndex = 0; rowIndex < expected.Rows.Count; rowIndex++)
            {
                var wantedCell = wantedColumn[rowIndex];
                var foundCell = foundColumn[rowIndex];

                if (wantedCell != null && foundCell != null)
                {
                    Assert.Equal(wantedCell, foundCell);
                    continue;
                }

                // At least one side is null, so both sides are required to be null.
                Assert.Null(wantedCell);
                Assert.Null(foundCell);
            }
        }
    }
}
42+
}

test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,256 @@ public void TestMerge_Issue5778()
714714
MatchRowsOnMergedDataFrame(merge, left, right, 1, 1, 0);
715715
}
716716

717+
/// <summary>
/// Theory data for <c>TestMerge_EmptyDataFrames</c>. Each row is
/// { left, right, left join columns, right join columns, join algorithm, expected output }.
/// Every scenario has an empty left frame and expects an output with eight empty columns.
/// </summary>
public static IEnumerable<object[]> GenerateData_TestMerge_EmptyDataFrames()
{
    // Each factory builds a brand-new frame so no instance is shared between rows.
    DataFrame EmptyLeft() => new DataFrame(
        new Int32DataFrameColumn("Index"),
        new Int32DataFrameColumn("L1"),
        new Int32DataFrameColumn("L2"),
        new StringDataFrameColumn("L3")
    );

    DataFrame EmptyRight() => new DataFrame(
        new Int32DataFrameColumn("Index"),
        new Int32DataFrameColumn("R1"),
        new Int32DataFrameColumn("R2"),
        new StringDataFrameColumn("R3")
    );

    DataFrame EmptyMerged() => new DataFrame(
        new Int32DataFrameColumn("Index_left"),
        new Int32DataFrameColumn("L1"),
        new Int32DataFrameColumn("L2"),
        new StringDataFrameColumn("L3"),
        new Int32DataFrameColumn("Index_right"),
        new Int32DataFrameColumn("R1"),
        new Int32DataFrameColumn("R2"),
        new StringDataFrameColumn("R3")
    );

    object[] Scenario(DataFrame left, DataFrame right, JoinAlgorithm algorithm) => new object[]
    {
        left,
        right,
        new string[] { "L1" },
        new string[] { "R1" },
        algorithm,
        EmptyMerged(),
    };

    // Empty left frame joined against a populated right frame.
    yield return Scenario(
        EmptyLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 0, 1, 1 }),
            new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
            new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
        ),
        JoinAlgorithm.Left);

    // Both frames empty, once per join algorithm.
    JoinAlgorithm[] algorithms =
    {
        JoinAlgorithm.Inner,
        JoinAlgorithm.Left,
        JoinAlgorithm.Right,
        JoinAlgorithm.FullOuter,
    };

    foreach (JoinAlgorithm algorithm in algorithms)
    {
        yield return Scenario(EmptyLeft(), EmptyRight(), algorithm);
    }
}
860+
861+
/// <summary>
/// Merges each data row from <see cref="GenerateData_TestMerge_EmptyDataFrames"/> and
/// checks the result against the expected data frame.
/// </summary>
[Theory]
[MemberData(nameof(GenerateData_TestMerge_EmptyDataFrames))]
public void TestMerge_EmptyDataFrames(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
{
    // Act
    var merged = left.Merge(right, leftColumns, rightColumns, joinAlgorithm: joinAlgorithm);

    // Assert
    DataFrameAssert.Equal(expectedOutput, merged);
}
869+
870+
/// <summary>
/// Theory data for <c>TestMerge_OuterJoinsPreserveUnmatched</c>. Each row is
/// { left, right, left join columns, right join columns, join algorithm, expected output }.
/// In every scenario the expected output keeps all rows of one side and has only
/// nulls in the columns of the other side.
/// </summary>
public static IEnumerable<object[]> GenerateData_TestMerge_OuterJoinsPreserveUnmatched()
{
    // Each factory builds a brand-new frame so no instance is shared between rows.
    DataFrame PopulatedLeft() => new DataFrame(
        new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
        new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
        new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
        new StringDataFrameColumn("L3", new[] { "A", "B", "C" })
    );

    // All left rows kept; every right-hand column is null.
    DataFrame LeftRowsWithNullRight() => new DataFrame(
        new Int32DataFrameColumn("Index_left", new[] { 0, 1, 2 }),
        new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
        new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
        new StringDataFrameColumn("L3", new[] { "A", "B", "C" }),
        new Int32DataFrameColumn("Index_right", new int?[] { null, null, null }),
        new Int32DataFrameColumn("R1", new int?[] { null, null, null }),
        new Int32DataFrameColumn("R2", new int?[] { null, null, null }),
        new StringDataFrameColumn("R3", new string[] { null, null, null })
    );

    object[] Scenario(DataFrame left, DataFrame right, JoinAlgorithm algorithm, DataFrame expected) => new object[]
    {
        left,
        right,
        new string[] { "L1" },
        new string[] { "R1" },
        algorithm,
        expected,
    };

    // Left join where no R1 value equals any L1 value.
    yield return Scenario(
        PopulatedLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 10, 11, 11 }),
            new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
            new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
        ),
        JoinAlgorithm.Left,
        LeftRowsWithNullRight());

    // Left join against an empty right frame.
    yield return Scenario(
        PopulatedLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index"),
            new Int32DataFrameColumn("R1"),
            new Int32DataFrameColumn("R2"),
            new StringDataFrameColumn("R3")
        ),
        JoinAlgorithm.Left,
        LeftRowsWithNullRight());

    // Right join with an empty left frame: all right rows kept, left columns null.
    yield return Scenario(
        new DataFrame(
            new Int32DataFrameColumn("Index"),
            new Int32DataFrameColumn("L1"),
            new Int32DataFrameColumn("L2"),
            new StringDataFrameColumn("L3")
        ),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
            new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
            new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
        ),
        JoinAlgorithm.Right,
        new DataFrame(
            new Int32DataFrameColumn("Index_left", new int?[] { null, null, null }),
            new Int32DataFrameColumn("L1", new int?[] { null, null, null }),
            new Int32DataFrameColumn("L2", new int?[] { null, null, null }),
            new StringDataFrameColumn("L3", new string[] { null, null, null }),
            new Int32DataFrameColumn("Index_right", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
            new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
            new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
        ));
}
957+
958+
/// <summary>
/// Merges each data row from <see cref="GenerateData_TestMerge_OuterJoinsPreserveUnmatched"/>
/// and checks the result against the expected data frame.
/// </summary>
[Theory]
[MemberData(nameof(GenerateData_TestMerge_OuterJoinsPreserveUnmatched))]
public void TestMerge_OuterJoinsPreserveUnmatched(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
{
    // Act
    var merged = left.Merge(right, leftColumns, rightColumns, joinAlgorithm: joinAlgorithm);

    // Assert
    DataFrameAssert.Equal(expectedOutput, merged);
}
966+
717967
[Fact]
718968
//Issue 6127
719969
public void TestMerge_CorrectColumnTypes()

0 commit comments

Comments
 (0)