Skip to content

Commit a67b1ff

Browse files
authored
Merge pull request #41 from mukunku/v2.3.2
v2.3.2
2 parents (97958b2 + 2db8383), commit a67b1ff

File tree

2 files changed

+31
-10
lines changed

2 files changed

+31
-10
lines changed

src/ParquetFileViewer/Helpers/UtilityMethods.cs

+29 -8
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,17 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li
1313
{
1414
//Get list of data fields and construct the DataTable
1515
DataTable dataTable = new DataTable();
16-
List<Parquet.Data.DataField> fields = new List<Parquet.Data.DataField>();
16+
var fields = new List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)>();
1717
var dataFields = parquetReader.Schema.GetDataFields();
1818
foreach (string selectedField in selectedFields)
1919
{
2020
var dataField = dataFields.FirstOrDefault(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase));
2121
if (dataField != null)
2222
{
23-
fields.Add(dataField);
24-
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(dataField.DataType));
23+
var thriftSchema = parquetReader.ThriftMetadata.Schema.First(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase));
24+
25+
fields.Add((thriftSchema, dataField));
26+
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(thriftSchema, dataField.DataType));
2527
dataTable.Columns.Add(newColumn);
2628
}
2729
else
@@ -64,17 +66,20 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li
6466
return dataTable;
6567
}
6668

67-
private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List<Parquet.Data.DataField> fields,
69+
private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)> fields,
6870
int skipRecords, int readRecords, CancellationToken cancellationToken)
6971
{
7072
int rowBeginIndex = dataTable.Rows.Count;
7173
bool isFirstColumn = true;
7274

73-
foreach (var field in fields)
75+
foreach (var fieldTuple in fields)
7476
{
7577
if (cancellationToken.IsCancellationRequested)
7678
break;
7779

80+
var logicalType = fieldTuple.Item1.LogicalType;
81+
var field = fieldTuple.Item2;
82+
7883
int rowIndex = rowBeginIndex;
7984

8085
int skippedRecords = 0;
@@ -101,7 +106,23 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g
101106
if (value == null)
102107
dataTable.Rows[rowIndex][field.Name] = DBNull.Value;
103108
else if (field.DataType == Parquet.Data.DataType.DateTimeOffset)
104-
dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime; //converts to local time!
109+
dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime;
110+
else if (field.DataType == Parquet.Data.DataType.Int64
111+
&& logicalType.TIMESTAMP != null)
112+
{
113+
int divideBy = 0;
114+
if (logicalType.TIMESTAMP.Unit.NANOS != null)
115+
divideBy = 1000 * 1000;
116+
else if (logicalType.TIMESTAMP.Unit.MICROS != null)
117+
divideBy = 1000;
118+
else if (logicalType.TIMESTAMP.Unit.MILLIS != null)
119+
divideBy = 1;
120+
121+
if (divideBy > 0)
122+
dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeMilliseconds((long)value / divideBy).DateTime;
123+
else //Not sure if this 'else' is correct but adding just in case
124+
dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeSeconds((long)value);
125+
}
105126
else
106127
dataTable.Rows[rowIndex][field.Name] = value;
107128

@@ -113,7 +134,7 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g
113134
}
114135

115136

116-
public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type)
137+
public static Type ParquetNetTypeToCSharpType(Parquet.Thrift.SchemaElement thriftSchema, Parquet.Data.DataType type)
117138
{
118139
Type columnType = null;
119140
switch (type)
@@ -147,7 +168,7 @@ public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type)
147168
columnType = typeof(int);
148169
break;
149170
case Parquet.Data.DataType.Int64:
150-
columnType = typeof(long);
171+
columnType = thriftSchema.LogicalType.TIMESTAMP != null ? typeof(DateTime) : typeof(long);
151172
break;
152173
case Parquet.Data.DataType.UnsignedByte:
153174
columnType = typeof(byte);

src/ParquetFileViewer/Properties/AssemblyInfo.cs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -31,5 +31,5 @@
3131
// You can specify all the values or you can default the Build and Revision Numbers
3232
// by using the '*' as shown below:
3333
// [assembly: AssemblyVersion("1.0.*")]
34-
[assembly: AssemblyVersion("2.3.1.*")]
35-
[assembly: AssemblyFileVersion("2.3.1.*")]
34+
[assembly: AssemblyVersion("2.3.2.*")]
35+
[assembly: AssemblyFileVersion("2.3.2.*")]

0 commit comments

Comments
 (0)