Skip to content

Commit f4d75e7

Browse files
committed
gracefully handle dupe column names #68
1 parent ac12cba commit f4d75e7

File tree

3 files changed

+68
-44
lines changed

3 files changed

+68
-44
lines changed

src/ParquetFileViewer/FieldSelectionDialog.cs

+44-40
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
5353
int locationX = 0;
5454
int locationY = 5;
5555
bool isFirst = true;
56-
HashSet<string> fieldNames = new HashSet<string>();
5756
bool isClearingSelectAllCheckbox = false;
5857

58+
var checkboxControls = new List<CheckBox>();
5959
foreach (Field field in availableFields)
6060
{
6161
if (isFirst) //Add toggle all checkbox and some other setting changes
@@ -97,7 +97,6 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
9797
if (checkbox.Enabled)
9898
{
9999
checkbox.Checked = selectAllCheckBox.Checked;
100-
//this.PreSelectedFields.Remove((string)checkbox.Tag);
101100
}
102101
}
103102
}
@@ -108,57 +107,62 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
108107
locationY += DynamicFieldCheckboxYIncrement;
109108
}
110109

111-
if (!fieldNames.Contains(field.Name.ToLowerInvariant())) //Normally two fields with the same name shouldn't exist but lets make sure
110+
bool isUnsupportedFieldType = UnsupportedSchemaTypes.Contains(field.SchemaType);
111+
var fieldCheckbox = new CheckBox()
112112
{
113-
bool isUnsupportedFieldType = UnsupportedSchemaTypes.Contains(field.SchemaType);
114-
var fieldCheckbox = new CheckBox()
113+
Name = string.Concat("checkbox_", field.Name),
114+
Text = string.Concat(field.Name, isUnsupportedFieldType ? "(Unsupported)" : string.Empty),
115+
Tag = field.Name,
116+
Checked = preSelectedFields.Contains(field.Name),
117+
Location = new Point(locationX, locationY),
118+
AutoSize = true,
119+
Enabled = !isUnsupportedFieldType
120+
};
121+
fieldCheckbox.CheckedChanged += (object checkboxSender, EventArgs checkboxEventArgs) =>
122+
{
123+
var fieldCheckBox = (CheckBox)checkboxSender;
124+
125+
if (fieldCheckBox.Checked)
115126
{
116-
Name = string.Concat("checkbox_", field.Name),
117-
Text = string.Concat(field.Name, isUnsupportedFieldType ? "(Unsupported)" : string.Empty),
118-
Tag = field.Name,
119-
Checked = preSelectedFields.Contains(field.Name),
120-
Location = new Point(locationX, locationY),
121-
AutoSize = true,
122-
Enabled = !isUnsupportedFieldType
123-
};
124-
fieldCheckbox.CheckedChanged += (object checkboxSender, EventArgs checkboxEventArgs) =>
127+
this.PreSelectedFields.Add((string)fieldCheckBox.Tag);
128+
}
129+
else
125130
{
126-
var fieldCheckBox = (CheckBox)checkboxSender;
127-
128-
if (fieldCheckBox.Checked)
129-
{
130-
this.PreSelectedFields.Add((string)fieldCheckBox.Tag);
131-
}
132-
else
133-
{
134-
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
135-
}
131+
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
132+
}
136133

137134

138-
if (!fieldCheckBox.Checked)
135+
if (!fieldCheckBox.Checked)
136+
{
137+
foreach (Control control in this.fieldsPanel.Controls)
139138
{
140-
foreach (Control control in this.fieldsPanel.Controls)
139+
if (control.Tag.Equals(SelectAllCheckboxName) && control is CheckBox checkbox)
141140
{
142-
if (control.Tag.Equals(SelectAllCheckboxName) && control is CheckBox checkbox)
141+
if (checkbox.Enabled && checkbox.Checked)
143142
{
144-
if (checkbox.Enabled && checkbox.Checked)
145-
{
146-
isClearingSelectAllCheckbox = true;
147-
checkbox.Checked = false;
148-
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
149-
isClearingSelectAllCheckbox = false;
150-
break;
151-
}
143+
isClearingSelectAllCheckbox = true;
144+
checkbox.Checked = false;
145+
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
146+
isClearingSelectAllCheckbox = false;
147+
break;
152148
}
153149
}
154150
}
155-
};
156-
this.fieldsPanel.Controls.Add(fieldCheckbox);
151+
}
152+
};
153+
checkboxControls.Add(fieldCheckbox);
157154

158-
locationY += DynamicFieldCheckboxYIncrement;
159-
fieldNames.Add(field.Name.ToLowerInvariant());
160-
}
155+
locationY += DynamicFieldCheckboxYIncrement;
161156
}
157+
158+
//Disable every checkbox whose label matches another one when case is ignored, because case-sensitive field names are not supported right now
159+
var duplicateFields = checkboxControls?.GroupBy(f => f.Text.ToUpperInvariant()).Where(g => g.Count() > 1).SelectMany(g => g).ToList();
160+
foreach(var duplicateField in duplicateFields)
161+
{
162+
duplicateField.Enabled = false;
163+
}
164+
165+
this.fieldsPanel.Controls.AddRange(checkboxControls.ToArray<Control>());
162166
}
163167
}
164168
catch (Exception ex)

src/ParquetFileViewer/Helpers/UtilityMethods.cs

+10-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Linq;
66
using System.Threading;
77
using System.Threading.Tasks;
8+
using System.Windows.Forms;
89

910
namespace ParquetFileViewer.Helpers
1011
{
@@ -13,7 +14,7 @@ public static class UtilityMethods
1314
public static async Task<DataTable> ParquetReaderToDataTable(ParquetReader parquetReader, List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken)
1415
{
1516
//Get list of data fields and construct the DataTable
16-
DataTable dataTable = new DataTable();
17+
var dataTable = new DataTable();
1718
var fields = new List<(Parquet.Thrift.SchemaElement, Parquet.Schema.DataField)>();
1819
var dataFields = parquetReader.Schema.GetDataFields();
1920
foreach (string selectedField in selectedFields)
@@ -25,7 +26,14 @@ public static async Task<DataTable> ParquetReaderToDataTable(ParquetReader parqu
2526

2627
fields.Add((thriftSchema, dataField));
2728
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(thriftSchema, dataField.DataType));
28-
dataTable.Columns.Add(newColumn);
29+
30+
//Case-sensitive field names are not supported, unfortunately, so reject a column whose name is already present in the table
31+
if (dataTable.Columns.Contains(newColumn.ColumnName))
32+
{
33+
throw new NotSupportedException("Duplicate column detected. Column names are case insensitive and must be unique.");
34+
}
35+
36+
dataTable.Columns.Add(newColumn);
2937
}
3038
else
3139
throw new Exception(string.Format("Field '{0}' does not exist", selectedField));

src/ParquetFileViewer/MainForm.cs

+14-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,19 @@ private List<string> SelectedFields
6969
set
7070
{
7171
this.selectedFields = value;
72-
if (value != null && value.Count > 0)
72+
73+
//Check for fields whose names are duplicates when case is ignored (case-sensitive field names are not supported, unfortunately)
74+
var duplicateFields = this.selectedFields?.GroupBy(f => f.ToUpperInvariant()).Where(g => g.Count() > 1).SelectMany(g => g).ToList();
75+
if (duplicateFields?.Count() > 0)
76+
{
77+
this.selectedFields = this.selectedFields.Where(f => !duplicateFields.Any(df => df.Equals(f, StringComparison.InvariantCultureIgnoreCase))).ToList();
78+
79+
MessageBox.Show($"The following duplicate fields could not be loaded: {string.Join(',', duplicateFields)}. " +
80+
$"\r\n\r\nCase sensitive field names are not currently supported.", "Duplicate fields detected",
81+
MessageBoxButtons.OK, MessageBoxIcon.Warning);
82+
}
83+
84+
if (value?.Count > 0)
7385
{
7486
LoadFileToGridview();
7587
}
@@ -546,7 +558,7 @@ private async void LoadFileToGridview()
546558
{
547559
int i = 0;
548560
var fieldGroups = new List<(int, List<string>)>();
549-
foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.selectedFields.Count / Environment.ProcessorCount)))
561+
foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.SelectedFields.Count / Environment.ProcessorCount)))
550562
{
551563
fieldGroups.Add((i++, fields));
552564
}

0 commit comments

Comments
 (0)