Skip to content

Fix crash on too big files #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions CSVLintNppPlugin/CsvLint/CsvAnalyze.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ class CsvAnalyze
/// <param name="autodetect">automatically detect separator and header names</param>
/// <param name="mansep">Override automatic detection, manually provided column separator</param>
/// <param name="manhead">Override automatic detection, manually set first header row contains columns names</param>
/// <param name="userRequested">if the inference was explicitly requested by the user (rather than auto-triggered on opening a file)</param>
/// <returns></returns>
public static CsvDefinition InferFromData(bool autodetect, char mansep, string manwid, bool manhead, int manskip, char commchar)
public static CsvDefinition InferFromData(bool autodetect, char mansep, string manwid, bool manhead, int manskip, char commchar, bool userRequested)
{
// First do a letter frequency analysis on each row
var strfreq = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strfreq, userRequested))
{
var csvdef = new CsvDefinition { FileIsTooBig = true };
return csvdef;
}
string line;
int lineCount = 0, linesQuoted = 0, lineContent = 0;

Expand Down Expand Up @@ -326,7 +331,7 @@ public static CsvDefinition InferFromData(bool autodetect, char mansep, string m
// reset string reader to first line is not possible, create a new one
bool fixedwidth = result.Separator == '\0';

var strdata = ScintillaStreams.StreamAllText();
ScintillaStreams.TryStreamAllText(out var strdata);

// examine data and keep statistics for each column
List<CsvAnalyzeColumn> colstats = new List<CsvAnalyzeColumn>();
Expand Down Expand Up @@ -503,7 +508,8 @@ public static void StatisticalReportData(CsvDefinition csvdef)
int lineCount = 0;
bool fixedwidth = csvdef.Separator == '\0';

var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out var strdata))
return;

// skip any comment lines
int commentCount = csvdef.SkipCommentLinesAtStart(strdata);
Expand Down Expand Up @@ -672,7 +678,8 @@ public static void CountUniqueValues(CsvDefinition csvdef, List<int> colidx, boo
{
// examine data and keep list of counters per unique values
Dictionary<string, int> uniquecount = new Dictionary<string, int>();
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out var strdata))
return;
List<string> values;

bool iscomm = false;
Expand Down
3 changes: 3 additions & 0 deletions CSVLintNppPlugin/CsvLint/CsvDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ public void AddCodedValues(Dictionary<string, int> slcodes)
/// </summary>
public class CsvDefinition
{
/// <summary>True if and only if the current file has more than <see cref="int.MaxValue"/> bytes</summary>
public bool FileIsTooBig { get; set; } = false;

public int DefaultLanguageId { get; set; } = 0;

/// column separator character
Expand Down
18 changes: 12 additions & 6 deletions CSVLintNppPlugin/CsvLint/CsvEdit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ public static void ReformatDataFile(CsvDefinition csvdef, string reformatSeparat
var CRLF = getEditorEOLchars(scintillaGateway.GetEOLMode());

// use stringreader to go line by line
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

//var s = new StringReader(data);
int linenr = 0;
Expand Down Expand Up @@ -481,7 +482,8 @@ public static void ConvertToSQL(CsvDefinition csvdef)
if (enumcols1 != "") sb.Append(string.Format("-- Enumeration columns (optional)\r\n/*\r\n{0}{1}*/\r\n", enumcols1, enumcols2));

// use stringreader to go line by line
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

int lineCount = csvdef.ColNameHeader ? -1 : 0;
int batchcomm = -1; // batch comment line
Expand Down Expand Up @@ -703,7 +705,8 @@ public static void ConvertToXML(CsvDefinition csvdef)
sb.Append("\t-->\r\n");

// use stringreader to go line by line
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

int lineCount = (csvdef.ColNameHeader ? -1 : 0);

Expand Down Expand Up @@ -847,7 +850,8 @@ public static void ConvertToJSON(CsvDefinition csvdef)
sb.Append("\t\"JSONdata\":[");

// use stringreader to go line by line
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

int lineCount = (csvdef.ColNameHeader ? -1 : 0);

Expand Down Expand Up @@ -1074,7 +1078,8 @@ public static void SortData(CsvDefinition csvdef, int SortIdx, bool AscDesc, boo
// examine data and keep list of all data lines
// Note: can be a dictionary, not a list, because the sortable values are guaranteed to be unique
Dictionary<string, string> sortlines = new Dictionary<string, string>();
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

// variables to read original data file
List<string> values;
Expand Down Expand Up @@ -1161,7 +1166,8 @@ public static void ColumnSplit(CsvDefinition csvdef, int ColumnIndex, int SplitC
var CRLF = getEditorEOLchars(scintillaGateway.GetEOLMode());

// use stringreader to go line by line
var strdata = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata))
return;

//var s = new StringReader(data);
int linenr = 0;
Expand Down
13 changes: 10 additions & 3 deletions CSVLintNppPlugin/Forms/CsvLintWindow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ public void SetCsvDefinition(CsvDefinition csvdef, bool applybtn)
{
// clear message to user when no columns found
var msg = "";
if ((csvdef.Fields.Count == 1) && (csvdef.Fields[0].DataType == ColumnType.String) && (csvdef.Fields[0].MaxWidth >= 9999))
if (csvdef.FileIsTooBig)
{
msg += "; *********************************\r\n";
msg += "; File is too large for CsvLint to analyze\r\n";
msg += "; *********************************\r\n";
}
else if ((csvdef.Fields.Count == 1) && (csvdef.Fields[0].DataType == ColumnType.String) && (csvdef.Fields[0].MaxWidth >= 9999))
{
// give a clear message
msg += "; *********************************\r\n";
Expand Down Expand Up @@ -75,7 +81,7 @@ private void OnBtnDetectColumns_Click(object sender, EventArgs e)
var dtStart = DateTime.Now;

// analyze and determine csv definition
CsvDefinition csvdef = CsvAnalyze.InferFromData(chkAutoDetect.Checked, sep, widths, header, skip, comm);
CsvDefinition csvdef = CsvAnalyze.InferFromData(chkAutoDetect.Checked, sep, widths, header, skip, comm, true);

Main.UpdateCSVChanges(csvdef, false);

Expand Down Expand Up @@ -106,7 +112,8 @@ private void OnBtnValidate_Click(object sender, EventArgs e)
// validate data
CsvValidate csvval = new CsvValidate();

var sr = ScintillaStreams.StreamAllText();
if (!ScintillaStreams.TryStreamAllText(out var sr))
return;

csvval.ValidateData(sr, csvdef);

Expand Down
3 changes: 2 additions & 1 deletion CSVLintNppPlugin/Forms/DetectColumnsForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ private void btnFixedWidthPos_Click(object sender, EventArgs e)
if (dialogResult == DialogResult.OK)
{
// paste column positions
CsvDefinition csvdef = Main.GetCurrentCsvDef();
if (!Main.TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;
txtFixedWidthPos.Text = csvdef.GetColumnWidths(true);
}
}
Expand Down
29 changes: 21 additions & 8 deletions CSVLintNppPlugin/Main.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Windows.Forms;
using System.Xml;
using CSVLint;
using CSVLint.Tools;
using CSVLintNppPlugin.CsvLint;
using CSVLintNppPlugin.Forms;
using Kbg.NppPluginNET.PluginInfrastructure;
Expand Down Expand Up @@ -440,7 +441,7 @@ public static void CSVChangeFileTab()
else
{
// analyze and determine csv definition
csvdef = CsvAnalyze.InferFromData(true, '\0', "", false, 0, Main.Settings.CommentCharacter); // parameters "", false, 0 -> defaults
csvdef = CsvAnalyze.InferFromData(true, '\0', "", false, 0, Main.Settings.CommentCharacter, false); // parameters "", false, 0 -> defaults
}
FileCsvDef.Add(filename, csvdef);
}
Expand Down Expand Up @@ -481,7 +482,8 @@ public static void CSVChangeFileTab()
public static void EnableDisableLanguage()
{
Win32.SendMessage(PluginBase.nppData._nppHandle, (uint)NppMsg.NPPM_GETCURRENTLANGTYPE, 0, out int currentLanguageId);
CsvDefinition csvdef = GetCurrentCsvDef();
if (!TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;
int newLanguageId;
if (currentLanguageId == CsvLanguageId.Value)
{
Expand Down Expand Up @@ -540,13 +542,20 @@ public static void UpdateCSVChanges(CsvDefinition csvdef, bool saveini)
}
}

public static CsvDefinition GetCurrentCsvDef()
/// <summary>
/// If this returns false, csvdef is null.<br></br>
/// Otherwise, csvdef is the CsvDefinition for the current file.
/// </summary>
public static bool TryGetCurrentCsvDef(out CsvDefinition csvdef)
{
csvdef = null;
if (!Helper.TryGetLengthAsInt(PluginBase.CurrentScintillaGateway, true, out _))
return false;
// Notepad++ switched to a different file tab
INotepadPPGateway notepad = new NotepadPPGateway();
string filename = notepad.GetCurrentFilePath();

return FileCsvDef.TryGetValue(filename, out CsvDefinition result) ? result : null;
return FileCsvDef.TryGetValue(filename, out csvdef);
}

public static bool CheckValidCsvDef(CsvDefinition csvdef, string errmsg)
Expand Down Expand Up @@ -605,7 +614,8 @@ internal static void DoAboutForm()
internal static void convertData()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();
if (!TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "convert data"))
Expand Down Expand Up @@ -647,7 +657,8 @@ internal static void convertData()
internal static void generateMetaData()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();
if (!TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "generate script"))
Expand Down Expand Up @@ -692,7 +703,8 @@ internal static void generateMetaData()
internal static void AnalyseDataReport()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();
if (!TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "run Analyze Data Report"))
Expand All @@ -705,7 +717,8 @@ internal static void AnalyseDataReport()
internal static void CountUniqueValues()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();
if (!TryGetCurrentCsvDef(out CsvDefinition csvdef))
return;

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "count unique values"))
Expand Down
2 changes: 1 addition & 1 deletion CSVLintNppPlugin/PluginInfrastructure/IScintillaGateway.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public interface IScintillaGateway
void ClearDocumentStyle();

/// <summary>Returns the number of bytes in the document. (Scintilla feature 2006)</summary>
int GetLength();
long GetLength();

/// <summary>Returns the character byte at the position. (Scintilla feature 2007)</summary>
int GetCharAt(int pos);
Expand Down
4 changes: 2 additions & 2 deletions CSVLintNppPlugin/PluginInfrastructure/ScintillaGateway.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ public void ClearDocumentStyle()
}

/// <summary>Returns the number of bytes in the document. (Scintilla feature 2006)</summary>
public int GetLength()
public long GetLength()
{
return (int)Win32.SendMessage(scintilla, SciMsg.SCI_GETLENGTH, (IntPtr) Unused, (IntPtr) Unused);
return (long)Win32.SendMessage(scintilla, SciMsg.SCI_GETLENGTH, (IntPtr) Unused, (IntPtr) Unused);
}

/// <summary>Returns the character byte at the position. (Scintilla feature 2007)</summary>
Expand Down
20 changes: 14 additions & 6 deletions CSVLintNppPlugin/PluginInfrastructure/ScintillaStreams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,32 @@
public class ScintillaStreams
{
/// <summary>
/// Reads the whole document as a text stream, trying to use the right encoding
/// If the current document has more than <see cref="int.MaxValue"/> bytes:<br></br>
/// * show a MessageBox as described in <see cref="Helper.TryGetLengthAsInt(IScintillaGateway, bool, out int)"/><br></br>
/// * return false and reader is null<br></br>
/// Otherwise, return true, and reader is a StreamReader that reads the whole document as a text stream, trying to use the right encoding
/// </summary>
public static StreamReader StreamAllText()
public static bool TryStreamAllText(out StreamReader reader, bool notifyUser = true)
{
reader = null;
var doc = PluginBase.CurrentScintillaGateway;
if (!Helper.TryGetLengthAsInt(doc, notifyUser, out _))
return false;
var codepage = doc.GetCodePage();
var encoding = codepage == (int)SciMsg.SC_CP_UTF8 ? Encoding.UTF8 : Encoding.Default;
return new StreamReader(StreamAllRawText(), encoding);
reader = new StreamReader(StreamAllRawText(), encoding);
return true;
}

/// <summary>
/// Reads the whole document as a byte stream.
/// Will likely throw exceptions if the document is edited while the stream is open.
/// Reads the whole document as a byte stream.<br></br>
/// Will likely throw exceptions if the document is edited while the stream is open.<br></br>
/// Will also throw an exception if the document has more than <see cref="int.MaxValue"/> bytes.
/// </summary>
public static Stream StreamAllRawText()
{
var doc = PluginBase.CurrentScintillaGateway;
var length = doc.GetLength();
var length = (int)doc.GetLength();

// When editing a document Scintilla divides it into two - one before the cursor and one after, calling the break point the "gap"
int gap = doc.GetGapPosition();
Expand Down
31 changes: 31 additions & 0 deletions CSVLintNppPlugin/Tools/Helper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
// CsvDefinition
// Helper functions and methods
// -------------------------------------
using Kbg.NppPluginNET.PluginInfrastructure;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace CSVLint.Tools
{
Expand All @@ -25,5 +27,34 @@ public static void Increase<T>(this Dictionary<T, int> counts, T c)
else
counts[c]++;
}

/// <summary>
/// Whether to warn the user when a plugin command is attempted on a too-long file.<br></br>
/// Starts out true, and becomes false once the user answers "Yes" when asked whether to stop showing these warnings.
/// </summary>
private static bool notifyUserIfFileTooLong = true;

/// <summary>
/// Let <c>longLen</c> be the number of bytes in the document managed by <paramref name="scintilla"/>.<br></br>
/// If <c>longLen</c> is between 0 and <see cref="int.MaxValue"/> (inclusive), sets <c>length = longLen</c> and returns true.<br></br>
/// Otherwise, sets <c>length = -1</c> and returns false; in addition, if both <paramref name="notifyUser"/> and
/// <see cref="notifyUserIfFileTooLong"/> are true, shows a MessageBox warning the user that the document is too long.
/// </summary>
/// <param name="scintilla">gateway to the document whose length is queried</param>
/// <param name="notifyUser">if false, never show the warning MessageBox, regardless of <see cref="notifyUserIfFileTooLong"/></param>
/// <param name="length">the document length in bytes if it fits in an int, otherwise -1</param>
/// <returns>true if the document length fits in an int, false otherwise</returns>
public static bool TryGetLengthAsInt(IScintillaGateway scintilla, bool notifyUser, out int length)
{
    long longLen = scintilla.GetLength();
    if (longLen >= 0 && longLen <= int.MaxValue)
    {
        length = (int)longLen;
        return true;
    }
    length = -1;
    if (notifyUser && notifyUserIfFileTooLong)
    {
        // The question is phrased as "stop showing these messages?", so only an
        // answer of "No" keeps future notifications enabled.
        notifyUserIfFileTooLong = MessageBox.Show(
            $"This plugin command cannot be performed on this document, because the document has more than {int.MaxValue} bytes.\r\n" +
            "Do you want to stop showing these messages for too-long documents?",
            "Document too long for this command",
            MessageBoxButtons.YesNo, MessageBoxIcon.Warning
        ) == DialogResult.No;
    }
    return false;
}
}
}
Binary file modified CSVLintNppPlugin/bin/Release-x64/CSVLint.dll
Binary file not shown.
Binary file modified CSVLintNppPlugin/bin/Release/CSVLint.dll
Binary file not shown.