diff --git a/CSVLintNppPlugin/CsvLint/CsvAnalyze.cs b/CSVLintNppPlugin/CsvLint/CsvAnalyze.cs index ea1a770..04f9cb6 100644 --- a/CSVLintNppPlugin/CsvLint/CsvAnalyze.cs +++ b/CSVLintNppPlugin/CsvLint/CsvAnalyze.cs @@ -23,11 +23,16 @@ class CsvAnalyze /// automatically detect separator and header names /// Override automatic detection, manually provided column separator /// Override automatic detection, manually set first header row contains columns names + /// if the inference was explicitly requested by the user (rather than auto-triggered on opening a file) /// - public static CsvDefinition InferFromData(bool autodetect, char mansep, string manwid, bool manhead, int manskip, char commchar) + public static CsvDefinition InferFromData(bool autodetect, char mansep, string manwid, bool manhead, int manskip, char commchar, bool userRequested) { // First do a letter frequency analysis on each row - var strfreq = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strfreq, userRequested)) + { + var csvdef = new CsvDefinition { FileIsTooBig = true }; + return csvdef; + } string line; int lineCount = 0, linesQuoted = 0, lineContent = 0; @@ -326,7 +331,7 @@ public static CsvDefinition InferFromData(bool autodetect, char mansep, string m // reset string reader to first line is not possible, create a new one bool fixedwidth = result.Separator == '\0'; - var strdata = ScintillaStreams.StreamAllText(); + ScintillaStreams.TryStreamAllText(out var strdata); // examine data and keep statistics for each column List colstats = new List(); @@ -503,7 +508,8 @@ public static void StatisticalReportData(CsvDefinition csvdef) int lineCount = 0; bool fixedwidth = csvdef.Separator == '\0'; - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out var strdata)) + return; // skip any comment lines int commentCount = csvdef.SkipCommentLinesAtStart(strdata); @@ -672,7 +678,8 @@ public static void 
CountUniqueValues(CsvDefinition csvdef, List colidx, boo { // examine data and keep list of counters per unique values Dictionary uniquecount = new Dictionary(); - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out var strdata)) + return; List values; bool iscomm = false; diff --git a/CSVLintNppPlugin/CsvLint/CsvDefinition.cs b/CSVLintNppPlugin/CsvLint/CsvDefinition.cs index 85b0593..91a79de 100644 --- a/CSVLintNppPlugin/CsvLint/CsvDefinition.cs +++ b/CSVLintNppPlugin/CsvLint/CsvDefinition.cs @@ -181,6 +181,9 @@ public void AddCodedValues(Dictionary slcodes) /// public class CsvDefinition { + /// True if and only if the current file has more than bytes + public bool FileIsTooBig { get; set; } = false; + public int DefaultLanguageId { get; set; } = 0; /// column separator character diff --git a/CSVLintNppPlugin/CsvLint/CsvEdit.cs b/CSVLintNppPlugin/CsvLint/CsvEdit.cs index 215c391..d8e43a4 100644 --- a/CSVLintNppPlugin/CsvLint/CsvEdit.cs +++ b/CSVLintNppPlugin/CsvLint/CsvEdit.cs @@ -142,7 +142,8 @@ public static void ReformatDataFile(CsvDefinition csvdef, string reformatSeparat var CRLF = getEditorEOLchars(scintillaGateway.GetEOLMode()); // use stringreader to go line by line - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; //var s = new StringReader(data); int linenr = 0; @@ -481,7 +482,8 @@ public static void ConvertToSQL(CsvDefinition csvdef) if (enumcols1 != "") sb.Append(string.Format("-- Enumeration columns (optional)\r\n/*\r\n{0}{1}*/\r\n", enumcols1, enumcols2)); // use stringreader to go line by line - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; int lineCount = csvdef.ColNameHeader ? 
-1 : 0; int batchcomm = -1; // batch comment line @@ -703,7 +705,8 @@ public static void ConvertToXML(CsvDefinition csvdef) sb.Append("\t-->\r\n"); // use stringreader to go line by line - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; int lineCount = (csvdef.ColNameHeader ? -1 : 0); @@ -847,7 +850,8 @@ public static void ConvertToJSON(CsvDefinition csvdef) sb.Append("\t\"JSONdata\":["); // use stringreader to go line by line - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; int lineCount = (csvdef.ColNameHeader ? -1 : 0); @@ -1074,7 +1078,8 @@ public static void SortData(CsvDefinition csvdef, int SortIdx, bool AscDesc, boo // examine data and keep list of all data lines // Note: can be a dictionary, not a list, because the sortable values are guaranteed to be unique Dictionary sortlines = new Dictionary(); - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; // variables to read original data file List values; @@ -1161,7 +1166,8 @@ public static void ColumnSplit(CsvDefinition csvdef, int ColumnIndex, int SplitC var CRLF = getEditorEOLchars(scintillaGateway.GetEOLMode()); // use stringreader to go line by line - var strdata = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out StreamReader strdata)) + return; //var s = new StringReader(data); int linenr = 0; diff --git a/CSVLintNppPlugin/Forms/CsvLintWindow.cs b/CSVLintNppPlugin/Forms/CsvLintWindow.cs index 3771462..39f0ede 100644 --- a/CSVLintNppPlugin/Forms/CsvLintWindow.cs +++ b/CSVLintNppPlugin/Forms/CsvLintWindow.cs @@ -21,7 +21,13 @@ public void SetCsvDefinition(CsvDefinition csvdef, bool applybtn) { // clear message to user when no columns found var msg = ""; - if ((csvdef.Fields.Count == 1) && (csvdef.Fields[0].DataType == ColumnType.String) && 
(csvdef.Fields[0].MaxWidth >= 9999)) + if (csvdef.FileIsTooBig) + { + msg += "; *********************************\r\n"; + msg += "; File is too large for CsvLint to analyze\r\n"; + msg += "; *********************************\r\n"; + } + else if ((csvdef.Fields.Count == 1) && (csvdef.Fields[0].DataType == ColumnType.String) && (csvdef.Fields[0].MaxWidth >= 9999)) { // give a clear message msg += "; *********************************\r\n"; @@ -75,7 +81,7 @@ private void OnBtnDetectColumns_Click(object sender, EventArgs e) var dtStart = DateTime.Now; // analyze and determine csv definition - CsvDefinition csvdef = CsvAnalyze.InferFromData(chkAutoDetect.Checked, sep, widths, header, skip, comm); + CsvDefinition csvdef = CsvAnalyze.InferFromData(chkAutoDetect.Checked, sep, widths, header, skip, comm, true); Main.UpdateCSVChanges(csvdef, false); @@ -106,7 +112,8 @@ private void OnBtnValidate_Click(object sender, EventArgs e) // validate data CsvValidate csvval = new CsvValidate(); - var sr = ScintillaStreams.StreamAllText(); + if (!ScintillaStreams.TryStreamAllText(out var sr)) + return; csvval.ValidateData(sr, csvdef); diff --git a/CSVLintNppPlugin/Forms/DetectColumnsForm.cs b/CSVLintNppPlugin/Forms/DetectColumnsForm.cs index 05d360d..d947f66 100644 --- a/CSVLintNppPlugin/Forms/DetectColumnsForm.cs +++ b/CSVLintNppPlugin/Forms/DetectColumnsForm.cs @@ -151,7 +151,8 @@ private void btnFixedWidthPos_Click(object sender, EventArgs e) if (dialogResult == DialogResult.OK) { // paste column positions - CsvDefinition csvdef = Main.GetCurrentCsvDef(); + if (!Main.TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; txtFixedWidthPos.Text = csvdef.GetColumnWidths(true); } } diff --git a/CSVLintNppPlugin/Main.cs b/CSVLintNppPlugin/Main.cs index 08e974a..4e4bcdd 100644 --- a/CSVLintNppPlugin/Main.cs +++ b/CSVLintNppPlugin/Main.cs @@ -10,6 +10,7 @@ using System.Windows.Forms; using System.Xml; using CSVLint; +using CSVLint.Tools; using CSVLintNppPlugin.CsvLint; using 
CSVLintNppPlugin.Forms; using Kbg.NppPluginNET.PluginInfrastructure; @@ -440,7 +441,7 @@ public static void CSVChangeFileTab() else { // analyze and determine csv definition - csvdef = CsvAnalyze.InferFromData(true, '\0', "", false, 0, Main.Settings.CommentCharacter); // parameters "", false, 0 -> defaults + csvdef = CsvAnalyze.InferFromData(true, '\0', "", false, 0, Main.Settings.CommentCharacter, false); // parameters "", false, 0 -> defaults } FileCsvDef.Add(filename, csvdef); } @@ -481,7 +482,8 @@ public static void CSVChangeFileTab() public static void EnableDisableLanguage() { Win32.SendMessage(PluginBase.nppData._nppHandle, (uint)NppMsg.NPPM_GETCURRENTLANGTYPE, 0, out int currentLanguageId); - CsvDefinition csvdef = GetCurrentCsvDef(); + if (!TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; int newLanguageId; if (currentLanguageId == CsvLanguageId.Value) { @@ -540,13 +542,20 @@ public static void UpdateCSVChanges(CsvDefinition csvdef, bool saveini) } } - public static CsvDefinition GetCurrentCsvDef() + /// + /// If this returns false, csvdef is null.

+ /// Otherwise, csvdef is the CsvDefinition for the current file. + ///
+ public static bool TryGetCurrentCsvDef(out CsvDefinition csvdef) { + csvdef = null; + if (!Helper.TryGetLengthAsInt(PluginBase.CurrentScintillaGateway, true, out _)) + return false; // Notepad++ switc to a different file tab INotepadPPGateway notepad = new NotepadPPGateway(); string filename = notepad.GetCurrentFilePath(); - return FileCsvDef.TryGetValue(filename, out CsvDefinition result) ? result : null; + return FileCsvDef.TryGetValue(filename, out csvdef); } public static bool CheckValidCsvDef(CsvDefinition csvdef, string errmsg) @@ -605,7 +614,8 @@ internal static void DoAboutForm() internal static void convertData() { // get dictionary - CsvDefinition csvdef = GetCurrentCsvDef(); + if (!TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; // check if valid csv metadata if (CheckValidCsvDef(csvdef, "convert data")) @@ -647,7 +657,8 @@ internal static void convertData() internal static void generateMetaData() { // get dictionary - CsvDefinition csvdef = GetCurrentCsvDef(); + if (!TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; // check if valid csv metadata if (CheckValidCsvDef(csvdef, "generate script")) @@ -692,7 +703,8 @@ internal static void generateMetaData() internal static void AnalyseDataReport() { // get dictionary - CsvDefinition csvdef = GetCurrentCsvDef(); + if (!TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; // check if valid csv metadata if (CheckValidCsvDef(csvdef, "run Analyze Data Report")) @@ -705,7 +717,8 @@ internal static void AnalyseDataReport() internal static void CountUniqueValues() { // get dictionary - CsvDefinition csvdef = GetCurrentCsvDef(); + if (!TryGetCurrentCsvDef(out CsvDefinition csvdef)) + return; // check if valid csv metadata if (CheckValidCsvDef(csvdef, "count unique values")) diff --git a/CSVLintNppPlugin/PluginInfrastructure/IScintillaGateway.cs b/CSVLintNppPlugin/PluginInfrastructure/IScintillaGateway.cs index 9274bd1..31b6653 100644 --- 
a/CSVLintNppPlugin/PluginInfrastructure/IScintillaGateway.cs +++ b/CSVLintNppPlugin/PluginInfrastructure/IScintillaGateway.cs @@ -55,7 +55,7 @@ public interface IScintillaGateway void ClearDocumentStyle(); /// Returns the number of bytes in the document. (Scintilla feature 2006) - int GetLength(); + long GetLength(); /// Returns the character byte at the position. (Scintilla feature 2007) int GetCharAt(int pos); diff --git a/CSVLintNppPlugin/PluginInfrastructure/ScintillaGateway.cs b/CSVLintNppPlugin/PluginInfrastructure/ScintillaGateway.cs index a509173..6970e5f 100644 --- a/CSVLintNppPlugin/PluginInfrastructure/ScintillaGateway.cs +++ b/CSVLintNppPlugin/PluginInfrastructure/ScintillaGateway.cs @@ -144,9 +144,9 @@ public void ClearDocumentStyle() } /// Returns the number of bytes in the document. (Scintilla feature 2006) - public int GetLength() + public long GetLength() { - return (int)Win32.SendMessage(scintilla, SciMsg.SCI_GETLENGTH, (IntPtr) Unused, (IntPtr) Unused); + return (long)Win32.SendMessage(scintilla, SciMsg.SCI_GETLENGTH, (IntPtr) Unused, (IntPtr) Unused); } /// Returns the character byte at the position. (Scintilla feature 2007) diff --git a/CSVLintNppPlugin/PluginInfrastructure/ScintillaStreams.cs b/CSVLintNppPlugin/PluginInfrastructure/ScintillaStreams.cs index 11d4617..b48e64e 100644 --- a/CSVLintNppPlugin/PluginInfrastructure/ScintillaStreams.cs +++ b/CSVLintNppPlugin/PluginInfrastructure/ScintillaStreams.cs @@ -9,24 +9,32 @@ public class ScintillaStreams { /// - /// Reads the whole document as a text stream, trying to use the right encoding + /// If the current document has more than bytes:

+ /// * throw up a MessageBox as described in Helper.TryGetLengthAsInt (only when notifyUser is true)

+ /// * return false and reader is null

+ /// Otherwise, return true, and reader is a StreamReader that reads the whole document as a text stream, trying to use the right encoding ///
- public static StreamReader StreamAllText() + public static bool TryStreamAllText(out StreamReader reader, bool notifyUser = true) { + reader = null; var doc = PluginBase.CurrentScintillaGateway; + if (!Helper.TryGetLengthAsInt(doc, notifyUser, out _)) + return false; var codepage = doc.GetCodePage(); var encoding = codepage == (int)SciMsg.SC_CP_UTF8 ? Encoding.UTF8 : Encoding.Default; - return new StreamReader(StreamAllRawText(), encoding); + reader = new StreamReader(StreamAllRawText(), encoding); + return true; } /// - /// Reads the whole document as a byte stream. - /// Will likely throw exceptions if the document is edited while the stream is open. + /// Reads the whole document as a byte stream.

+ /// Will likely throw exceptions if the document is edited while the stream is open.

+ /// Will also throw an exception if the document has more than int.MaxValue characters. ///
public static Stream StreamAllRawText() { var doc = PluginBase.CurrentScintillaGateway; - var length = doc.GetLength(); + var length = (int)doc.GetLength(); // When editing a document Scintilla divides it into two - one before the cursor and one after, calling the break point the "gap" int gap = doc.GetGapPosition(); diff --git a/CSVLintNppPlugin/Tools/Helper.cs b/CSVLintNppPlugin/Tools/Helper.cs index e1513af..1db63d6 100644 --- a/CSVLintNppPlugin/Tools/Helper.cs +++ b/CSVLintNppPlugin/Tools/Helper.cs @@ -2,11 +2,13 @@ // CsvDefinition // Helper functions and methods // ------------------------------------- +using Kbg.NppPluginNET.PluginInfrastructure; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; +using System.Windows.Forms; namespace CSVLint.Tools { @@ -25,5 +27,34 @@ public static void Increase(this Dictionary counts, T c) else counts[c]++; } + + /// Tell the user the first time they try to use a plugin command on a too-long file, but let them turn off such notifications + private static bool notifyUserIfFileTooLong = true; + + /// + /// Let longLen be the number of bytes in the document managed by scintilla

+ /// If longLen is less than , returns true and sets length = longLen.

+ /// Otherwise, length = -1 and returns false, and if (notifyUserIfFileTooLong && notifyUser), throws up a MessageBox warning the user that the file is too long. + ///
+ public static bool TryGetLengthAsInt(IScintillaGateway scintilla, bool notifyUser, out int length) + { + long longLen = scintilla.GetLength(); + if (longLen >= 0 && longLen <= int.MaxValue) + { + length = (int)longLen; + return true; + } + length = -1; + if (notifyUser && notifyUserIfFileTooLong) + { + notifyUserIfFileTooLong = MessageBox.Show( + $"This plugin command cannot be performed on this document, because the document has more than {int.MaxValue} characters.\r\n" + + $"Do you want to stop showing these messages for too-long documents?", + "Document too long for this command", + MessageBoxButtons.YesNo, MessageBoxIcon.Warning + ) == DialogResult.No; + } + return false; + } } } diff --git a/CSVLintNppPlugin/bin/Release-x64/CSVLint.dll b/CSVLintNppPlugin/bin/Release-x64/CSVLint.dll index 641a9f3..a5f00b2 100644 Binary files a/CSVLintNppPlugin/bin/Release-x64/CSVLint.dll and b/CSVLintNppPlugin/bin/Release-x64/CSVLint.dll differ diff --git a/CSVLintNppPlugin/bin/Release/CSVLint.dll b/CSVLintNppPlugin/bin/Release/CSVLint.dll index c647b53..166ef38 100644 Binary files a/CSVLintNppPlugin/bin/Release/CSVLint.dll and b/CSVLintNppPlugin/bin/Release/CSVLint.dll differ