Skip to content

Commit 8d7369c

Browse files
committed
Refactor scan interval parsing and add NumberIterator
Replace the custom scan interval parsing logic with a reusable NumberIterator class. Update MainClass, QueryExecutor, and ProxiSpectrumReader to use NumberIterator for scan and MS level selection. Merge MZArray.cs into GeneralHelpers.cs. Add unit tests for NumberIterator and scan interval handling.
1 parent 1f1a837 commit 8d7369c

File tree

8 files changed

+265
-150
lines changed

8 files changed

+265
-150
lines changed

MainClass.cs

Lines changed: 8 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
using ThermoRawFileParser.XIC;
1111
using System.Globalization;
1212
using System.Collections.Generic;
13-
using System.Text.RegularExpressions;
1413
using System.Data;
14+
using ThermoRawFileParser.Util;
1515

1616
[assembly: log4net.Config.XmlConfigurator()]
1717

@@ -942,62 +942,15 @@ private static int ParseToEnum(Type enumType, string formatString, string keyNam
942942

943943
private static HashSet<int> ParseMsLevel(string inputString)
944944
{
945-
HashSet<int> result = new HashSet<int>();
946-
Regex valid = new Regex(@"^[\d,\-\s]+$");
947-
Regex interval = new Regex(@"^\s*(\d+)?\s*(-)?\s*(\d+)?\s*$");
948-
949-
if (!valid.IsMatch(inputString))
950-
throw new OptionException("Invalid characters in msLevel key", "msLevel");
951-
952-
foreach (var piece in inputString.Split(new char[] {','}))
945+
try
953946
{
954-
try
955-
{
956-
int start;
957-
int end;
958-
959-
var intervalMatch = interval.Match(piece);
960-
961-
if (!intervalMatch.Success)
962-
throw new OptionException();
963-
964-
if (intervalMatch.Groups[2].Success) //it is interval
965-
{
966-
if (intervalMatch.Groups[1].Success)
967-
start = Math.Max(1, int.Parse(intervalMatch.Groups[1].Value));
968-
else
969-
start = 1;
970-
971-
if (intervalMatch.Groups[3].Success)
972-
end = Math.Min(10, int.Parse(intervalMatch.Groups[3].Value));
973-
else
974-
end = 10;
975-
}
976-
else
977-
{
978-
if (intervalMatch.Groups[1].Success)
979-
end = start = int.Parse(intervalMatch.Groups[1].Value);
980-
else
981-
throw new OptionException();
982-
983-
if (intervalMatch.Groups[3].Success)
984-
throw new OptionException();
985-
}
986-
987-
for (int l = start; l <= end; l++)
988-
{
989-
result.Add(l);
990-
}
991-
}
992-
993-
catch (Exception ex)
994-
{
995-
throw new OptionException(String.Format("Cannot parse part of msLevel input: '{0}'", piece),
996-
"msLevel", ex);
997-
}
947+
var levelIterator = new NumberIterator(inputString, 1, 10);
948+
return new HashSet<int>(levelIterator.IterateScans());
949+
}
950+
catch (Exception ex)
951+
{
952+
throw new OptionException($"Cannot parse MS level from {inputString} - {ex.Message}", "msLevel");
998953
}
999-
1000-
return result;
1001954
}
1002955
}
1003956
}

Query/ProxiSpectrumReader.cs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,19 @@ public List<ProxiSpectrum> Retrieve()
7777
// Set a cvGroup number counter
7878
var cvGroup = 1;
7979

80-
foreach (var scanNumber in queryParameters.scanNumbers)
80+
NumberIterator scanNumbers;
81+
82+
try
83+
{
84+
scanNumbers = new NumberIterator(queryParameters.scans,
85+
rawFile.RunHeaderEx.FirstSpectrum, rawFile.RunHeaderEx.LastSpectrum);
86+
}
87+
catch (Exception ex)
88+
{
89+
throw new RawFileParserException($"Cannot create scan iterator from {queryParameters.scans} - {ex.Message}");
90+
}
91+
92+
foreach (var scanNumber in scanNumbers.IterateScans())
8193
{
8294
var proxiSpectrum = new ProxiSpectrum();
8395

Query/QueryExecutor.cs

Lines changed: 1 addition & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Collections.Generic;
33
using System.Data;
44
using System.IO;
5-
using Mono.Options;
65
using Newtonsoft.Json;
76

87
namespace ThermoRawFileParser.Query
@@ -11,10 +10,7 @@ public class QueryExecutor
1110
{
1211
public static void Run(QueryParameters parameters)
1312
{
14-
// parse the scans string
15-
var scanIds = ParseScanIds(parameters.scans);
16-
parameters.scanNumbers = scanIds;
17-
13+
1814
var reader = new ProxiSpectrumReader(parameters);
1915
var results = reader.Retrieve();
2016

@@ -58,65 +54,5 @@ private static void StdOutputQueryData(List<ProxiSpectrum> outputData)
5854
var outputString = JsonConvert.SerializeObject(outputData, Formatting.Indented);
5955
Console.Write(outputString);
6056
}
61-
62-
63-
private static HashSet<int> ParseScanIds(string text)
64-
{
65-
if (text.Length == 0) throw new OptionException("Scan ID string invalid, nothing specified", null);
66-
foreach (var c in text)
67-
{
68-
int ic = c;
69-
if (!((ic == ',') || (ic == '-') || (ic == ' ') || ('0' <= ic && ic <= '9')))
70-
{
71-
throw new OptionException("Scan ID string contains invalid character", null);
72-
}
73-
}
74-
75-
var tokens = text.Split(new[] {','}, StringSplitOptions.None);
76-
77-
var container = new HashSet<int>();
78-
79-
for (var i = 0; i < tokens.Length; ++i)
80-
{
81-
if (tokens[i].Length == 0) throw new OptionException("Scan ID string has invalid format", null);
82-
var rangeBoundaries = tokens[i].Split(new[] {'-'}, StringSplitOptions.None);
83-
if (rangeBoundaries.Length == 1)
84-
{
85-
int rangeStart;
86-
try
87-
{
88-
rangeStart = Convert.ToInt32(rangeBoundaries[0]);
89-
}
90-
catch (Exception)
91-
{
92-
throw new OptionException("Scan ID string has invalid format", null);
93-
}
94-
95-
container.Add(rangeStart);
96-
}
97-
else if (rangeBoundaries.Length == 2)
98-
{
99-
int rangeStart;
100-
int rangeEnd;
101-
try
102-
{
103-
rangeStart = Convert.ToInt32(rangeBoundaries[0]);
104-
rangeEnd = Convert.ToInt32(rangeBoundaries[1]);
105-
}
106-
catch (Exception)
107-
{
108-
throw new OptionException("Scan ID string has invalid format", null);
109-
}
110-
111-
for (var l = rangeStart; l <= rangeEnd; ++l)
112-
{
113-
container.Add(l);
114-
}
115-
}
116-
else throw new OptionException("Scan ID string has invalid format", null);
117-
}
118-
119-
return container;
120-
}
12157
}
12258
}

Query/QueryParameters.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
using System.Collections.Generic;
2-
31
namespace ThermoRawFileParser.Query
42
{
53
public class QueryParameters
@@ -28,7 +26,6 @@ public string rawFilePath
2826
public string scans { get; set; }
2927
public string outputFile { get; set; }
3028
public bool noPeakPicking { get; set; }
31-
public HashSet<int> scanNumbers { get; set; }
3229
public bool stdout { get; set; }
3330
public bool Vigilant { get; set; }
3431
public int Errors { get => _errors; }
@@ -42,7 +39,6 @@ public QueryParameters()
4239
scans = "";
4340
outputFile = null;
4441
noPeakPicking = false;
45-
scanNumbers = new HashSet<int>();
4642
stdout = false;
4743
Vigilant = false;
4844
LogFormat = LogFormat.DEFAULT;
@@ -57,8 +53,6 @@ public QueryParameters(QueryParameters copy)
5753
scans = copy.scans;
5854
outputFile = copy.outputFile;
5955
noPeakPicking = copy.noPeakPicking;
60-
scanNumbers = new HashSet<int>();
61-
foreach (int s in copy.scanNumbers) scanNumbers.Add(s);
6256
stdout = copy.stdout;
6357
Vigilant = copy.Vigilant;
6458
LogFormat = copy.LogFormat;
ThermoRawFileParserTest/QueryTests.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using NUnit.Framework;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.IO;
5+
using System.Linq;
6+
using ThermoRawFileParser.Query;
7+
8+
namespace ThermoRawFileParserTest
9+
{
10+
[TestFixture]
11+
public class QueryTests
12+
{
13+
[Test]
14+
public void TestProxiReaderScans()
15+
{
16+
var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");
17+
18+
var parameters = new QueryParameters
19+
{
20+
rawFilePath = testRawFile,
21+
};
22+
23+
//Interval of scans to retrieve
24+
parameters.scans = "1-10";
25+
ProxiSpectrumReader reader = new ProxiSpectrumReader(parameters);
26+
var results = reader.Retrieve();
27+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }));
28+
29+
//Open-ended interval
30+
parameters.scans = "-5";
31+
reader = new ProxiSpectrumReader(parameters);
32+
results = reader.Retrieve();
33+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 1, 2, 3, 4, 5 }));
34+
35+
//Open-ended interval
36+
parameters.scans = "41-";
37+
reader = new ProxiSpectrumReader(parameters);
38+
results = reader.Retrieve();
39+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 41, 42, 43, 44, 45, 46, 47, 48}));
40+
41+
//Interval larger than available scans
42+
parameters.scans = "45-50";
43+
reader = new ProxiSpectrumReader(parameters);
44+
results = reader.Retrieve();
45+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 45, 46, 47, 48 }));
46+
47+
//Sequence of scans to retrieve
48+
parameters.scans = "1,5,7";
49+
reader = new ProxiSpectrumReader(parameters);
50+
results = reader.Retrieve();
51+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 1, 5, 7 }));
52+
53+
//Combination of intervals and individual scans
54+
parameters.scans = "-2,5,7-10,15,46-";
55+
reader = new ProxiSpectrumReader(parameters);
56+
results = reader.Retrieve();
57+
Assert.That(GetScanNumbers(results), Is.EqualTo(new List<int> { 1, 2, 5, 7, 8, 9, 10, 15, 46, 47, 48 }));
58+
}
59+
60+
private List<int> GetScanNumbers(List<ProxiSpectrum> results)
61+
{
62+
List<int> scanNumbers = new List<int>();
63+
64+
foreach (var result in results)
65+
{
66+
result.attributes.Where(a => a.Name == "scan number")
67+
.ToList()
68+
.ForEach(a => scanNumbers.Add(int.Parse(a.Value)));
69+
}
70+
71+
return scanNumbers;
72+
}
73+
}
74+
}

ThermoRawFileParserTest/UtilTests.cs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
using System.Text.RegularExpressions;
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text.RegularExpressions;
24
using NUnit.Framework;
5+
using ThermoRawFileParser;
6+
using ThermoRawFileParser.Util;
37

48
namespace ThermoRawFileParserTest
59
{
@@ -11,7 +15,7 @@ public void TestRegex()
1115
{
1216
const string filterString = "ITMS + c NSI r d Full ms2 961.8803@cid35.00 [259.0000-1934.0000]";
1317
const string pattern = @"ms2 (.*?)@";
14-
18+
1519
Match result = Regex.Match(filterString, pattern);
1620
if (result.Success)
1721
{
@@ -22,5 +26,37 @@ public void TestRegex()
2226
Assert.Fail();
2327
}
2428
}
29+
30+
[Test]
31+
public void TestNumberIterator()
32+
{
33+
NumberIterator iterator;
34+
iterator = new NumberIterator();
35+
Assert.That(new List<int>(iterator.IterateScans()),
36+
Is.EqualTo(new List<int>()));
37+
38+
iterator = new NumberIterator("1, 2,3- 5, 7, 9 - 10", 1, 100);
39+
Assert.That(new List<int>(iterator.IterateScans()),
40+
Is.EqualTo(new List<int> { 1, 2, 3, 4, 5, 7, 9, 10 }));
41+
42+
iterator = new NumberIterator(null, 1, 5);
43+
Assert.That(new List<int>(iterator.IterateScans()),
44+
Is.EqualTo(new List<int> { 1, 2, 3, 4, 5 }));
45+
46+
iterator = new NumberIterator(" - ", 1, 5);
47+
Assert.That(new List<int>(iterator.IterateScans()),
48+
Is.EqualTo(new List<int> { 1, 2, 3, 4, 5 }));
49+
50+
iterator = new NumberIterator("- 5, 9 - ", 1, 12);
51+
Assert.That(new List<int>(iterator.IterateScans()),
52+
Is.EqualTo(new List<int> { 1, 2, 3, 4, 5, 9, 10, 11, 12 }));
53+
54+
Assert.Throws(typeof(Exception), () => new NumberIterator("1, 2, 2-5", 1, 10));
55+
Assert.Throws(typeof(Exception), () => new NumberIterator("3, -5", 1, 10));
56+
Assert.Throws(typeof(Exception), () => new NumberIterator("3 -,7", 1, 10));
57+
Assert.Throws(typeof(Exception), () => new NumberIterator("a,-,7", 1, 10));
58+
59+
}
60+
2561
}
2662
}

0 commit comments

Comments
 (0)