Skip to content

Commit 9bd769c

Browse files
authored
Merge pull request #89 from compomics/mslevels
Mslevels
2 parents 2328239 + 9f84c58 commit 9bd769c

File tree

6 files changed

+172
-12
lines changed

6 files changed

+172
-12
lines changed

MainClass.cs

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
using ThermoRawFileParser.Query;
1010
using ThermoRawFileParser.XIC;
1111
using System.Globalization;
12+
using System.Collections.Generic;
13+
using System.Text.RegularExpressions;
1214

1315
namespace ThermoRawFileParser
1416
{
@@ -17,7 +19,7 @@ public static class MainClass
1719
private static readonly ILog Log =
1820
LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
1921

20-
public const string Version = "1.3.0";
22+
public const string Version = "1.3.1";
2123

2224
public static void Main(string[] args)
2325
{
@@ -411,7 +413,7 @@ private static void RegularParametersParsing(string[] args)
411413
},
412414
{
413415
"a|allDetectors",
414-
"Extract additonal detector data: UV/PDA etc",
416+
"Extract additional detector data: UV/PDA etc",
415417
v => parseInput.AllDetectors = v != null
416418
},
417419
{
@@ -422,6 +424,16 @@ private static void RegularParametersParsing(string[] args)
422424
"e|ignoreInstrumentErrors", "Ignore missing properties by the instrument.",
423425
v => parseInput.IgnoreInstrumentErrors = v != null
424426
},
427+
{
428+
"L=|msLevel=",
429+
"Select MS levels (MS1, MS2, etc) included in the output, should be a comma-separated list of integers ( 1,2,3 ) and/or intervals ( 1-3 ), open-end intervals ( 1- ) are allowed",
430+
v => parseInput.MsLevel = ParseMsLevel(v)
431+
},
432+
{
433+
"P|mgfPrecursor",
434+
"Include precursor scan number in MGF file TITLE",
435+
v => parseInput.MGFPrecursor = v != null
436+
},
425437
{
426438
"u:|s3_url:",
427439
"Optional property to write directly the data into S3 Storage.",
@@ -756,5 +768,64 @@ private static void ShowHelp(string message, OptionException optionException, Op
756768
optionSet.WriteOptionDescriptions(Console.Error);
757769
Environment.Exit(-1);
758770
}
771+
772+
/// <summary>
/// Parses the value of the msLevel option into the set of selected MS levels.
/// </summary>
/// <param name="inputString">
/// Comma-separated list of single levels ("2") and/or intervals ("1-3").
/// An interval may omit either bound ("2-", "-3"); the missing bound defaults
/// to the lowest (1) or highest (10) supported level.
/// </param>
/// <returns>The distinct MS levels selected by the input, each within 1..10.</returns>
/// <exception cref="OptionException">
/// The input is null, contains characters other than digits, commas, dashes and
/// whitespace, or one of its comma-separated parts is malformed, is a single value
/// outside 1..10, or is an interval that selects no level.
/// </exception>
private static HashSet<int> ParseMsLevel(string inputString)
{
    // Supported MS level range; applied to intervals and single values alike
    const int lowestLevel = 1;
    const int highestLevel = 10;

    // Regex.IsMatch throws ArgumentNullException on null; report it as an option error instead
    if (inputString == null)
    {
        throw new OptionException("Missing value for msLevel key", "msLevel");
    }

    HashSet<int> result = new HashSet<int>();
    Regex valid = new Regex(@"^[\d,\-\s]+$");
    Regex interval = new Regex(@"^\s*(\d+)?\s*(-)?\s*(\d+)?\s*$");

    if (!valid.IsMatch(inputString))
    {
        throw new OptionException("Invalid characters in msLevel key", "msLevel");
    }

    foreach (var piece in inputString.Split(','))
    {
        try
        {
            var intervalMatch = interval.Match(piece);

            if (!intervalMatch.Success)
            {
                throw new OptionException("Malformed msLevel part", "msLevel");
            }

            Group lowerBound = intervalMatch.Groups[1];
            Group dash = intervalMatch.Groups[2];
            Group upperBound = intervalMatch.Groups[3];

            int start;
            int end;

            if (dash.Success) // it is an interval
            {
                // Bounds beyond the supported range are clamped, missing bounds are open-ended
                start = lowerBound.Success
                    ? Math.Max(lowestLevel, int.Parse(lowerBound.Value, CultureInfo.InvariantCulture))
                    : lowestLevel;

                end = upperBound.Success
                    ? Math.Min(highestLevel, int.Parse(upperBound.Value, CultureInfo.InvariantCulture))
                    : highestLevel;

                // A reversed or fully out-of-range interval would otherwise be dropped silently
                if (start > end)
                {
                    throw new OptionException("msLevel interval selects no MS level", "msLevel");
                }
            }
            else
            {
                // Without a dash exactly one number is expected (rejects "" and "1 2")
                if (!lowerBound.Success || upperBound.Success)
                {
                    throw new OptionException("Malformed msLevel part", "msLevel");
                }

                start = end = int.Parse(lowerBound.Value, CultureInfo.InvariantCulture);

                // Single values get the same bounds as intervals
                if (start < lowestLevel || start > highestLevel)
                {
                    throw new OptionException("MS level is out of range", "msLevel");
                }
            }

            for (int level = start; level <= end; level++)
            {
                result.Add(level);
            }
        }
        catch (Exception ex)
        {
            // Any failure in a part (malformed, out of range, numeric overflow) is reported
            // with the offending part; the original cause is kept as the inner exception
            throw new OptionException(String.Format("Cannot parse part of msLevel input: '{0}'", piece), "msLevel", ex);
        }
    }

    return result;
}
759830
}
760831
}

ParseInput.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.IO;
34
using ThermoRawFileParser.Writer;
45

56
namespace ThermoRawFileParser
67
{
78
public class ParseInput
89
{
10+
//all ms levels
11+
private readonly HashSet<int> allLevels = new HashSet<int>(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
12+
913
/// <summary>
1014
/// The RAW file path.
1115
/// </summary>
@@ -71,6 +75,10 @@ public string RawFilePath
7175

7276
public bool IgnoreInstrumentErrors { get; set; }
7377

78+
public HashSet<int> MsLevel { get; set; }
79+
80+
public bool MGFPrecursor { get; set; }
81+
7482
private S3Loader S3Loader { get; set; }
7583

7684
public string S3AccessKeyId { get; set; }
@@ -101,6 +109,7 @@ public ParseInput()
101109
LogFormat = LogFormat.DEFAULT;
102110
IgnoreInstrumentErrors = false;
103111
AllDetectors = false;
112+
MsLevel = allLevels;
104113
}
105114

106115
public ParseInput(string rawFilePath, string rawDirectoryPath, string outputDirectory, OutputFormat outputFormat

Properties/AssemblyInfo.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
[assembly: AssemblyConfiguration("")]
1010
[assembly: AssemblyCompany("")]
1111
[assembly: AssemblyProduct("ThermoRawFileParser")]
12-
[assembly: AssemblyCopyright("Copyright © 2017")]
12+
[assembly: AssemblyCopyright("Copyright © 2017 - 2020")]
1313
[assembly: AssemblyTrademark("")]
1414
[assembly: AssemblyCulture("")]
1515

@@ -31,7 +31,7 @@
3131
// You can specify all the values or you can default the Build and Revision Numbers
3232
// by using the '*' as shown below:
3333
// [assembly: AssemblyVersion("1.0.*")]
34-
[assembly: AssemblyVersion("1.2.3.0")]
35-
[assembly: AssemblyFileVersion("1.2.3.0")]
34+
[assembly: AssemblyVersion("1.3.1.0")]
35+
[assembly: AssemblyFileVersion("1.3.1.0")]
3636

3737
[assembly: log4net.Config.XmlConfigurator(ConfigFile = "log4net.config")]

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ optional subcommands are xic|query (use [subcommand] -h for more info]):
7676
verbose.
7777
-e, --ignoreInstrumentErrors
7878
Ignore missing properties by the instrument.
79+
-L, --msLevel=VALUE Select MS levels (MS1, MS2, etc) included in the
80+
output, should be a comma-separated list of
81+
integers ( 1,2,3 ) and/or intervals ( 1-3 ),
82+
open-end intervals ( 1- ) are allowed
83+
-P, --mgfPrecursor Include precursor scan number in MGF file TITLE
7984
-u, --s3_url[=VALUE] Optional property to write directly the data into
8085
S3 Storage.
8186
-k, --s3_accesskeyid[=VALUE]

Writer/MgfSpectrumWriter.cs

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
using System;
22
using System.Globalization;
3+
using System.Linq;
34
using System.Reflection;
5+
using System.Text.RegularExpressions;
46
using log4net;
7+
using ThermoFisher.CommonCore.Data;
58
using ThermoFisher.CommonCore.Data.Business;
69
using ThermoFisher.CommonCore.Data.FilterEnums;
710
using ThermoFisher.CommonCore.Data.Interfaces;
11+
using ThermoRawFileParser.Util;
812

913
namespace ThermoRawFileParser.Writer
1014
{
@@ -16,10 +20,20 @@ public class MgfSpectrumWriter : SpectrumWriter
1620
private const string PositivePolarity = "+";
1721
private const string NegativePolarity = "-";
1822

23+
//filter string
24+
private const string FilterStringIsolationMzPattern = @"ms2 (.*?)@";
25+
26+
//precursor scan number for MS2 scans
27+
private int _precursorMs1ScanNumber;
28+
29+
// Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MS3 spectrum
30+
private readonly LimitedSizeDictionary<string, int> _precursorMs2ScanNumbers = new LimitedSizeDictionary<string, int>(40);
31+
1932
// Precursor scan number for reference in the precursor element of an MS2 spectrum
2033

2134
public MgfSpectrumWriter(ParseInput parseInput) : base(parseInput)
2235
{
36+
ParseInput.MsLevel.Remove(1); //MS1 spectra are not supposed to be in MGF
2337
}
2438

2539
/// <inheritdoc />
@@ -35,7 +49,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
3549
{
3650
if (ParseInput.LogFormat == LogFormat.DEFAULT)
3751
{
38-
var scanProgress = (int) ((double) scanNumber / (lastScanNumber - firstScanNumber + 1) * 100);
52+
var scanProgress = (int)((double)scanNumber / (lastScanNumber - firstScanNumber + 1) * 100);
3953
if (scanProgress % ProgressPercentageStep == 0)
4054
{
4155
if (scanProgress != lastScanProgress)
@@ -59,17 +73,65 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
5973
// Get the scan event for this scan number
6074
var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);
6175

76+
// precursor reference
77+
var spectrumRef = "";
78+
79+
//keeping track of precursor scan
80+
switch (scanFilter.MSOrder)
81+
{
82+
case MSOrderType.Ms:
83+
84+
// Keep track of scan number for precursor reference
85+
_precursorMs1ScanNumber = scanNumber;
86+
87+
break;
88+
case MSOrderType.Ms2:
89+
// Keep track of scan number and isolation m/z for precursor reference
90+
var result = Regex.Match(scanEvent.ToString(), FilterStringIsolationMzPattern);
91+
if (result.Success)
92+
{
93+
if (_precursorMs2ScanNumbers.ContainsKey(result.Groups[1].Value))
94+
{
95+
_precursorMs2ScanNumbers.Remove(result.Groups[1].Value);
96+
}
97+
98+
_precursorMs2ScanNumbers.Add(result.Groups[1].Value, scanNumber);
99+
}
100+
101+
spectrumRef = ConstructSpectrumTitle((int)Device.MS, 1, _precursorMs1ScanNumber);
102+
break;
103+
104+
case MSOrderType.Ms3:
105+
var precursorMs2ScanNumber = _precursorMs2ScanNumbers.Keys.FirstOrDefault(
106+
isolationMz => scanEvent.ToString().Contains(isolationMz));
107+
if (!precursorMs2ScanNumber.IsNullOrEmpty())
108+
{
109+
spectrumRef = ConstructSpectrumTitle((int)Device.MS, 1, _precursorMs2ScanNumbers[precursorMs2ScanNumber]);
110+
}
111+
else
112+
{
113+
throw new InvalidOperationException("Couldn't find a MS2 precursor scan for MS3 scan " + scanEvent);
114+
}
115+
break;
116+
117+
default:
118+
break;
119+
}
120+
121+
62122
// only write spectra whose MS level is selected (MS1 is removed from the selection in the constructor)
63-
if (scanFilter.MSOrder != MSOrderType.Ms)
123+
if (ParseInput.MsLevel.Contains((int)scanFilter.MSOrder))
64124
{
65125
IReaction reaction = GetReaction(scanEvent, scanNumber);
66126

67127
Writer.WriteLine("BEGIN IONS");
68-
Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)}");
128+
if
129+
(ParseInput.MGFPrecursor) Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)} [PRECURSOR={spectrumRef}]");
130+
else
131+
Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)}");
69132
Writer.WriteLine($"SCANS={scanNumber}");
70133
Writer.WriteLine(
71134
$"RTINSECONDS={(time * 60).ToString(CultureInfo.InvariantCulture)}");
72-
73135
// trailer extra data list
74136
var trailerData = rawFile.GetTrailerExtraInformation(scanNumber);
75137
int? charge = null;
@@ -91,7 +153,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
91153
CultureInfo.CurrentCulture);
92154
}
93155

94-
if (trailerData.Labels[i] == "MS" + (int) scanFilter.MSOrder + " Isolation Width:")
156+
if (trailerData.Labels[i] == "MS" + (int)scanFilter.MSOrder + " Isolation Width:")
95157
{
96158
isolationWidth = double.Parse(trailerData.Values[i], NumberStyles.Any,
97159
CultureInfo.CurrentCulture);

Writer/MzMlSpectrumWriter.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,10 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
334334
}
335335

336336
var spectrum = ConstructMSSpectrum(scanNumber);
337-
if (spectrum != null)
337+
338+
var level = int.Parse(spectrum.cvParam.Where(p => p.accession == "MS:1000511").First().value);
339+
340+
if (spectrum != null && ParseInput.MsLevel.Contains(level)) //applying MS level filter
338341
{
339342
spectrum.index = index.ToString();
340343
if (_doIndexing)
@@ -590,7 +593,17 @@ private string GetTotalScanNumber()
590593

591594
_rawFile.SelectInstrument(Device.MS, 1);
592595

593-
numScans += 1 + _rawFile.RunHeader.LastSpectrum - _rawFile.RunHeader.FirstSpectrum;
596+
var levelFilter = _rawFile.GetFilterFromString("");
597+
598+
foreach (var level in ParseInput.MsLevel)
599+
{
600+
levelFilter.MSOrder = (MSOrderType)level;
601+
602+
var filteredScans = _rawFile.GetFilteredScansListByScanRange(levelFilter, _rawFile.RunHeader.FirstSpectrum, _rawFile.RunHeader.LastSpectrum);
603+
604+
numScans += filteredScans.Count;
605+
}
606+
594607

595608
if (ParseInput.AllDetectors)
596609
{

0 commit comments

Comments
 (0)