1717#include < unordered_map>
1818#include < unordered_set>
1919
20- enum class MatchType : unsigned char {
21- NONE,
22- MATCH
23- };
2420typedef std::tuple<unsigned , std::string> FileLength;
2521typedef const std::string* StringPtr;
2622typedef std::unordered_map<unsigned long , std::vector<StringPtr>> HashToFiles;
@@ -78,13 +74,14 @@ namespace {
7874 }
7975 }
8076
81- std::tuple<std::vector<SourceFile>, std::vector<MatchType >, unsigned , unsigned > LoadSourceFiles (
77+ std::tuple<std::vector<SourceFile>, std::vector<bool >, unsigned , unsigned > LoadSourceFiles (
8278 const std::vector<std::string>& lines,
8379 unsigned minChars,
84- bool ignorePrepStuff) {
80+ bool ignorePrepStuff,
81+ std::ostream& log) {
8582
8683 std::vector<SourceFile> sourceFiles;
87- std::vector<MatchType > matrix;
84+ std::vector<bool > matrix;
8885 size_t maxLinesPerFile = 0 ;
8986 int files = 0 ;
9087 unsigned long locsTotal = 0 ;
@@ -140,7 +137,7 @@ namespace {
140137 throw std::runtime_error (stream.str ().c_str ());
141138 }
142139
143- std::cout
140+ log
144141 << lines.size ()
145142 << " done.\n\n " ;
146143 // Generate matrix large enough for all files
@@ -169,27 +166,27 @@ namespace {
169166 bool xml,
170167 const SourceFile& source1,
171168 const SourceFile& source2,
172- std::ostream& outFile ) {
169+ std::ostream& out ) {
173170 unsigned duplicateLines = 0 ;
174171 if (xml) {
175- outFile
172+ out
176173 << " <set LineCount=\" " << count << " \" >"
177174 << std::endl;
178175 int startLineNumber1 = source1.GetLine (line1).GetLineNumber ();
179176 int endLineNumber1 = source1.GetLine (line1 + count).GetLineNumber ();
180- outFile
177+ out
181178 << " <block SourceFile=\" " << source1.GetFilename ()
182179 << " \" StartLineNumber=\" " << startLineNumber1
183180 << " \" EndLineNumber=\" " << endLineNumber1 << " \" />"
184181 << std::endl;
185182 int startLineNumber2 = source2.GetLine (line2).GetLineNumber ();
186183 int endLineNumber2 = source2.GetLine (line2 + count).GetLineNumber ();
187- outFile
184+ out
188185 << " <block SourceFile=\" " << source2.GetFilename ()
189186 << " \" StartLineNumber=\" " << startLineNumber2
190187 << " \" EndLineNumber=\" " << endLineNumber2 << " \" />"
191188 << std::endl;
192- outFile
189+ out
193190 << " <lines xml:space=\" preserve\" >"
194191 << std::endl;
195192 for (int j = 0 ; j < count; j++) {
@@ -208,27 +205,27 @@ namespace {
208205 // > --> >
209206 StringUtil::StrSub (tmpstr, " >" , " >" , -1 );
210207
211- outFile << " <line Text=\" " << tmpstr << " \" />" << std::endl;
208+ out << " <line Text=\" " << tmpstr << " \" />" << std::endl;
212209 duplicateLines++;
213210 }
214211
215- outFile << " </lines>" << std::endl;
216- outFile << " </set>" << std::endl;
212+ out << " </lines>" << std::endl;
213+ out << " </set>" << std::endl;
217214 } else {
218- outFile
215+ out
219216 << source1.GetFilename ()
220217 << " (" << source1.GetLine (line1).GetLineNumber () << " )"
221218 << std::endl;
222- outFile
219+ out
223220 << source2.GetFilename ()
224221 << " (" << source2.GetLine (line2).GetLineNumber () << " )"
225222 << std::endl;
226223 for (int j = 0 ; j < count; j++) {
227- outFile << source1.GetLine (j + line1).GetLine () << std::endl;
224+ out << source1.GetLine (j + line1).GetLine () << std::endl;
228225 duplicateLines++;
229226 }
230227
231- outFile << std::endl;
228+ out << std::endl;
232229 }
233230
234231 return duplicateLines;
@@ -237,21 +234,21 @@ namespace {
237234 ProcessResult Process (
238235 const SourceFile& source1,
239236 const SourceFile& source2,
240- std::vector<MatchType >& matrix,
237+ std::vector<bool >& matrix,
241238 const Options& options,
242239 std::ostream& outFile) {
243240 size_t m = source1.GetNumOfLines ();
244241 size_t n = source2.GetNumOfLines ();
245242
246243 // Reset matrix data
247- std::fill (std::begin (matrix), std::begin (matrix) + m * n, MatchType::NONE );
244+ std::fill (std::begin (matrix), std::begin (matrix) + m * n, false );
248245
249246 // Compute matrix
250247 for (size_t y = 0 ; y < m; y++) {
251248 auto & line = source1.GetLine (y);
252249 for (size_t x = 0 ; x < n; x++) {
253250 if (line == source2.GetLine (x)) {
254- matrix[x + n * y] = MatchType::MATCH ;
251+ matrix[x + n * y] = true ;
255252 }
256253 }
257254 }
@@ -273,7 +270,7 @@ namespace {
273270 unsigned seqLen = 0 ;
274271 size_t maxX = std::min (n, m - y);
275272 for (size_t x = 0 ; x < maxX; x++) {
276- if (matrix[x + n * (y + x)] == MatchType::MATCH ) {
273+ if (matrix[x + n * (y + x)]) {
277274 seqLen++;
278275 } else {
279276 if (seqLen >= lMinBlockSize) {
@@ -321,7 +318,7 @@ namespace {
321318 unsigned seqLen = 0 ;
322319 size_t maxY = std::min (m, n - x);
323320 for (size_t y = 0 ; y < maxY; y++) {
324- if (matrix[x + y + n * y] == MatchType::MATCH ) {
321+ if (matrix[x + y + n * y]) {
325322 seqLen++;
326323 } else {
327324 if (seqLen >= lMinBlockSize) {
@@ -360,9 +357,27 @@ namespace {
360357}
361358
362359void Duplo::Run (const Options& options) {
363- std::ofstream outfile (
364- options.GetOutputFilename ().c_str (), std::ios::out | std::ios::binary);
365- if (!outfile) {
360+ std::streambuf* buf;
361+ std::streambuf* logbuf;
362+ std::ofstream of;
363+ if (options.GetOutputFilename () == " -" ) {
364+ buf = std::cout.rdbuf ();
365+ if (options.GetOutputXml () == false ) {
366+ logbuf = std::cout.rdbuf ();
367+ }
368+ else {
369+ logbuf = 0 ;
370+ }
371+ }
372+ else {
373+ of.open (options.GetOutputFilename ().c_str (), std::ios::out | std::ios::binary);
374+ buf = of.rdbuf ();
375+ logbuf = std::cout.rdbuf ();
376+ }
377+
378+ std::ostream out (buf);
379+ std::ostream log (logbuf);
380+ if (!out) {
366381 std::ostringstream stream;
367382 stream
368383 << " Error: Can't open file: "
@@ -371,19 +386,22 @@ void Duplo::Run(const Options& options) {
371386 throw std::runtime_error (stream.str ().c_str ());
372387 }
373388
374- std::cout << " Loading and hashing files ... " << std::flush;
389+ log << " Loading and hashing files ... " << std::flush;
375390
376391 if (options.GetOutputXml ()) {
377- outfile
392+ out
378393 << " <?xml version=\" 1.0\" ?>"
379394 << std::endl
380395 << " <duplo>"
381396 << std::endl;
382397 }
383398
384399 auto lines = LoadFileList (options.GetListFilename ());
385- auto [sourceFiles, matrix, files, locsTotal] =
386- LoadSourceFiles (lines, options.GetMinChars (), options.GetIgnorePrepStuff ());
400+ auto [sourceFiles, matrix, files, locsTotal] = LoadSourceFiles (
401+ lines,
402+ options.GetMinChars (),
403+ options.GetIgnorePrepStuff (),
404+ log);
387405 auto numFilesToCheck = options.GetFilesToCheck () > 0 ? std::min (options.GetFilesToCheck (), sourceFiles.size ()): sourceFiles.size ();
388406
389407 // hash maps
@@ -410,14 +428,13 @@ void Duplo::Run(const Options& options) {
410428 }
411429 }
412430
413- std::cout << left.GetFilename ();
414431 ProcessResult processResult =
415432 Process (
416433 left,
417434 left,
418435 matrix,
419436 options,
420- outfile );
437+ out );
421438
422439 // files to compare are those that have matching lines
423440 for (unsigned j = i + 1 ; j < sourceFiles.size (); j++) {
@@ -430,25 +447,31 @@ void Duplo::Run(const Options& options) {
430447 right,
431448 matrix,
432449 options,
433- outfile );
450+ out );
434451 }
435452 }
436453
437- if (processResult.Blocks () > 0 ) {
438- std::cout << " found: " << processResult.Blocks () << " block(s)" << std::endl;
439- } else {
440- std::cout << " nothing found." << std::endl;
454+ if (options.GetOutputXml () == false ) {
455+ if (processResult.Blocks () > 0 ) {
456+ log
457+ << left.GetFilename ()
458+ << " found: " << processResult.Blocks () << " block(s)" << std::endl;
459+ } else {
460+ log
461+ << left.GetFilename ()
462+ << " nothing found." << std::endl;
463+ }
441464 }
442465
443466 processResultTotal << processResult;
444467 }
445468
446469 if (options.GetOutputXml ()) {
447- outfile
470+ out
448471 << " </duplo>"
449472 << std::endl;
450473 } else {
451- outfile
474+ out
452475 << " Configuration:"
453476 << std::endl
454477 << " Number of files: "
0 commit comments