Skip to content

Commit a04737a

Browse files
committed
Merge branch 'forks'
2 parents a8809ab + a131325 commit a04737a

38 files changed

+232
-146
lines changed

.github/workflows/ccpp.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,23 @@ jobs:
4343
with:
4444
node-version: 18
4545

46-
- name: build
46+
- name: setup
4747
run: |
4848
npm install -g bats
49+
50+
- name: build
51+
run: |
4952
export DUPLO_VERSION=`cat ./uploads/tag.txt`
5053
mkdir -p build
5154
pushd build
5255
cmake .. -DDUPLO_VERSION=\"$DUPLO_VERSION\" -DCMAKE_BUILD_TYPE=Release
5356
make
5457
popd
5558
zip --junk-paths duplo-linux build/duplo
59+
60+
- name: test
61+
run: bats --recursive tests
62+
5663
- name: upload linux artifact
5764
uses: actions/upload-artifact@v4
5865
with:
@@ -216,4 +223,3 @@ jobs:
216223
asset_path: ./uploads/windows-build/duplo-windows.zip
217224
asset_name: duplo-windows.zip
218225
asset_content_type: application/zip
219-

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@ set_target_properties(duplo PROPERTIES
1212
)
1313

1414
target_compile_definitions(duplo PRIVATE DUPLO_VERSION=${DUPLO_VERSION})
15+
target_include_directories(duplo PRIVATE src/include/)
1516

1617
if(NOT MSVC)
1718
target_compile_options(duplo PRIVATE -Wall -Werror)
1819
endif()
19-
20-

Duplo.vcxproj

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -125,31 +125,31 @@
125125
<ClCompile Include="src\TextFile.cpp" />
126126
</ItemGroup>
127127
<ItemGroup>
128-
<ClInclude Include="src\ArgumentParser.h" />
129-
<ClInclude Include="src\CstyleCommentsFilter.h" />
130-
<ClInclude Include="src\CstyleUtils.h" />
131-
<ClInclude Include="src\Duplo.h" />
132-
<ClInclude Include="src\FileTypeBase.h" />
133-
<ClInclude Include="src\FileTypeFactory.h" />
134-
<ClInclude Include="src\FileType_Ada.h" />
135-
<ClInclude Include="src\FileType_C.h" />
136-
<ClInclude Include="src\FileType_CS.h" />
137-
<ClInclude Include="src\FileType_Java.h" />
138-
<ClInclude Include="src\FileType_S.h" />
139-
<ClInclude Include="src\FileType_Unknown.h" />
140-
<ClInclude Include="src\FileType_VB.h" />
141-
<ClInclude Include="src\Fwd.h" />
142-
<ClInclude Include="src\HashUtil.h" />
143-
<ClInclude Include="src\IFileType.h" />
144-
<ClInclude Include="src\ILineFilter.h" />
145-
<ClInclude Include="src\NoopLineFilter.h" />
146-
<ClInclude Include="src\Options.h" />
147-
<ClInclude Include="src\SourceFile.h" />
148-
<ClInclude Include="src\SourceLine.h" />
149-
<ClInclude Include="src\StringUtil.h" />
150-
<ClInclude Include="src\TextFile.h" />
128+
<ClInclude Include="src\include\ArgumentParser.h" />
129+
<ClInclude Include="src\include\CstyleCommentsFilter.h" />
130+
<ClInclude Include="src\include\CstyleUtils.h" />
131+
<ClInclude Include="src\include\Duplo.h" />
132+
<ClInclude Include="src\include\FileTypeBase.h" />
133+
<ClInclude Include="src\include\FileTypeFactory.h" />
134+
<ClInclude Include="src\include\FileType_Ada.h" />
135+
<ClInclude Include="src\include\FileType_C.h" />
136+
<ClInclude Include="src\include\FileType_CS.h" />
137+
<ClInclude Include="src\include\FileType_Java.h" />
138+
<ClInclude Include="src\include\FileType_S.h" />
139+
<ClInclude Include="src\include\FileType_Unknown.h" />
140+
<ClInclude Include="src\include\FileType_VB.h" />
141+
<ClInclude Include="src\include\Fwd.h" />
142+
<ClInclude Include="src\include\HashUtil.h" />
143+
<ClInclude Include="src\include\IFileType.h" />
144+
<ClInclude Include="src\include\ILineFilter.h" />
145+
<ClInclude Include="src\include\NoopLineFilter.h" />
146+
<ClInclude Include="src\include\Options.h" />
147+
<ClInclude Include="src\include\SourceFile.h" />
148+
<ClInclude Include="src\include\SourceLine.h" />
149+
<ClInclude Include="src\include\StringUtil.h" />
150+
<ClInclude Include="src\include\TextFile.h" />
151151
</ItemGroup>
152152
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
153153
<ImportGroup Label="ExtensionTargets">
154154
</ImportGroup>
155-
</Project>
155+
</Project>

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
**Updates:**
66

7-
🔥 v1 add build on Windows (thanks [@chausner](https://github.com/chausner)!) <br/>
8-
v0.8 adds improved Java support
7+
- 🔥 v1.1 improves memory usage (taken from [@nachose's fork](https://github.com/nachose/Duplo)); also re-enables the tests and organizes the code
8+
- 🚀 v1.0 adds build on Windows (thanks [@chausner](https://github.com/chausner)!)
9+
- v0.8 adds improved Java support
910

1011
🙌 Help needed! See [8.3](#83-additional-language-support) on how to support more languages.
1112

@@ -102,8 +103,8 @@ filenames into this command. A complete commandline sample will be shown below.
102103

103104
### 4.2. Pre-built binaries
104105

105-
Duplo is also available as a pre-built binary for (Alpine) Linux, macOS and
106-
Windows. Grab the executable from the
106+
Duplo is also available as a pre-built binary for (Alpine) Linux, macOS and
107+
Windows. Grab the executable from the
107108
[releases](https://github.com/dlidstrom/Duplo/releases) page.
108109

109110
You can of course build from source as well.

src/Duplo.cpp

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,6 @@
1717
#include <unordered_map>
1818
#include <unordered_set>
1919

20-
enum class MatchType : unsigned char {
21-
NONE,
22-
MATCH
23-
};
2420
typedef std::tuple<unsigned, std::string> FileLength;
2521
typedef const std::string* StringPtr;
2622
typedef std::unordered_map<unsigned long, std::vector<StringPtr>> HashToFiles;
@@ -78,13 +74,14 @@ namespace {
7874
}
7975
}
8076

81-
std::tuple<std::vector<SourceFile>, std::vector<MatchType>, unsigned, unsigned> LoadSourceFiles(
77+
std::tuple<std::vector<SourceFile>, std::vector<bool>, unsigned, unsigned> LoadSourceFiles(
8278
const std::vector<std::string>& lines,
8379
unsigned minChars,
84-
bool ignorePrepStuff) {
80+
bool ignorePrepStuff,
81+
std::ostream& log) {
8582

8683
std::vector<SourceFile> sourceFiles;
87-
std::vector<MatchType> matrix;
84+
std::vector<bool> matrix;
8885
size_t maxLinesPerFile = 0;
8986
int files = 0;
9087
unsigned long locsTotal = 0;
@@ -140,7 +137,7 @@ namespace {
140137
throw std::runtime_error(stream.str().c_str());
141138
}
142139

143-
std::cout
140+
log
144141
<< lines.size()
145142
<< " done.\n\n";
146143
// Generate matrix large enough for all files
@@ -169,27 +166,27 @@ namespace {
169166
bool xml,
170167
const SourceFile& source1,
171168
const SourceFile& source2,
172-
std::ostream& outFile) {
169+
std::ostream& out) {
173170
unsigned duplicateLines = 0;
174171
if (xml) {
175-
outFile
172+
out
176173
<< " <set LineCount=\"" << count << "\">"
177174
<< std::endl;
178175
int startLineNumber1 = source1.GetLine(line1).GetLineNumber();
179176
int endLineNumber1 = source1.GetLine(line1 + count).GetLineNumber();
180-
outFile
177+
out
181178
<< " <block SourceFile=\"" << source1.GetFilename()
182179
<< "\" StartLineNumber=\"" << startLineNumber1
183180
<< "\" EndLineNumber=\"" << endLineNumber1 << "\"/>"
184181
<< std::endl;
185182
int startLineNumber2 = source2.GetLine(line2).GetLineNumber();
186183
int endLineNumber2 = source2.GetLine(line2 + count).GetLineNumber();
187-
outFile
184+
out
188185
<< " <block SourceFile=\"" << source2.GetFilename()
189186
<< "\" StartLineNumber=\"" << startLineNumber2
190187
<< "\" EndLineNumber=\"" << endLineNumber2 << "\"/>"
191188
<< std::endl;
192-
outFile
189+
out
193190
<< " <lines xml:space=\"preserve\">"
194191
<< std::endl;
195192
for (int j = 0; j < count; j++) {
@@ -208,27 +205,27 @@ namespace {
208205
// > --> &gt;
209206
StringUtil::StrSub(tmpstr, "&gt;", ">", -1);
210207

211-
outFile << " <line Text=\"" << tmpstr << "\"/>" << std::endl;
208+
out << " <line Text=\"" << tmpstr << "\"/>" << std::endl;
212209
duplicateLines++;
213210
}
214211

215-
outFile << " </lines>" << std::endl;
216-
outFile << " </set>" << std::endl;
212+
out << " </lines>" << std::endl;
213+
out << " </set>" << std::endl;
217214
} else {
218-
outFile
215+
out
219216
<< source1.GetFilename()
220217
<< "(" << source1.GetLine(line1).GetLineNumber() << ")"
221218
<< std::endl;
222-
outFile
219+
out
223220
<< source2.GetFilename()
224221
<< "(" << source2.GetLine(line2).GetLineNumber() << ")"
225222
<< std::endl;
226223
for (int j = 0; j < count; j++) {
227-
outFile << source1.GetLine(j + line1).GetLine() << std::endl;
224+
out << source1.GetLine(j + line1).GetLine() << std::endl;
228225
duplicateLines++;
229226
}
230227

231-
outFile << std::endl;
228+
out << std::endl;
232229
}
233230

234231
return duplicateLines;
@@ -237,21 +234,21 @@ namespace {
237234
ProcessResult Process(
238235
const SourceFile& source1,
239236
const SourceFile& source2,
240-
std::vector<MatchType>& matrix,
237+
std::vector<bool>& matrix,
241238
const Options& options,
242239
std::ostream& outFile) {
243240
size_t m = source1.GetNumOfLines();
244241
size_t n = source2.GetNumOfLines();
245242

246243
// Reset matrix data
247-
std::fill(std::begin(matrix), std::begin(matrix) + m * n, MatchType::NONE);
244+
std::fill(std::begin(matrix), std::begin(matrix) + m * n, false);
248245

249246
// Compute matrix
250247
for (size_t y = 0; y < m; y++) {
251248
auto& line = source1.GetLine(y);
252249
for (size_t x = 0; x < n; x++) {
253250
if (line == source2.GetLine(x)) {
254-
matrix[x + n * y] = MatchType::MATCH;
251+
matrix[x + n * y] = true;
255252
}
256253
}
257254
}
@@ -273,7 +270,7 @@ namespace {
273270
unsigned seqLen = 0;
274271
size_t maxX = std::min(n, m - y);
275272
for (size_t x = 0; x < maxX; x++) {
276-
if (matrix[x + n * (y + x)] == MatchType::MATCH) {
273+
if (matrix[x + n * (y + x)]) {
277274
seqLen++;
278275
} else {
279276
if (seqLen >= lMinBlockSize) {
@@ -321,7 +318,7 @@ namespace {
321318
unsigned seqLen = 0;
322319
size_t maxY = std::min(m, n - x);
323320
for (size_t y = 0; y < maxY; y++) {
324-
if (matrix[x + y + n * y] == MatchType::MATCH) {
321+
if (matrix[x + y + n * y]) {
325322
seqLen++;
326323
} else {
327324
if (seqLen >= lMinBlockSize) {
@@ -360,9 +357,27 @@ namespace {
360357
}
361358

362359
void Duplo::Run(const Options& options) {
363-
std::ofstream outfile(
364-
options.GetOutputFilename().c_str(), std::ios::out | std::ios::binary);
365-
if (!outfile) {
360+
std::streambuf* buf;
361+
std::streambuf* logbuf;
362+
std::ofstream of;
363+
if (options.GetOutputFilename() == "-") {
364+
buf = std::cout.rdbuf();
365+
if (options.GetOutputXml() == false) {
366+
logbuf = std::cout.rdbuf();
367+
}
368+
else {
369+
logbuf = 0;
370+
}
371+
}
372+
else {
373+
of.open(options.GetOutputFilename().c_str(), std::ios::out | std::ios::binary);
374+
buf = of.rdbuf();
375+
logbuf = std::cout.rdbuf();
376+
}
377+
378+
std::ostream out(buf);
379+
std::ostream log(logbuf);
380+
if (!out) {
366381
std::ostringstream stream;
367382
stream
368383
<< "Error: Can't open file: "
@@ -371,19 +386,22 @@ void Duplo::Run(const Options& options) {
371386
throw std::runtime_error(stream.str().c_str());
372387
}
373388

374-
std::cout << "Loading and hashing files ... " << std::flush;
389+
log << "Loading and hashing files ... " << std::flush;
375390

376391
if (options.GetOutputXml()) {
377-
outfile
392+
out
378393
<< "<?xml version=\"1.0\"?>"
379394
<< std::endl
380395
<< "<duplo>"
381396
<< std::endl;
382397
}
383398

384399
auto lines = LoadFileList(options.GetListFilename());
385-
auto [sourceFiles, matrix, files, locsTotal] =
386-
LoadSourceFiles(lines, options.GetMinChars(), options.GetIgnorePrepStuff());
400+
auto [sourceFiles, matrix, files, locsTotal] = LoadSourceFiles(
401+
lines,
402+
options.GetMinChars(),
403+
options.GetIgnorePrepStuff(),
404+
log);
387405
auto numFilesToCheck = options.GetFilesToCheck() > 0 ? std::min(options.GetFilesToCheck(), sourceFiles.size()): sourceFiles.size();
388406

389407
// hash maps
@@ -410,14 +428,13 @@ void Duplo::Run(const Options& options) {
410428
}
411429
}
412430

413-
std::cout << left.GetFilename();
414431
ProcessResult processResult =
415432
Process(
416433
left,
417434
left,
418435
matrix,
419436
options,
420-
outfile);
437+
out);
421438

422439
// files to compare are those that have matching lines
423440
for (unsigned j = i + 1; j < sourceFiles.size(); j++) {
@@ -430,25 +447,31 @@ void Duplo::Run(const Options& options) {
430447
right,
431448
matrix,
432449
options,
433-
outfile);
450+
out);
434451
}
435452
}
436453

437-
if (processResult.Blocks() > 0) {
438-
std::cout << " found: " << processResult.Blocks() << " block(s)" << std::endl;
439-
} else {
440-
std::cout << " nothing found." << std::endl;
454+
if (options.GetOutputXml() == false) {
455+
if (processResult.Blocks() > 0) {
456+
log
457+
<< left.GetFilename()
458+
<< " found: " << processResult.Blocks() << " block(s)" << std::endl;
459+
} else {
460+
log
461+
<< left.GetFilename()
462+
<< " nothing found." << std::endl;
463+
}
441464
}
442465

443466
processResultTotal << processResult;
444467
}
445468

446469
if (options.GetOutputXml()) {
447-
outfile
470+
out
448471
<< "</duplo>"
449472
<< std::endl;
450473
} else {
451-
outfile
474+
out
452475
<< "Configuration:"
453476
<< std::endl
454477
<< " Number of files: "

src/Main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ namespace {
6666
std::cout << " -d ignore file pairs with same name\n";
6767
std::cout << " -xml output file in XML\n";
6868
std::cout << " INPUT_FILELIST input filelist (specify '-' to read from stdin)\n";
69-
std::cout << " OUTPUT_FILE output file\n";
69+
std::cout << " OUTPUT_FILE output file (specify '-' to output to stdout)\n";
7070

7171
std::cout << "\nVERSION\n";
7272
std::cout << " " << VERSION << "\n";
File renamed without changes.

0 commit comments

Comments
 (0)