
Commit 6ad81a4

refactor getNextRow and rename some SourceStats attributes
1 parent 4fe3580 commit 6ad81a4


5 files changed: +61 -54 lines changed


README.md

Lines changed: 3 additions & 3 deletions
@@ -631,7 +631,7 @@ This returns the change stats once completed.
 The options parameter can be either a standard output (console, null), a string filename, a URL or an OutputOptions.

 Note that it can throw the UnorderedStreamsError exception if it detects that the streams are not properly ordered by the specified keys.
-Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys wich violates the primary keys specified in the options.
+Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys which violates the primary keys specified in the options.

 ### DifferContext methods

@@ -666,14 +666,14 @@ This returns the change stats once completed.
 The options parameter can be either a standard output (console, null), a string filename, a URL or an OutputOptions.

 Note that it can throw the UnorderedStreamsError exception if it detects that the streams are not properly ordered by the specified keys.
-Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys wich violates the primary keys specified in the options.
+Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys which violates the primary keys specified in the options.

 #### diffs

 Enumerates the differences between the old and new sources.

 Note that it can throw the UnorderedStreamsError exception if it detects that the streams are not properly ordered by the specified keys.
-Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys wich violates the primary keys specified in the options.
+Note that it can throw the UniqueKeyViolationError exception if it detects that a stream has duplicate keys which violates the primary keys specified in the options.

 ### JSON input format

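The README text above documents the errors a caller should handle when producing a diff output. A minimal sketch of that handling, assuming the package's diff() entry point and to() output method and that both error classes are exported (none of these names appear in this diff, so treat them as assumptions to verify against the full README):

```ts
import { diff, UnorderedStreamsError, UniqueKeyViolationError } from 'tabular-data-differ';

async function run(): Promise<void> {
    try {
        // send the detected changes to the console and collect the change stats
        const stats = await diff({
            oldSource: './old.csv',
            newSource: './new.csv',
            keys: ['id'],
        }).to('console');
        console.log(stats);
    } catch (err) {
        if (err instanceof UniqueKeyViolationError) {
            console.error('a source has duplicate primary keys (duplicateKeyHandling set to fail)');
        } else if (err instanceof UnorderedStreamsError) {
            console.error('a source is not properly ordered by the specified keys');
        } else {
            throw err;
        }
    }
}
```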

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "tabular-data-differ",
-  "version": "1.1.4",
+  "version": "1.1.5",
   "description": "A very efficient library for diffing two sorted streams of tabular data, such as CSV files.",
   "keywords": [
     "table",

src/differ.test.ts

Lines changed: 4 additions & 4 deletions
@@ -1882,8 +1882,8 @@ Note that you can resolve this conflict automatically using the duplicateKeyHand
            same: 5
        });
        expect(ctx.oldSourceStats).toEqual({
-            parsedRows: 12,
-            duplicateParsedRows: 3,
+            rows: 12,
+            duplicateRows: 3,
            uniqueRows: 9,
            uniqueRowsWithDuplicates: 3,
            duplicationPercent: 25,
@@ -1893,8 +1893,8 @@ Note that you can resolve this conflict automatically using the duplicateKeyHand
            averageDuplicatesPerUniqueKey: 1
        });
        expect(ctx.newSourceStats).toEqual({
-            parsedRows: 10,
-            duplicateParsedRows: 3,
+            rows: 10,
+            duplicateRows: 3,
            uniqueRows: 7,
            uniqueRowsWithDuplicates: 2,
            duplicationPercent: 30,
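As a quick cross-check (not part of the commit), the expected percentages above follow from the formula in SourceStats.calcStats() shown in src/differ.ts below:

```ts
// duplicationPercent = (duplicateRows / rows) * 100 (rounded to 4 decimals in calcStats)
const oldDuplicationPercent = (3 / 12) * 100; // 25, matches oldSourceStats above
const newDuplicationPercent = (3 / 10) * 100; // 30, matches newSourceStats above
console.log(oldDuplicationPercent, newDuplicationPercent);
```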

src/differ.ts

Lines changed: 51 additions & 44 deletions
@@ -173,8 +173,8 @@ export type DuplicateKeyHandler = (rows: Row[]) => Row;
 export type DuplicateKeyHandling = 'fail' |'keepFirstRow' | 'keepLastRow' | DuplicateKeyHandler;

 export class SourceStats {
-    parsedRows = 0;
-    duplicateParsedRows = 0;
+    rows = 0;
+    duplicateRows = 0;
     uniqueRows = 0;
     uniqueRowsWithDuplicates = 0;

@@ -185,12 +185,12 @@ export class SourceStats {
     minDuplicatesPerUniqueKey = 0;
     averageDuplicatesPerUniqueKey = 0;

-    incParsedRows() {
-        this.parsedRows += 1;
+    incRows() {
+        this.rows += 1;
     }

-    incDuplicateParsedRows() {
-        this.duplicateParsedRows += 1;
+    incDuplicateRows() {
+        this.duplicateRows += 1;
     }

     incUniqueRows() {
@@ -212,10 +212,10 @@ export class SourceStats {

     calcStats() {
         if (this.uniqueRowsWithDuplicates) {
-            this.averageDuplicatesPerUniqueKey = roundDecimals(this.duplicateParsedRows / this.uniqueRowsWithDuplicates, 4);
+            this.averageDuplicatesPerUniqueKey = roundDecimals(this.duplicateRows / this.uniqueRowsWithDuplicates, 4);
         }
-        if (this.parsedRows) {
-            this.duplicationPercent = roundDecimals((this.duplicateParsedRows / this.parsedRows) * 100, 4);
+        if (this.rows) {
+            this.duplicationPercent = roundDecimals((this.duplicateRows / this.rows) * 100, 4);
         }
         if ( this.uniqueRows) {
             this.uniqueRowDuplicationPercent = roundDecimals((this.uniqueRowsWithDuplicates / this.uniqueRows) * 100, 4);
@@ -393,7 +393,7 @@ export class Differ {

     /**
      * Iterates over the changes and sends them to the submitted output.
-     * @param options a standard ouput such as console or null, a string filename, a URL or a custom OutputOptions.
+     * @param options a standard output such as console or null, a string filename, a URL or a custom OutputOptions.
      * @returns the change stats once all the changes have been processed.
      * Note that the stats might be different from getStats() when there is a filter in the output options,
      * as the differ stats are updated by the iterator which doesn't have any filter.
@@ -511,7 +511,7 @@ export class DifferContext {

     /**
      * Iterates over the changes and sends them to the submitted output.
-     * @param options a standard ouput such as console or null, a string filename, A URL or a custom OutputOptions.
+     * @param options a standard output such as console or null, a string filename, A URL or a custom OutputOptions.
      * @returns the change stats once all the changes have been processed.
      * Note that the stats might be different from "DiffContext.stats" when there is a filter in the output options,
      * as the context stats are updated by the iterator which doesn't have any filter.
@@ -682,16 +682,53 @@ export class DifferContext {
         }
         return result;
     }
+
+    async readDuplicatesOf(source: BufferedFormatReader, stats: SourceStats, row: Row): Promise<Row[]> {
+        const duplicateRows: Row[] = [];
+        duplicateRows.push(row);
+        stats.incUniqueRowsWithDuplicates();
+        let duplicateCount = 0;
+        let isDuplicate = true;
+        while(isDuplicate) {
+            const duplicateRow = await source.readRow();
+            if (duplicateRow) {
+                duplicateCount += 1;
+                stats.incRows();
+                stats.incDuplicateRows();
+                if (this.duplicateKeyHandling !== 'keepFirstRow') {
+                    // we don't need to accumulate duplicate rows when we just have to return the first row!
+                    duplicateRows.push(duplicateRow);
+                }
+                if (this.duplicateKeyHandling === 'keepLastRow') {
+                    // we don't need to accumulate the previous rows when we just have to return the last row!
+                    duplicateRows.shift();
+                }
+                if (duplicateRows.length > this.duplicateRowBufferSize) {
+                    if (this.options.duplicateRowBufferOverflow) {
+                        // remove the first entry when we can overflow
+                        duplicateRows.shift();
+                    } else {
+                        throw new Error('Too many duplicate rows');
+                    }
+                }
+            }
+            const nextRow = await source.peekRow();
+            isDuplicate = !!nextRow && this.comparer(this.keys, nextRow, row) === 0;
+        }
+        stats.incDuplicates(duplicateCount);
+        stats.calcStats();
+        return duplicateRows;
+    }

     async getNextRow(source: BufferedFormatReader, stats: SourceStats): Promise<Row | undefined> {
         const row = await source.readRow();
         if (!row) {
             return row;
         }
-        stats.incParsedRows();
+        stats.incRows();
         stats.incUniqueRows();
         if (this.duplicateKeyHandling === 'fail') {
-            // Note that it will be further processed in ensureRowsAreInAscendingOrder
+            // Note that it will be further processed in ensureRowsAreInAscendingOrder and throw a UniqueKeyViolationError exception
             return row;
         }
         const nextRow = await source.peekRow();
@@ -700,37 +737,7 @@ export class DifferContext {
         }
         let isDuplicate = this.comparer(this.keys, nextRow, row) === 0;
         if (isDuplicate) {
-            const duplicateRows: Row[] = [];
-            duplicateRows.push(row);
-            stats.incUniqueRowsWithDuplicates();
-            let duplicateCount = 0;
-            while(isDuplicate) {
-                const duplicateRow = await source.readRow();
-                if (duplicateRow) {
-                    duplicateCount += 1;
-                    stats.incParsedRows();
-                    stats.incDuplicateParsedRows();
-                    if (this.duplicateKeyHandling !== 'keepFirstRow') {
-                        // we don't need to accumulate duplicate rows when we just have to return the first row!
-                        duplicateRows.push(duplicateRow);
-                    }
-                    if (this.duplicateKeyHandling === 'keepLastRow') {
-                        // we don't need to accumulate the previous rows when we just have to return the last row!
-                        duplicateRows.shift();
-                    }
-                    if (duplicateRows.length > this.duplicateRowBufferSize) {
-                        if (this.options.duplicateRowBufferOverflow) {
-                            // remove the first entry when we can overflow
-                            duplicateRows.shift();
-                        } else {
-                            throw new Error('Too many duplicate rows');
-                        }
-                    }
-                }
-                const nextRow = await source.peekRow();
-                isDuplicate = !!nextRow && this.comparer(this.keys, nextRow, row) === 0;
-            }
-            stats.incDuplicates(duplicateCount);
+            const duplicateRows = await this.readDuplicatesOf(source, stats, row);
             if (this.duplicateKeyHandling === 'keepFirstRow') {
                 return duplicateRows[0];
             }
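The DuplicateKeyHandler type in the first hunk ((rows: Row[]) => Row) indicates that a custom handler receives the accumulated duplicate rows and returns the single row to keep. A minimal sketch of such a handler, assuming Row is an array of string cell values (the actual Row type is defined elsewhere in differ.ts and is not shown in this diff):

```ts
// Hypothetical handler (not from this commit): among the buffered duplicates,
// keep the row with the most non-empty cells. Assumes rows is non-empty and
// that each Row is a string[] of cell values.
const keepRichestRow = (rows: string[][]): string[] =>
    rows.reduce((best, row) =>
        row.filter(cell => cell !== '').length > best.filter(cell => cell !== '').length
            ? row
            : best);
```

Such a function can be supplied as the duplicateKeyHandling option in place of 'keepFirstRow' or 'keepLastRow', which is what the DuplicateKeyHandling union type above allows.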

0 commit comments
