diff --git a/cmd/gedcom/diff.go b/cmd/gedcom/diff.go index c5e0b97..1c12377 100644 --- a/cmd/gedcom/diff.go +++ b/cmd/gedcom/diff.go @@ -80,6 +80,8 @@ func runDiffCommand() { "The Google Analytics ID, like 'UA-78454410-2'.") flag.BoolVar(&optionProgress, "progress", false, "Show progress bar.") + // The value returned by flag.Bool is deliberately not kept: SimpleNode.Equals() looks the flag up itself, because the only other way to hand it to Equals() would be to pass it through every single SimpleNode. + flag.Bool("ancestry-source-matching", false, "Match Ancestry.com sources by Ancestry Source ID (_APID) instead of default matching algorithm (GEDCOM SOUR).") flag.IntVar(&optionJobs, "jobs", 1, util.CLIDescription(`Number of jobs to run in parallel. If you are comparing large trees this will make the process @@ -279,8 +281,7 @@ func runDiffCommand() { diffProgress := make(chan gedcom.Progress) page := html.NewDiffPage(comparisons, filterFlags, optionGoogleAnalyticsID, - optionShow, optionSort, diffProgress, compareOptions, html.LivingVisibilityShow) - + optionShow, optionSort, diffProgress, compareOptions, html.LivingVisibilityShow, optionLeftGedcomFile, optionRightGedcomFile) go func() { _, err = page.WriteHTMLTo(out) if err != nil { diff --git a/html/core/page.go b/html/core/page.go index 1d92edf..0f99877 100644 --- a/html/core/page.go +++ b/html/core/page.go @@ -31,6 +31,7 @@ func (c *Page) WriteHTMLTo(w io.Writer) (int64, error) { +
`) diff --git a/html/core/table.go b/html/core/table.go index bfa533b..2f491e0 100644 --- a/html/core/table.go +++ b/html/core/table.go @@ -18,7 +18,7 @@ func NewTable(tableClass string, content ...Component) *Table { } func (c *Table) WriteHTMLTo(w io.Writer) (int64, error) { - n := appendSprintf(w, ``, c.tableClass) + n := appendSprintf(w, `
`, c.tableClass) n += appendComponent(w, NewComponents(c.content...)) n += appendString(w, "
") diff --git a/html/diff_page.go b/html/diff_page.go index 47c39ad..7f73fea 100644 --- a/html/diff_page.go +++ b/html/diff_page.go @@ -33,9 +33,11 @@ type DiffPage struct { progress chan gedcom.Progress compareOptions *gedcom.IndividualNodesCompareOptions visibility LivingVisibility + leftGedcomPath string + rightGedcomPath string } -func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.FilterFlags, googleAnalyticsID string, show, sort string, progress chan gedcom.Progress, compareOptions *gedcom.IndividualNodesCompareOptions, visibility LivingVisibility) *DiffPage { +func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.FilterFlags, googleAnalyticsID string, show, sort string, progress chan gedcom.Progress, compareOptions *gedcom.IndividualNodesCompareOptions, visibility LivingVisibility, leftGedcomPath string, rightGedcomPath string) *DiffPage { return &DiffPage{ comparisons: comparisons, filterFlags: filterFlags, @@ -45,6 +47,8 @@ func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.F progress: progress, compareOptions: compareOptions, visibility: visibility, + leftGedcomPath: leftGedcomPath, + rightGedcomPath: rightGedcomPath, } } @@ -178,7 +182,11 @@ func (c *DiffPage) WriteHTMLTo(w io.Writer) (int64, error) { } // The index at the top of the page. 
- rows := []core.Component{} + var rows []core.Component + numOnlyLeft := 0 + numOnlyRight := 0 + numSimilar := 0 + numEqual := 0 for _, comparison := range precalculatedComparisons { weightedSimilarity := c.weightedSimilarity(comparison.comparison) @@ -186,43 +194,51 @@ func (c *DiffPage) WriteHTMLTo(w io.Writer) (int64, error) { rightClass := "" switch { - case comparison.comparison.Left != nil && comparison.comparison.Right == nil: + case comparison.comparison.Left != nil && comparison.comparison.Right == nil: //right is missing leftClass = "bg-warning" + numOnlyLeft++ - case comparison.comparison.Left == nil && comparison.comparison.Right != nil: + case comparison.comparison.Left == nil && comparison.comparison.Right != nil: //left is missing rightClass = "bg-primary" + numOnlyRight++ - case weightedSimilarity < 1: + case weightedSimilarity < 1: //neither are missing, but they aren't identical leftClass = "bg-info" rightClass = "bg-info" + numSimilar++ - case c.filterFlags.HideEqual: + case c.filterFlags.HideEqual: //are identical, but user said to hide equals + numEqual++ continue + default: + numEqual++ } + rows = append(rows, c.getRow(comparison, leftClass, rightClass, weightedSimilarity)) + } - leftNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Left, c.visibility, "") - rightNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Right, c.visibility, "") - - left := core.NewTableCell(leftNameAndDates).Class(leftClass) - right := core.NewTableCell(rightNameAndDates).Class(rightClass) - - middle := core.NewTableCell(core.NewText("")) - if weightedSimilarity != 0 { - similarityString := fmt.Sprintf("%.2f%%", weightedSimilarity*100) - middle = core.NewTableCell(core.NewText(similarityString)). 
- Class("text-center " + leftClass) - } - - tableRow := core.NewTableRow(left, middle, right) - - rows = append(rows, tableRow) + leftHeader := fmt.Sprint(c.leftGedcomPath, " (", numOnlyLeft, " only in left)") + rightHeader := fmt.Sprint(c.rightGedcomPath, " (", numOnlyRight, " only in right)") + class := "text-center" + attr := map[string]string{} + headerTag := "h5" + wereHidden := "" + if c.filterFlags.HideEqual { + wereHidden = " - were hidden" } + middleHeader := fmt.Sprint("Similarity score", " (", numSimilar, " similar, and ", numEqual, " equal", wereHidden, ")") + header := []core.Component{core.NewTableRow( + core.NewTableCell( + core.NewTag(headerTag, attr, core.NewText(leftHeader))).Class(class), + core.NewTableCell( + core.NewTag(headerTag, attr, core.NewText(middleHeader))).Class(class), + core.NewTableCell( + core.NewTag(headerTag, attr, core.NewText(rightHeader))).Class(class))} // Individual pages components := []core.Component{ core.NewSpace(), core.NewCard(core.NewText("Individuals"), core.CardNoBadgeCount, - core.NewTable("", rows...)), + core.NewTable("", append(header, rows...)...)), core.NewSpace(), } for _, comparison := range precalculatedComparisons { @@ -236,6 +252,24 @@ func (c *DiffPage) WriteHTMLTo(w io.Writer) (int64, error) { ).WriteHTMLTo(w) } +func (c *DiffPage) getRow(comparison *IndividualCompare, leftClass string, rightClass string, weightedSimilarity float64) *core.TableRow { + + leftNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Left, c.visibility, "") + rightNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Right, c.visibility, "") + + left := core.NewTableCell(leftNameAndDates).Class(leftClass) + right := core.NewTableCell(rightNameAndDates).Class(rightClass) + + middle := core.NewTableCell(core.NewText("")) + if weightedSimilarity != 0 { + similarityString := fmt.Sprintf("%.2f%%", weightedSimilarity*100) + middle = core.NewTableCell(core.NewText(similarityString)). 
+ Class("text-center " + leftClass) + } + + return core.NewTableRow(left, middle, right) +} + func (c *DiffPage) shouldSkip(comparison *IndividualCompare) bool { switch c.show { case DiffPageShowAll: diff --git a/html/diff_page_test.go b/html/diff_page_test.go index a479ecc..4450edb 100644 --- a/html/diff_page_test.go +++ b/html/diff_page_test.go @@ -44,7 +44,7 @@ func TestDiffPage_WriteHTMLTo(t *testing.T) { compareOptions := gedcom.NewIndividualNodesCompareOptions() component := html.NewDiffPage(comparisons, filterFlags, googleAnalyticsID, html.DiffPageShowAll, html.DiffPageSortHighestSimilarity, nil, - compareOptions, html.LivingVisibilityPlaceholder) + compareOptions, html.LivingVisibilityPlaceholder, "left", "right") buf := bytes.NewBuffer(nil) component.WriteHTMLTo(buf) diff --git a/node_diff.go b/node_diff.go index 79bb9a6..e7c5058 100644 --- a/node_diff.go +++ b/node_diff.go @@ -137,7 +137,6 @@ type NodeDiff struct { // R 2 DATE Aft. 2001 // R 2 PLAC Surry, England // R 1 NAME J. /Smith/ -// func CompareNodes(left, right Node) *NodeDiff { result := &NodeDiff{} @@ -263,7 +262,6 @@ func (nd *NodeDiff) String() string { // LR 1 DEAT | true // LR 2 PLAC England | true // R 1 NAME J. /Smith/ | false -// func (nd *NodeDiff) IsDeepEqual() bool { leftIsNil := IsNil(nd.Left) rightIsNil := IsNil(nd.Right) @@ -345,7 +343,7 @@ func (nd *NodeDiff) LeftNode() Node { // RightNode returns the flattening Node value that favors the right side. // -// To favor means to return the Left value when both the Left and Right are set. +// To favor means to return the Right value when both the Left and Right are set. 
func (nd *NodeDiff) RightNode() Node { n := nd.Right diff --git a/simple_node.go b/simple_node.go index 62553c8..1ecfdc5 100644 --- a/simple_node.go +++ b/simple_node.go @@ -3,6 +3,7 @@ package gedcom import ( "bytes" "encoding/json" + "flag" "fmt" "sync" ) @@ -61,7 +62,7 @@ func (node *SimpleNode) Identifier() string { if node == nil { return "" } - + return fmt.Sprintf("@%s@", node.pointer) } @@ -88,6 +89,22 @@ func (node *SimpleNode) Equals(node2 Node) bool { return false } + useAncestrySourceMatching := flag.Lookup("ancestry-source-matching").Value.String() //indexes a map CommandLine.formal + //if both Ancestry sources, only check if their _APID is the same + if useAncestrySourceMatching == "true" && node.Tag().String() == "Source" && tag.String() == "Source" { + if node.Value() == node2.Value() { //if they have the same source id, then no need to check the apid + return true + } + for _, leftNode := range node.Nodes() { + for _, rightNode := range node2.Nodes() { + if leftNode.Tag().String() == "_APID" && + rightNode.Tag().String() == "_APID" && + rightNode.Value() == leftNode.Value() { + return true + } + } + } + } value := node2.Value() if node.value != value { return false diff --git a/simple_node_test.go b/simple_node_test.go index 541acf3..ae7ca75 100644 --- a/simple_node_test.go +++ b/simple_node_test.go @@ -64,6 +64,51 @@ func TestSimpleNode_Equals(t *testing.T) { } } +func TestAncestryNode_Equals(t *testing.T) { + //test when source ids are the same + original := GetAncestryIndividual("@S291470533@", "@S291470520@", "@S291470520@", "@S291470520@", "@S291470520@") + assert.True(t, gedcom.DeepEqualNodes(gedcom.newDocumentFromString(original).Nodes(), gedcom.newDocumentFromString(original).Nodes())) + + //test when source ids are different, but _apid stays the same + left := gedcom.newDocumentFromString(GetAncestryIndividual("@S222222222@", "@S444444444@", "@S666666666@", "@S888888888@", "@S111111111@")) + right := 
gedcom.newDocumentFromString(GetAncestryIndividual("@S333333333@", "@S555555555@", "@S777777777@", "@S999999999@", "@S000000000@")) + assert.True(t, gedcom.DeepEqualNodes(left.Nodes(), right.Nodes())) +} + +// GetAncestryIndividual returns the text of an individual record, taken from an actual GEDCOM entry, with its five SOUR values set to source1 through source5. +func GetAncestryIndividual(source1 string, source2 string, source3 string, source4 string, source5 string) string { + return "0 @I152151456706@ INDI" + + "\n1 NAME Jacob /Yourow/" + + "\n2 GIVN Jacob" + + "\n2 SURN Yourow" + + "\n2 SOUR " + source1 + + "\n3 PAGE New York City Municipal Archives; New York, New York; Borough: Manhattan; Volume Number: 13" + + "\n3 _APID 1,61406::6159341" + + "\n2 SOUR " + source2 + + "\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" + + "\n3 _APID 1,6224::30826480" + + "\n1 SEX M" + + "\n1 FAMS @F89@" + + "\n1 BIRT" + + "\n2 DATE abt 1888" + + "\n2 PLAC Russia" + + "\n2 SOUR " + source3 + + "\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" + + "\n3 _APID 1,6224::30826480" + + "\n1 EVEN" + + "\n2 TYPE Arrival" + + "\n2 DATE 1905" + + "\n2 SOUR " + source4 + + "\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" + + "\n3 _APID 1,6224::30826480" + + "\n1 RESI Marital Status: Married; Relation to Head: Head" + + "\n2 DATE 1930" + + "\n2 PLAC Bronx, Bronx, New York, USA" + + "\n2 SOUR " + source5 + + "\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" + + "\n3 _APID 1,6224::30826480" +} + func TestSimpleNode_Tag(t *testing.T) { Tag := tf.Function(t, (*gedcom.SimpleNode).Tag)