Skip to content

Commit e034b63

Browse files
committed
Handle more complex data
1 parent 39841ab commit e034b63

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

bin/extract

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,13 @@ sub extract_family_info
118118
my $children_text = $1;
119119
$children_text =~ s/, grandmother.+//;
120120
$family{children} = extract_people_section($children_text);
121+
} elsif($text =~ /sons,?\s+([a-z]+)\s+and\s+([a-z]+)/i) {
122+
my @children;
123+
push @children, { name => $1, sex => 'M' }, { name => $2, sex => 'M' };
124+
if($text =~ /daughter,?\s([a-z]+)/i) {
125+
push @children, { 'name' => $1, 'sex' => 'F' }
126+
}
127+
$family{children} = \@children if @children;
121128
} else {
122129
my @children;
123130
while($text =~ /\b(son|daughter)s?,\s*([A-Z][a-z]+(?:\s+\([A-Z][a-z]+\))?)\s*(?:and their children ([^.;]+))?/g) {
@@ -127,8 +134,11 @@ sub extract_family_info
127134
if(my @grandchildren = $grandkids ? split /\s*,\s*|\s+and\s+/, $grandkids : ()) {
128135
push @children, {
129136
name => $child,
137+
sex => $sex,
130138
grandchildren => \@grandchildren,
131139
};
140+
} elsif(($sex eq 'F') && ($child =~ /(.+)\s+\((.+)\)/)) {
141+
push @children, { name => $1, sex => 'F', spouse => { name => $2, sex => 'M' } }
132142
} else {
133143
push @children, { name => $child, sex => $sex }
134144
}

gedcom

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13685,6 +13685,13 @@ sub extract_family_info
1368513685
my $children_text = $1;
1368613686
$children_text =~ s/, grandmother.+//;
1368713687
$family{children} = extract_people_section($children_text);
13688+
} elsif($text =~ /sons,?\s+([a-z]+)\s+and\s+([a-z]+)/i) {
13689+
my @children;
13690+
push @children, { name => $1, sex => 'M' }, { name => $2, sex => 'M' };
13691+
if($text =~ /daughter,?\s([a-z]+)/i) {
13692+
push @children, { 'name' => $1, 'sex' => 'F' }
13693+
}
13694+
$family{children} = \@children if @children;
1368813695
} else {
1368913696
my @children;
1369013697
while($text =~ /\b(son|daughter)s?,\s*([A-Z][a-z]+(?:\s+\([A-Z][a-z]+\))?)\s*(?:and their children ([^.;]+))?/g) {
@@ -13694,8 +13701,11 @@ sub extract_family_info
1369413701
if(my @grandchildren = $grandkids ? split /\s*,\s*|\s+and\s+/, $grandkids : ()) {
1369513702
push @children, {
1369613703
name => $child,
13704+
sex => $sex,
1369713705
grandchildren => \@grandchildren,
1369813706
};
13707+
} elsif(($sex eq 'F') && ($child =~ /(.+)\s+\((.+)\)/)) {
13708+
push @children, { name => $1, sex => 'F', spouse => { name => $2, sex => 'M' } }
1369913709
} else {
1370013710
push @children, { name => $child, sex => $sex }
1370113711
}

tests/extract.t

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -141,20 +141,20 @@ cmp_deeply($foo,
141141
}, 'sisters' => [
142142
{ 'name' => 'Phyllis Huestis' }
143143
], 'children' => [
144-
{ 'name' => 'Boyd R', 'sex' => 'M' },
145-
{ 'name' => 'Anne (Malcolm) Maxwell', 'sex' => 'F' }
146-
], 'brothers' => [
144+
{ 'name' => 'Anne', 'sex' => 'F', spouse => { 'name' => 'Malcolm', 'sex' => 'M' } }, # spouse should be 'Malcolm Maxwell'
145+
{ 'name' => 'Boyd', 'sex' => 'M' }, # should be Boyd R
146+
], 'brothers' => [
147147
{
148-
'status' => 'living',
149-
'name' => 'Ralph N.'
148+
'status' => 'living',
149+
'name' => 'Ralph N.'
150150
}
151-
], 'death' => {
151+
], 'death' => {
152152
'place' => 'home',
153153
'date' => 'April 16, 2007'
154-
}, 'birth' => {
154+
}, 'birth' => {
155155
'place' => 'Fredericton',
156156
'date' => 'Jul 19, 1925'
157-
}, 'spouse' => [
157+
}, 'spouse' => [
158158
{
159159
'name' => 'Ruth Howland',
160160
'married' => {
@@ -189,9 +189,9 @@ diag(Data::Dumper->new([$foo])->Dump()) if($ENV{'TEST_VERBOSE'});
189189
cmp_deeply($foo,
190190
{
191191
'children' => [
192-
{ 'name' => 'Christopher Cloud', 'sex' => 'M' },
193-
{ 'name' => 'Thomas Cloud', 'sex' => 'M' },
194-
{ 'name' => 'Marsha Cloud', 'sex' => 'F' }
192+
{ 'name' => 'Christopher', 'sex' => 'M' },
193+
{ 'name' => 'Thomas', 'sex' => 'M' },
194+
{ 'name' => 'Marsha', 'sex' => 'F' }
195195
],
196196
'birth' => {
197197
'date' => '1937/05/21',

0 commit comments

Comments
 (0)