Skip to content

Commit a32041d

Browse files
committed
Extract more info from obits
1 parent 555c717 commit a32041d

File tree

3 files changed

+70
-5
lines changed

3 files changed

+70
-5
lines changed

bin/extract

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,27 @@ sub extract_family_info
127127
$family{children} = \@children if @children;
128128
} else {
129129
my @children;
130+
131+
if($text =~ /\ssons,\s*(.*?);/s) {
132+
my $sons_text = $1;
133+
while($sons_text =~ /([\w. ]+?),\s*([\w. ]+?)(?:\s+and|\z)/g) {
134+
push @children, {
135+
name => $1,
136+
location => $2,
137+
sex => 'M',
138+
};
139+
}
140+
}
141+
if($text =~ /\sdaughters?,\s*Mrs\.\s+(.+?)\s+(\w+),\s+([^;]+)\sand/) {
142+
push @children, {
143+
name => $1,
144+
location => $3,
145+
sex => 'F',
146+
spouse => { 'name' => $2, sex => 'M' }
147+
};
148+
}
149+
$family{children} = \@children if @children;
150+
130151
while($text =~ /\b(son|daughter)s?,\s*([A-Z][a-z]+(?:\s+\([A-Z][a-z]+\))?)\s*(?:and their children ([^.;]+))?/g) {
131152
my $sex = $1 eq 'son' ? 'M' : 'F';
132153
my $child = $2;
@@ -139,7 +160,7 @@ sub extract_family_info
139160
};
140161
} elsif(($sex eq 'F') && ($child =~ /(.+)\s+\((.+)\)/)) {
141162
push @children, { name => $1, sex => 'F', spouse => { name => $2, sex => 'M' } }
142-
} else {
163+
} elsif($child ne 'Mrs') {
143164
push @children, { name => $child, sex => $sex }
144165
}
145166
}
@@ -312,7 +333,7 @@ sub extract_family_info
312333
push @{$family{'spouse'}}, { name => $1 }
313334
} elsif($text =~ /\bsurvived by her husband ([^.,;]+)/i) {
314335
push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'M' }
315-
} elsif($text =~ /\bsurvived by his wife ([^.,;]+)/i) {
336+
} elsif($text =~ /\bsurvived by his wife[,\s]+([^.,;]+)/i) {
316337
push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'F' }
317338
}
318339

gedcom

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13694,6 +13694,27 @@ sub extract_family_info
1369413694
$family{children} = \@children if @children;
1369513695
} else {
1369613696
my @children;
13697+
13698+
if($text =~ /\ssons,\s*(.*?);/s) {
13699+
my $sons_text = $1;
13700+
while($sons_text =~ /([\w. ]+?),\s*([\w. ]+?)(?:\s+and|\z)/g) {
13701+
push @children, {
13702+
name => $1,
13703+
location => $2,
13704+
sex => 'M',
13705+
};
13706+
}
13707+
}
13708+
if($text =~ /\sdaughters?,\s*Mrs\.\s+(.+?)\s+(\w+),\s+([^;]+)\sand/) {
13709+
push @children, {
13710+
name => $1,
13711+
location => $3,
13712+
sex => 'F',
13713+
spouse => { 'name' => $2, sex => 'M' }
13714+
};
13715+
}
13716+
$family{children} = \@children if @children;
13717+
1369713718
while($text =~ /\b(son|daughter)s?,\s*([A-Z][a-z]+(?:\s+\([A-Z][a-z]+\))?)\s*(?:and their children ([^.;]+))?/g) {
1369813719
my $sex = $1 eq 'son' ? 'M' : 'F';
1369913720
my $child = $2;
@@ -13706,7 +13727,7 @@ sub extract_family_info
1370613727
};
1370713728
} elsif(($sex eq 'F') && ($child =~ /(.+)\s+\((.+)\)/)) {
1370813729
push @children, { name => $1, sex => 'F', spouse => { name => $2, sex => 'M' } }
13709-
} else {
13730+
} elsif($child ne 'Mrs') {
1371013731
push @children, { name => $child, sex => $sex }
1371113732
}
1371213733
}
@@ -13879,7 +13900,7 @@ sub extract_family_info
1387913900
push @{$family{'spouse'}}, { name => $1 }
1388013901
} elsif($text =~ /\bsurvived by her husband ([^.,;]+)/i) {
1388113902
push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'M' }
13882-
} elsif($text =~ /\bsurvived by his wife ([^.,;]+)/i) {
13903+
} elsif($text =~ /\bsurvived by his wife[,\s]+([^.,;]+)/i) {
1388313904
push @{$family{'spouse'}}, { name => $1, 'status' => 'living', 'sex' => 'F' }
1388413905
}
1388513906

tests/extract.t

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,30 @@ From bmdsonline.co.uk: MAXEY (Dudley). Peacefully at home on 1st March, Hilary
220220
STR
221221

222222
$foo = extract_family_info($str);
223-
diag(Data::Dumper->new([$foo])->Dump());
223+
diag(Data::Dumper->new([$foo])->Dump()) if($ENV{'TEST_VERBOSE'});
224+
225+
cmp_deeply($foo,
226+
{
227+
'grandchildren' => [
228+
'Dale',
229+
'Caitlin',
230+
'Aidan'
231+
], 'aunt' => [
232+
{ 'name' => 'Winnie' }
233+
], 'children' => [
234+
{ 'name' => 'Noel' },
235+
{ 'name' => 'Sarah' }
236+
], 'funeral' => {
237+
'date' => 'Wednesday 9th March',
238+
'location' => 'Preston Cemetery',
239+
'time' => '1.15pm'
240+
}, 'siblings' => [
241+
{ 'name' => 'Gillian' },
242+
{ 'name' => 'Adrian' }
243+
]
244+
}
245+
);
246+
224247

225248
$str = <<'STR';
226249
Fort Wayne Journal Gazette, 20 February 1977: Word has been received of the death of Charles F. Harris, 72, of 2717 Lynn Ave. He died at the Fort Myers (Fla.) Community Hospital after a two week illness. Mr. Harris was a native of Fort Wayne, and had lived here most of his life. He retired from International Harvester Co. in 1965 after 31 years' service. He is survived by his wife, Ruth; two sons, Jack R., Grabill and Ralph E., Yoder; one daughter, Mrs. Arlene J. Gevara, Fort Wayne and one sister, Mrs. Alice Duncan, Englewood, Fla. Services will be at 10 a.m. Wednesday at D. O. McComb & sons Lakeside Park Funeral Home, with calling from 7 to 9 p.m. Tuesday. Burial will be in Prairie Grove Cemetery

0 commit comments

Comments
 (0)