Skip to content

Commit 501495d

Browse files
authored
Remove Cherokee bible from churchio (#94)
1 parent fe72a30 commit 501495d

2 files changed

Lines changed: 1 addition & 15 deletions

File tree

Lib/corpuscrawler/crawl_chr.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,4 +20,4 @@
2020
def crawl(crawler):
2121
out = crawler.get_output(language='chr')
2222
crawl_udhr(crawler, out, filename='udhr_chr_uppercase.txt')
23-
crawler.crawl_churchio(out, bible_id='chr-cherokee')
23+
# TODO(#93): Add a Cherokee bible from a different source

Lib/corpuscrawler/util.py

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -358,20 +358,6 @@ def crawl_abc_net_au(self, out, program_id):
358358
out.write('# Publication-Date: %s\n' % pubdate)
359359
out.write('\n'.join(paras) + '\n')
360360

361-
def crawl_churchio(self, out, bible_id):
362-
url = ('https://raw.githubusercontent.com/churchio/open-bibles/' +
363-
('master/%s.usfx.xml' % bible_id))
364-
doc = etree.fromstring(self.fetch(url).content)
365-
for book in doc.findall('book'):
366-
paras = ''.join(book.itertext()).splitlines()
367-
paras = [' '.join(p.split()) for p in paras]
368-
out.write('# Location: %s#%s\n' % (url, book.attrib['id']))
369-
out.write('# Genre: Religion\n')
370-
out.write(
371-
'# License: '
372-
'https://creativecommons.org/publicdomain/mark/1.0/\n')
373-
out.write('\n'.join(paras) + '\n')
374-
375361
def crawl_aps_dz(self, out, prefix):
376362
urls = set()
377363
if prefix in {'arb/'}:

0 commit comments

Comments
 (0)