Skip to content

Commit a22eb75

Browse files
authored
Merge pull request #11 from freud-digital/man_status
json and tei update: status, doc type, man type
2 parents 2e613bb + 18e6034 commit a22eb75

File tree

2 files changed

+73
-5
lines changed

2 files changed

+73
-5
lines changed

freud_api_crawler/freud_api_crawler.py

Lines changed: 48 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import time
55
import glob
66
from collections import defaultdict
7+
from datetime import datetime
78

89
import requests
910
import lxml.etree as ET
@@ -393,12 +394,54 @@ def process_page(self, page_json):
393394

394395
def get_man_json_dump(self, lmt=True):
395396
json_dump = {}
396-
json_dump["id"] = f"bibl__{self.manifestation_id}"
397-
json_dump["browser_url"] = f"{self.browser}{self.manifestation_folder}"
397+
json_dump['id'] = f"bibl__{self.manifestation_id}"
398+
json_dump['browser_url'] = f"{self.browser}{self.manifestation_folder}"
398399
man_type = self.manifestation['data']['type'].replace('--', '/')
399-
json_dump["url"] = f"{self.endpoint}{man_type}/{self.manifestation_id}"
400+
json_dump['url'] = f"{self.endpoint}{man_type}/{self.manifestation_id}"
400401
json_dump['man_title'] = self.md__title
401402
json_dump['signature'] = self.manifestation_signatur
403+
try:
404+
field_status = self.manifestation['data']['attributes']['field_status_umschrift']
405+
if field_status == 2:
406+
field_name = "complete"
407+
field_status = str(field_status)
408+
elif field_status is None:
409+
field_name = "proposed"
410+
field_status = "0"
411+
else:
412+
field_name = "undefined"
413+
field_status = str(field_status)
414+
d = datetime.now()
415+
dt = f"{d.year}-{d.month}-{d.day}"
416+
json_dump['status'] = {
417+
"id": field_status,
418+
"name": field_name,
419+
"date": dt
420+
}
421+
except (KeyError, TypeError):
422+
json_dump['status'] = {}
423+
try:
424+
man_type_name = self.manifestation_typ['data']['attributes']['name']
425+
man_type_id = self.manifestation_typ['data']['id']
426+
man_type_type = self.manifestation_typ['data']['type']
427+
json_dump['man_doc_type'] = {
428+
"id": man_type_id,
429+
"type": man_type_type,
430+
"name": man_type_name
431+
}
432+
except (KeyError, TypeError):
433+
json_dump['man_doc_type'] = {}
434+
try:
435+
doc_type_name = self.doc_component['data']['attributes']['name']
436+
doc_type_id = self.doc_component['data']['id']
437+
doc_type_type = self.doc_component['data']['type']
438+
json_dump['doc_component'] = {
439+
"id": doc_type_id,
440+
"type": doc_type_type,
441+
"name": doc_type_name
442+
}
443+
except (KeyError, TypeError):
444+
json_dump['doc_component'] = {}
402445
try:
403446
json_dump['note_i'] = self.manifestation['data']['attributes']['field_anmerkung_intern_']['processed']
404447
except (KeyError, TypeError):
@@ -990,6 +1033,8 @@ def __init__(
9901033
self.sprache = self.get_fields_any('field_sprache')
9911034
self.type = self.get_fields_any('field_publication_type')
9921035
self.edition = self.get_fields_any('field_edition')
1036+
self.doc_component = self.get_fields_any('field_doc_component')
1037+
self.manifestation_typ = self.get_fields_any('field_manifestation_typ')
9931038
# first level publication
9941039
self.publication = self.get_fields_any('field_published_in')
9951040
self.pub_publisher = self.get_fields_any_any('field_publisher', self.publication)

freud_api_crawler/templates/tei.xml

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,13 +79,23 @@
7979
</licence>
8080
</availability>
8181
</publicationStmt>
82-
{% if man.note_i %}
8382
<notesStmt>
83+
{% if man.note_i %}
8484
<note type="comment_intern">
8585
{{ man.note_i }}
8686
</note>
87+
{% endif %}
88+
{% if man.doc_component %}
89+
<note type="doc_component">
90+
{{ man.doc_component.name }}
91+
</note>
92+
{% endif %}
93+
{% if man.man_doc_type %}
94+
<note type="man_type">
95+
{{ man.man_doc_type.name }}
96+
</note>
97+
{% endif %}
8798
</notesStmt>
88-
{% endif %}
8999
<sourceDesc>
90100
<listBibl>
91101
<head>Publikationsgeschichte</head>
@@ -473,6 +483,19 @@
473483
<language ident="de">Deutsch</language>
474484
</langUsage>
475485
</profileDesc>
486+
{% if man.status %}
487+
<revisionDesc status="{{ man.status.name }}">
488+
<change who="#dstoxreiter" when="{{ man.status.date }}">
489+
{% if man.status.name == "complete" %}
490+
Der Status 'complete' entspricht einer abgeschlossenen Korrektur der OCR Umschrift.
491+
{% elif man.status.name == "proposed" %}
492+
Der Status 'proposed' entspricht einer nicht abgeschlossenen Korrektur der OCR Umschrift.
493+
{% elif man.status.name == "undefined" %}
494+
Der Status 'undefined' entspricht einem unklaren Status der OCR Umschrift.
495+
{% endif %}
496+
</change>
497+
</revisionDesc>
498+
{% endif %}
476499
</teiHeader>
477500
<text>
478501
<body>

0 commit comments

Comments
 (0)