Skip to content

Commit 26a261c

Browse files
committed
MCPClient: fix matching originals to manual derivatives
Fixes the matching of original files to their manually created preservation derivatives in a manualNormalization/preservation/ directory. Previously, the lookup would find the intended match plus every other File whose path extended the original's path (i.e. had it as a prefix), and the resulting multiple matches would trigger an exception. This commit fixes the issue by returning the shortest match. It also adds print statements so that what happened can be followed more easily through the Tasks GUI.
1 parent 4d39aa0 commit 26a261c

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

src/MCPClient/lib/clientScripts/archivematicaCreateMETS2.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,12 @@ def createFileSec(directoryPath, parentDiv, baseDirectoryPath,
890890

891891
elif use in ("preservation", "text/ocr"):
892892
# Derived files should be in the original file's group
893-
d = Derivation.objects.get(derived_file_id=f.uuid)
893+
try:
894+
d = Derivation.objects.get(derived_file_id=f.uuid)
895+
except Derivation.DoesNotExist:
896+
print('Fatal error: unable to locate a Derivation object'
897+
' where the derived file is {}'.format(f.uuid))
898+
raise
894899
GROUPID = "Group-" + d.source_file_id
895900

896901
elif use == "service":

src/MCPClient/lib/clientScripts/manualNormalizationCreateMetadataAndRestructure.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@
117117
e = Event.objects.get(event_type="normalization", file_uuid=original_file)
118118
e.event_outcome_detail = dstR
119119
e.save()
120+
print('Updated the eventOutcomeDetailNote of an existing normalization'
121+
' Event for file {}. Not creating a Derivation object'.format(
122+
fileUUID))
120123
except Event.DoesNotExist:
121124
# No normalization event was created in normalize.py - probably manually
122125
# normalized during Ingest
@@ -129,12 +132,16 @@
129132
eventDetail="manual normalization",
130133
eventOutcome="",
131134
eventOutcomeDetailNote=dstR)
135+
print('Created a manual normalization Event for file {}.'.format(
136+
original_file.uuid))
132137

133138
# Add linking information between files
134139
# Assuming that if an event already exists, then the derivation does as well
135140
databaseFunctions.insertIntoDerivations(
136141
sourceFileUUID=original_file.uuid,
137142
derivedFileUUID=fileUUID,
138143
relatedEventUUID=derivationEventUUID)
144+
print('Created a Derivation for original file {}, derived file {}, and'
145+
' event {}'.format(original_file.uuid, fileUUID, derivationEventUUID))
139146

140147
exit(0)

src/MCPClient/lib/clientScripts/normalize.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,27 @@ def check_manual_normalization(opts):
147147
"%SIPDirectory%objects/manualNormalization/access/")
148148
else:
149149
return None
150-
try:
151-
# FIXME: SQL uses removedtime=0. Cannot get Django to express this
152-
return File.objects.get(sip=opts.sip_uuid, currentlocation__startswith=path) # removedtime = 0
153-
except (File.DoesNotExist, File.MultipleObjectsReturned):
150+
151+
# FIXME: SQL uses removedtime=0. Cannot get Django to express this
152+
print('Checking for a manually normalized file by trying to get the'
153+
' unique file that matches SIP UUID {} and whose currentlocation'
154+
' value starts with this path: {}.'.format(opts.sip_uuid, path))
155+
matches = File.objects.filter( # removedtime = 0
156+
sip=opts.sip_uuid, currentlocation__startswith=path)
157+
if not matches:
154158
# No file with the correct path found, assume not manually normalized
159+
print('No such file found.')
155160
return None
156-
return None
161+
if len(matches) > 1:
162+
# If multiple matches, the shortest one should be the correct one. E.g.,
163+
# if original is /a/b/abc.NEF then /a/b/abc.tif and /a/b/abc_1.tif will
164+
# both match but /a/b/abc.tif is the correct match.
165+
print('Multiple files matching path {} found. Returning the shortest'
166+
' one.')
167+
ret = sorted(matches, key=lambda f: f.currentlocation)[0]
168+
print('Returning file at {}'.format(ret.currentlocation))
169+
return ret
170+
return matches[0]
157171

158172

159173
def once_normalized(command, opts, replacement_dict):

0 commit comments

Comments
 (0)