Skip to content

Commit 37cae32

Browse files
authored
Add flag index_files_content to AIP profile and remove index from storage policy (#2436)
* Add flag index_files_content to AIP profile and remove index from storage policy
* Remove index field from storage policy serializers
* Add new db migration for storage policy
1 parent 38430aa commit 37cae32

File tree

11 files changed

+116
-17
lines changed

11 files changed

+116
-17
lines changed

ESSArch_Core/configuration/admin.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@ class StoragePolicyAdmin(NestedModelAdmin):
142142
'validate_checksum',
143143
'validate_xml',
144144
'ingest_delete',
145-
'index',
146145
'receive_extract_sip',
147146
'cache_minimum_capacity',
148147
'cache_maximum_age',
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Generated by Django 5.2.9 on 2025-12-04 10:13
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('configuration', '0033_alter_storagepolicy_index_and_more'),
10+
]
11+
12+
operations = [
13+
migrations.RemoveField(
14+
model_name='storagepolicy',
15+
name='index',
16+
),
17+
]

ESSArch_Core/configuration/models.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,12 @@ class StoragePolicy(models.Model):
200200
)
201201

202202
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
203-
204-
index = models.BooleanField(default=False)
205-
206203
cache_minimum_capacity = models.IntegerField(
207204
'Minimum size (bytes) available on cache before deleting content', default=0,
208205
)
209206
cache_maximum_age = models.IntegerField(
210207
'Maximum age (days) of content before deletion from cache, resets on access', default=0,
211208
)
212-
213209
policy_id = models.CharField('Policy ID', max_length=32, unique=True)
214210
policy_name = models.CharField('Policy Name', max_length=255)
215211
policy_stat = models.BooleanField('Policy Status', default=True)

ESSArch_Core/configuration/serializers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def create(self, validated_data):
221221
class Meta:
222222
model = StoragePolicy
223223
fields = (
224-
"id", "index",
224+
"id",
225225
"cache_minimum_capacity", "cache_maximum_age",
226226
"policy_id", "policy_name",
227227
"policy_stat", "ais_project_name", "ais_project_id",
@@ -247,7 +247,7 @@ class Meta:
247247
class StoragePolicyNestedSerializer(StoragePolicySerializer):
248248
class Meta(StoragePolicySerializer.Meta):
249249
fields = (
250-
"id", "index",
250+
"id",
251251
"cache_minimum_capacity", "cache_maximum_age",
252252
"policy_id", "policy_name",
253253
"policy_stat", "ais_project_name", "ais_project_id",

ESSArch_Core/ip/models.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,8 +1221,11 @@ def create_preservation_workflow(self):
12211221

12221222
profile_type = self.get_package_type_display().lower()
12231223
index_files = self.get_profile_data(profile_type).get('index_files', True)
1224+
index_files_content = self.get_profile_data(profile_type).get('index_files_content', True)
12241225
index_cits = self.get_profile_data(profile_type).get('index_cits', True)
1225-
if index_files is True or index_files == 'True' or index_cits is True or index_cits == 'True':
1226+
if (index_files is True or index_files == 'True' or
1227+
index_files_content is True or index_files_content == 'True' or
1228+
index_cits is True or index_cits == 'True'):
12261229
write_to_search_index = True
12271230
else:
12281231
write_to_search_index = False
@@ -2800,6 +2803,7 @@ def write_to_search_index(self, task):
28002803
indexed_files = ct_importer.import_content(cts, ip=self)
28012804

28022805
index_files = self.get_profile_data(profile_type).get('index_files', True)
2806+
index_files_content = self.get_profile_data(profile_type).get('index_files_content', True)
28032807
if index_files is True or index_files == 'True':
28042808
group = None
28052809
try:
@@ -2812,7 +2816,7 @@ def write_to_search_index(self, task):
28122816
for root, dirs, files in walk(srcdir):
28132817
for d in dirs:
28142818
src = os.path.join(root, d)
2815-
index_path(self, src, group=group)
2819+
index_path(self, src, group=group, index_file_content=index_files_content)
28162820

28172821
for f in files:
28182822
src = os.path.join(root, f)
@@ -2821,7 +2825,7 @@ def write_to_search_index(self, task):
28212825
indexed_files.remove(src)
28222826
except ValueError:
28232827
# file has not been indexed, index it
2824-
index_path(self, src, group=group)
2828+
index_path(self, src, group=group, index_file_content=index_files_content)
28252829

28262830
InformationPackageDocument.from_obj(self).save()
28272831

ESSArch_Core/search/ingest.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,13 @@
2121
)
2222

2323

24-
def index_document(tag_version, filepath):
24+
def index_document(tag_version, filepath, index_file_content=True):
2525
logger = logging.getLogger('essarch.search.ingest')
2626
exclude_file_format_from_indexing_content = settings.EXCLUDE_FILE_FORMAT_FROM_INDEXING_CONTENT
2727

2828
fid = FormatIdentifier()
2929
(format_name, format_version, format_registry_key) = fid.identify_file_format(filepath)
30-
if format_registry_key not in exclude_file_format_from_indexing_content:
31-
index_file_content = True
32-
else:
30+
if format_registry_key in exclude_file_format_from_indexing_content:
3331
index_file_content = False
3432

3533
ip = tag_version.tag.information_package
@@ -84,7 +82,7 @@ def index_directory(tag_version, dirpath):
8482
return doc, tag_version
8583

8684

87-
def index_path(ip, path, parent=None, group=None):
85+
def index_path(ip, path, parent=None, group=None, index_file_content=True):
8886
"""
8987
Indexes the file or directory at path to elasticsearch
9088
@@ -113,7 +111,7 @@ def index_path(ip, path, parent=None, group=None):
113111
tag_version.elastic_index = 'document'
114112
# TODO: minimize db queries
115113
tag_version.type = TagVersionType.objects.get_or_create(name='document', archive_type=False)[0]
116-
doc, tag_version = index_document(tag_version, path)
114+
doc, tag_version = index_document(tag_version, path, index_file_content)
117115
tag_version.save()
118116
else:
119117
tag_version.elastic_index = 'directory'

ESSArch_Core/templates/eu/EU_AIP_v204.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,23 @@
305305
"popover-trigger": "'mouseenter'"
306306
}
307307
},
308+
{
309+
"key": "index_files_content",
310+
"type": "select",
311+
"defaultValue": true,
312+
"hidden": false,
313+
"templateOptions": {
314+
"required": false,
315+
"disabled": false,
316+
"label": "Index files content",
317+
"options": [{"name": "Yes","value": true},{"name": "No","value": false}]
318+
},
319+
"ngModelElAttrs": {
320+
"uib-popover": "Index files content",
321+
"popover-placement": "bottom",
322+
"popover-trigger": "'mouseenter'"
323+
}
324+
},
308325
{
309326
"key": "index_cits",
310327
"type": "select",

ESSArch_Core/templates/eu/EU_CSIP_AIP_v204.json

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,23 @@
305305
"popover-trigger": "'mouseenter'"
306306
}
307307
},
308+
{
309+
"key": "index_files_content",
310+
"type": "select",
311+
"defaultValue": true,
312+
"hidden": false,
313+
"templateOptions": {
314+
"required": false,
315+
"disabled": false,
316+
"label": "Index files content",
317+
"options": [{"name": "Yes","value": true},{"name": "No","value": false}]
318+
},
319+
"ngModelElAttrs": {
320+
"uib-popover": "Index files content",
321+
"popover-placement": "bottom",
322+
"popover-trigger": "'mouseenter'"
323+
}
324+
},
308325
{
309326
"key": "index_cits",
310327
"type": "select",
@@ -313,7 +330,7 @@
313330
"templateOptions": {
314331
"required": false,
315332
"disabled": false,
316-
"label": "Indexera CITS",
333+
"label": "Index CITS",
317334
"options": [{"name": "Yes","value": true},{"name": "No","value": false}]
318335
},
319336
"ngModelElAttrs": {

ESSArch_Core/templates/no/NO_AIP_v10.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,23 @@
10061006
"popover-trigger": "'mouseenter'"
10071007
}
10081008
},
1009+
{
1010+
"key": "index_files_content",
1011+
"type": "select",
1012+
"defaultValue": true,
1013+
"hidden": false,
1014+
"templateOptions": {
1015+
"required": false,
1016+
"disabled": false,
1017+
"label": "Indexera filernas innehåll",
1018+
"options": [{"name": "Yes","value": true},{"name": "No","value": false}]
1019+
},
1020+
"ngModelElAttrs": {
1021+
"uib-popover": "Indexera innehåll i leveransen. Tillägg till specifikationen",
1022+
"popover-placement": "bottom",
1023+
"popover-trigger": "'mouseenter'"
1024+
}
1025+
},
10091026
{
10101027
"key": "index_cits",
10111028
"type": "select",

ESSArch_Core/templates/se/SE_AIP_transparent_v12.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,23 @@
408408
"popover-trigger": "'mouseenter'"
409409
}
410410
},
411+
{
412+
"key": "index_files_content",
413+
"type": "select",
414+
"defaultValue": true,
415+
"hidden": false,
416+
"templateOptions": {
417+
"required": false,
418+
"disabled": false,
419+
"label": "Indexera filernas innehåll",
420+
"options": [{"name": "Yes","value": true},{"name": "No","value": false}]
421+
},
422+
"ngModelElAttrs": {
423+
"uib-popover": "Indexera innehåll i leveransen. Tillägg till specifikationen",
424+
"popover-placement": "bottom",
425+
"popover-trigger": "'mouseenter'"
426+
}
427+
},
411428
{
412429
"key": "index_cits",
413430
"type": "select",

0 commit comments

Comments
 (0)