1111import io .micrometer .observation .Observation ;
1212import io .micrometer .observation .ObservationRegistry ;
1313import java .net .URI ;
14+ import java .util .List ;
1415import java .util .Objects ;
1516import java .util .Set ;
1617import java .util .Spliterator ;
2627import org .databiosphere .workspacedataservice .dao .JobDao ;
2728import org .databiosphere .workspacedataservice .dataimport .ImportDetails ;
2829import org .databiosphere .workspacedataservice .dataimport .ImportDetailsRetriever ;
30+ import org .databiosphere .workspacedataservice .dataimport .ImportRequirements ;
31+ import org .databiosphere .workspacedataservice .dataimport .ImportRequirementsFactory ;
32+ import org .databiosphere .workspacedataservice .dataimport .protecteddatasupport .ProtectedDataSupport ;
2933import org .databiosphere .workspacedataservice .dataimport .snapshotsupport .MultiCloudSnapshotSupportFactory ;
3034import org .databiosphere .workspacedataservice .dataimport .snapshotsupport .SnapshotSupport ;
3135import org .databiosphere .workspacedataservice .jobexec .JobDataMapReader ;
@@ -66,6 +70,8 @@ public class PfbQuartzJob extends QuartzJob {
6670 private final ImportDetailsRetriever importDetailsRetriever ;
6771 private final ImportMetrics importMetrics ;
6872 private final DrsService drsService ;
73+ private final ProtectedDataSupport protectedDataSupport ;
74+ private final ImportRequirementsFactory importRequirementsFactory ;
6975
7076 public PfbQuartzJob (
7177 JobDao jobDao ,
@@ -78,7 +84,8 @@ public PfbQuartzJob(
7884 MultiCloudSnapshotSupportFactory snapshotSupportFactory ,
7985 DataImportProperties dataImportProperties ,
8086 ImportDetailsRetriever importDetailsRetriever ,
81- DrsService drsService ) {
87+ DrsService drsService ,
88+ ProtectedDataSupport protectedDataSupport ) {
8289 super (jobDao , observationRegistry , dataImportProperties );
8390 this .recordSourceFactory = recordSourceFactory ;
8491 this .recordSinkFactory = recordSinkFactory ;
@@ -88,6 +95,9 @@ public PfbQuartzJob(
8895 this .importDetailsRetriever = importDetailsRetriever ;
8996 this .importMetrics = importMetrics ;
9097 this .drsService = drsService ;
98+ this .protectedDataSupport = protectedDataSupport ;
99+ this .importRequirementsFactory =
100+ new ImportRequirementsFactory (dataImportProperties .getSources ());
91101 }
92102
93103 @ Override
@@ -109,15 +119,36 @@ protected void executeInternal(UUID jobId, JobExecutionContext context) {
109119
110120 ImportDetails details = importDetailsRetriever .fetch (jobId , jobData , PrefixStrategy .PFB );
111121
122+ // Early PFB content inspection to determine if auth domains should be applied
123+ // This is HTTP connection #0 to the PFB.
124+ logger .info ("Inspecting PFB content for auth domain requirements..." );
125+ Set <UUID > snapshotIds = withPfbStream (uri , this ::findSnapshots );
126+ logger .info ("Found {} unique snapshot IDs in PFB" , snapshotIds .size ());
127+ boolean hasNresConsent = false ;
128+ if (snapshotIds .isEmpty ()) {
129+ hasNresConsent = withPfbStream (uri , this ::hasNresConsentGroup );
130+ logger .info ("NRES consent group present in PFB: {}" , hasNresConsent );
131+ }
132+ ImportRequirements requirements = importRequirementsFactory .getRequirementsForImport (uri );
133+
134+ // If we skipped auth domains earlier, but now there is neither an NRES consent group nor any
135+ // snapshots,
136+ // apply the configured auth domains now.
137+ if (!requirements .requiredAuthDomainGroups ().isEmpty ()
138+ && !requirements .alwaysApplyAuthDomains ()
139+ && snapshotIds .isEmpty ()
140+ && !hasNresConsent ) {
141+ logger .info ("Applying auth domain groups based on PFB content analysis..." );
142+ protectedDataSupport .addAuthDomainGroupsToWorkspace (
143+ details .workspaceId (), requirements .requiredAuthDomainGroups ());
144+ }
145+
112146 // Find all the snapshot ids in the PFB, then create or verify references from the
113147 // workspace to the snapshot for each of those snapshot ids.
114148 // This will throw an exception if there are policy conflicts between the workspace
115149 // and the snapshots.
116150 //
117- // This is HTTP connection #1 to the PFB.
118- logger .info ("Finding snapshots in this PFB..." );
119- Set <UUID > snapshotIds = withPfbStream (uri , this ::findSnapshots );
120-
151+ // This is HTTP connection #1 to the PFB (reusing snapshotIds from earlier).
121152 logger .info ("Linking snapshots..." );
122153 linkSnapshots (snapshotIds , details .workspaceId ());
123154
@@ -261,4 +292,49 @@ private UUID maybeUuid(String input) {
261292 return null ;
262293 }
263294 }
295+
296+ /** Check if the PFB contains any anvil_dataset records with consent_group set to NRES */
297+ boolean hasNresConsentGroup (DataFileStream <GenericRecord > dataStream ) {
298+ Stream <GenericRecord > recordStream =
299+ StreamSupport .stream (
300+ Spliterators .spliteratorUnknownSize (dataStream .iterator (), Spliterator .ORDERED ), false );
301+
302+ // Collect all consent groups from anvil_dataset records
303+ List <Object > consentGroups =
304+ recordStream
305+ .filter (rec -> "anvil_dataset" .equals (rec .get ("name" ).toString ()))
306+ .map (rec -> rec .get ("object" ))
307+ .filter (GenericRecord .class ::isInstance )
308+ .map (GenericRecord .class ::cast )
309+ .filter (anvilDataset -> anvilDataset .hasField ("consent_group" ))
310+ .map (anvilDataset -> anvilDataset .get ("consent_group" ))
311+ .filter (Objects ::nonNull )
312+ .filter (
313+ consentGroup -> {
314+ if (consentGroup instanceof java .util .Collection ) {
315+ return !((java .util .Collection <?>) consentGroup ).isEmpty ();
316+ } else {
317+ return !consentGroup .toString ().isEmpty ();
318+ }
319+ })
320+ .toList ();
321+
322+ // Must have at least one consent group AND all must be NRES
323+ if (consentGroups .isEmpty ()) {
324+ return false ;
325+ }
326+
327+ return consentGroups .stream ()
328+ .allMatch (
329+ consentGroup -> {
330+ if (consentGroup instanceof java .util .Collection ) {
331+ return ((java .util .Collection <?>) consentGroup )
332+ .stream ()
333+ .filter (Objects ::nonNull )
334+ .allMatch (item -> "NRES" .equals (String .valueOf (item )));
335+ } else {
336+ return "NRES" .equals (String .valueOf (consentGroup ));
337+ }
338+ });
339+ }
264340}
0 commit comments