37
37
#include " json.hpp"
38
38
#include " cpp_json_kw.hpp"
39
39
40
+ #include < tuple>
41
+ #include < string>
42
+ #include < set>
40
43
41
44
using namespace std ::string_literals;
42
45
@@ -242,6 +245,66 @@ namespace irods {
242
245
generate_delay_execution_parameters ());
243
246
}
244
247
248
+
249
+
250
+
251
+ class path_format_error : public std ::runtime_error {
252
+ public:
253
+ path_format_error (const std::string & s) : std::runtime_error(s.c_str()) {}
254
+ };
255
+
256
+ template <typename T>
257
+ class path_calc_and_cache {
258
+ public:
259
+ using string = std::string;
260
+ using idx_infos = std::set<T>;
261
+ private:
262
+ std::map<string, idx_infos> m;
263
+ using calc_function = std::function<idx_infos (const string& s)> ;
264
+ calc_function calc_;
265
+ public:
266
+ path_calc_and_cache (calc_function f): calc_{f}
267
+ {
268
+ }
269
+
270
+ idx_infos accum (const string &path)
271
+ {
272
+ auto pos = path.find_last_of (" /" );
273
+ if (path.size () < 1 || pos == string::npos)
274
+ throw path_format_error{ " Couldn't parse: " s + path };
275
+ if (path == " /" )
276
+ return calc (" /" );
277
+ auto parent_len = (pos == 0 ) ? 1 :pos;
278
+ auto st = accum (path.substr (0 ,parent_len));
279
+ auto st_i = calc (path);
280
+ st.insert (st_i.begin (), st_i.end ());
281
+ return st;
282
+ }
283
+
284
+ idx_infos calc (const string & p) { // Get individual contribution for last elem of path 'p'.
285
+ // Use cached value if it exists.
286
+ idx_infos i;
287
+ try {
288
+ i = m.at (p);
289
+ }
290
+ catch (const std::out_of_range &e) {
291
+ i = (this ->calc_ )(p);
292
+ }
293
+ m[p] = i;
294
+ return i;
295
+ }
296
+
297
+ };
298
+
299
+ class index_info final : public std::tuple<std::string,std::string,std::string> {
300
+ public:
301
+ using string = std::string;
302
+ using tuple::tuple;
303
+ string& index_name () { return std::get<0 >(*this ); }
304
+ string& index_type () { return std::get<1 >(*this ); }
305
+ string& index_tech () { return std::get<2 >(*this ); }
306
+ };
307
+
245
308
// - Starting at _collection_name , recurse over every sub-element of the tree
246
309
// - (including data objects and collections and starting with the root).
247
310
// - Call schedule_policy_event_for_object for every object or collection
@@ -257,6 +320,28 @@ namespace irods {
257
320
namespace fsvr = irods::experimental::filesystem::server;
258
321
using fsp = fs::path;
259
322
rsComm_t& comm = *rei_->rsComm ;
323
+ auto f = [&] (const std::string & collname)
324
+ {
325
+ std::set<index_info> s;
326
+ irods::query q {&comm,
327
+ boost::str (boost::format (" select META_COLL_ATTR_NAME,META_COLL_ATTR_VALUE,META_COLL_ATTR_UNITS where"
328
+ " COLL_NAME = '%s' and META_COLL_ATTR_NAME = '%s'" ) % collname % config_.index )};
329
+ for (const auto &row: q) {
330
+ std::string idx_name, idx_type, idx_tech;
331
+ std::tie (idx_name, idx_type) = irods::indexing::parse_indexer_string (row[1 ]);
332
+ idx_tech = row[2 ];
333
+ s.insert ( { idx_name, idx_type, idx_tech } );
334
+ }
335
+ return s;
336
+ };
337
+ path_calc_and_cache <index_info> idx_info_cache {f};
338
+
339
+ /* DWM - delete
340
+ rodsLog( LOG_NOTICE, "DWM - index_type [%s]" , _index_type.c_str() );
341
+ rodsLog( LOG_NOTICE, "DWM - index_name [%s]" , _index_name.c_str() );
342
+ rodsLog( LOG_NOTICE, "DWM - indexer [%s]" , _indexer.c_str());
343
+ */
344
+ bool Search_parent_tags = (_indexer.empty () && _index_name.empty ());
260
345
261
346
const auto indexing_resources = get_indexing_resource_names ();
262
347
const auto policy_name = operation_and_index_types_to_policy_name (
@@ -315,73 +400,103 @@ namespace irods {
315
400
: std::runtime_error{e} {}
316
401
};
317
402
403
+ try {
404
+ for (auto path = start_path; ; ++iter) {
405
+ const auto s = fsvr::status (comm,path);
406
+ bool is_collection = fsvr::is_collection (s);
407
+ bool is_data_object = fsvr::is_data_object (s);
408
+ if (is_data_object || is_collection) {
409
+ try {
410
+ std::string resc_name;
411
+ if (is_data_object) {
412
+ resc_name = get_indexing_resource_name_for_object (
413
+ path.string (),
414
+ indexing_resources);
415
+ }
318
416
319
- for (auto path = start_path; ; ++iter) {
320
- const auto s = fsvr::status (comm,path);
321
- bool is_collection = fsvr::is_collection (s);
322
- bool is_data_object = fsvr::is_data_object (s);
323
- if (is_data_object || is_collection) {
324
- try {
325
- std::string resc_name;
326
- if (is_data_object) {
327
- resc_name = get_indexing_resource_name_for_object (
328
- path.string (),
329
- indexing_resources);
330
- }
331
-
332
- if (job_limit > 0 && n_jobs >= job_limit) {
333
- // The job limit parameter should be a large number, in the thousands or more perhaps, but small
334
- // enough so that indexing your largest collections doesn't fill up all of virtual memory.
335
- for (;;) {
336
- query<rsComm_t> qobj{comm_, JOB_QUERY_STRING, 1 };
337
- for (const auto & row: qobj) {
338
- auto count = std::stol ( row[0 ] );
339
- if (count > job_max) { throw job_limit_precision{}; }
340
- n_jobs = count;
341
- break ;
342
- }
343
- // The approach to throttling is simply to wait until the number of delayed tasks falls
344
- // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again.
345
- // Because we're already in a delayed task, this does not impact the plugin's response time.
346
- if (n_jobs > LOW_WATER_MARK) {
347
- sleep (1 );
348
- }
349
- else {
350
- break ;
417
+ // if job limit is exceeded, wait before spawning more jobs
418
+
419
+ if (job_limit > 0 && n_jobs >= job_limit) {
420
+ // The job limit parameter should be a large number, in the thousands or more perhaps, but small
421
+ // enough so that indexing your largest collections doesn't fill up all of virtual memory.
422
+ for (;;) {
423
+ query<rsComm_t> qobj{comm_, JOB_QUERY_STRING, 1 };
424
+ for (const auto & row: qobj) {
425
+ auto count = std::stol ( row[0 ] );
426
+ if (count > job_max) { throw job_limit_precision{}; }
427
+ n_jobs = count;
428
+ break ;
429
+ }
430
+ // The approach to throttling is simply to wait until the number of delayed tasks falls
431
+ // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again.
432
+ // Because we're already in a delayed task, this does not impact the plugin's response time.
433
+ if (n_jobs > LOW_WATER_MARK) {
434
+ sleep (1 );
435
+ }
436
+ else {
437
+ break ;
438
+ }
351
439
}
352
440
}
353
- }
354
-
355
- if ( ! (is_collection && _index_type == " full_text" )) {
356
- schedule_policy_event_for_object (
357
- policy_name,
358
- path.string (),
359
- _user_name,
360
- EMPTY_RESOURCE_NAME,
361
- _indexer,
362
- _index_name,
363
- _index_type,
364
- generate_delay_execution_parameters (),
365
- {},{},{},
366
- {{ " job_category_tag" , unique_key }} );
367
441
368
- ++n_jobs;
442
+ if ( Search_parent_tags ) {
443
+ // if (_index_type == "metadata" ) {
444
+ auto info_set = idx_info_cache.accum ( path.string () );
445
+ for (auto info: info_set) {
446
+ irods::log ( LOG_NOTICE, fmt::format (" DWM - reindexing for perms - name/type/tech=[{}][{}][{}]" ,
447
+ info.index_name (), info.index_type (), info.index_tech ()) );
448
+ schedule_policy_event_for_object (
449
+ policy_name,
450
+ path.string (),
451
+ _user_name,
452
+ EMPTY_RESOURCE_NAME,
453
+ info.index_tech (),
454
+ info.index_name (),
455
+ info.index_type (),
456
+ generate_delay_execution_parameters (),
457
+ {},{},{},
458
+ {{ " job_category_tag" , unique_key }} );
459
+ ++n_jobs;
460
+ }
461
+ // } else { throw std::runtime_error {"Parent tags search mode but index type is not 'metadata'"}; }
462
+ }
463
+ else if ( ! (is_collection && _index_type == " full_text" )) { // full_text is meaningless for collection objects
464
+ schedule_policy_event_for_object (
465
+ policy_name,
466
+ path.string (),
467
+ _user_name,
468
+ EMPTY_RESOURCE_NAME,
469
+ _indexer,
470
+ _index_name,
471
+ _index_type,
472
+ generate_delay_execution_parameters (),
473
+ {},{},{},
474
+ {{ " job_category_tag" , unique_key }} );
475
+
476
+ ++n_jobs;
477
+ }
369
478
}
370
- }
371
- catch (const exception & _e) {
372
- rodsLog (
373
- LOG_ERROR,
374
- " failed to find indexing resource (error code=[%ld]) for object [%s]" ,static_cast <long >(_e.code ()),
375
- path.string ().c_str ());
376
- }
377
- catch (const std::runtime_error & e) {
378
- irods::log (LOG_ERROR,fmt::format (" Abort indexing collection: {}" ,e.what ()));
379
- break ;
380
- }
381
- if (iter != iter_end) { path = iter->path (); }
382
- else { break ; }
383
- } // if collection or data object
384
- } // for path
479
+ catch (const exception & _e) {
480
+ rodsLog (
481
+ LOG_ERROR,
482
+ " failed to find indexing resource (error code=[%ld]) for object [%s]" ,static_cast <long >(_e.code ()),
483
+ path.string ().c_str ());
484
+ }
485
+ catch (const std::runtime_error & e) {
486
+ irods::log (LOG_ERROR,fmt::format (" Abort indexing collection: {}" ,e.what ()));
487
+ break ;
488
+ }
489
+ if (iter != iter_end) { path = iter->path (); }
490
+ else { break ; }
491
+ } // if collection or data object
492
+ } // for path
493
+ }
494
+ catch ( const path_format_error & f ) {
495
+ rodsLog (LOG_ERROR , " path_format_error DWM" );
496
+ }
497
+ catch ( const std::runtime_error & f ) {
498
+ rodsLog (LOG_ERROR , " wrong index type DWM" );
499
+ }
385
500
} // schedule_policy_events_for_collection
386
501
387
502
/*
0 commit comments