Skip to content

Commit 3efbd4b

Browse files
committed
[_81] calculate user and group permissions
preliminaries for processing mod_access pep still remains to make indexer::schedule_policy_events_for_collection do the work of discovering the tags for each object in an efficient way. index_info (extension of string triple) efficient permissions re-indexing for ichmod -r better naming index_type must not be empty [_81] passing test
1 parent 3cc08e3 commit 3efbd4b

File tree

3 files changed

+453
-110
lines changed

3 files changed

+453
-110
lines changed

es_mapping.json

+14
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@
2727
"type": "date",
2828
"format": "epoch_second"
2929
},
30+
"creator": {
31+
"type": "keyword"
32+
},
33+
"userPermissions": {
34+
"type": "nested",
35+
"properties": {
36+
"permission": {
37+
"type": "keyword"
38+
},
39+
"user": {
40+
"type": "keyword"
41+
}
42+
}
43+
},
3044
"metadataEntries": {
3145
"type": "nested",
3246
"properties": {

indexing_utilities.cpp

+177-62
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
#include "json.hpp"
3838
#include "cpp_json_kw.hpp"
3939

40+
#include <tuple>
41+
#include <string>
42+
#include <set>
4043

4144
using namespace std::string_literals;
4245

@@ -242,6 +245,66 @@ namespace irods {
242245
generate_delay_execution_parameters());
243246
}
244247

248+
249+
250+
251+
class path_format_error: public std::runtime_error {
252+
public:
253+
path_format_error(const std::string & s) : std::runtime_error(s.c_str()) {}
254+
};
255+
256+
template <typename T>
257+
class path_calc_and_cache {
258+
public:
259+
using string = std::string;
260+
using idx_infos = std::set<T>;
261+
private:
262+
std::map<string, idx_infos> m;
263+
using calc_function = std::function<idx_infos (const string& s)> ;
264+
calc_function calc_;
265+
public:
266+
path_calc_and_cache(calc_function f): calc_{f}
267+
{
268+
}
269+
270+
idx_infos accum (const string &path)
271+
{
272+
auto pos = path.find_last_of("/");
273+
if (path.size() < 1 || pos == string::npos)
274+
throw path_format_error{ "Couldn't parse: "s + path };
275+
if (path == "/")
276+
return calc("/");
277+
auto parent_len = (pos == 0) ? 1 :pos;
278+
auto st = accum(path.substr(0,parent_len));
279+
auto st_i = calc(path);
280+
st.insert (st_i.begin(), st_i.end());
281+
return st;
282+
}
283+
284+
idx_infos calc(const string & p) { // Get individual contribution for last elem of path 'p'.
285+
// Use cached value if it exists.
286+
idx_infos i;
287+
try{
288+
i = m.at(p);
289+
}
290+
catch(const std::out_of_range &e) {
291+
i = (this->calc_)(p);
292+
}
293+
m[p] = i;
294+
return i;
295+
}
296+
297+
};
298+
299+
class index_info final : public std::tuple<std::string,std::string,std::string> {
300+
public:
301+
using string = std::string;
302+
using tuple::tuple;
303+
string& index_name() { return std::get<0>(*this); }
304+
string& index_type() { return std::get<1>(*this); }
305+
string& index_tech() { return std::get<2>(*this); }
306+
};
307+
245308
// - Starting at _collection_name , recurse over every sub-element of the tree
246309
// - (including data objects and collections and starting with the root).
247310
// - Call schedule_policy_event_for_object for every object or collection
@@ -257,6 +320,28 @@ namespace irods {
257320
namespace fsvr = irods::experimental::filesystem::server;
258321
using fsp = fs::path;
259322
rsComm_t& comm = *rei_->rsComm;
323+
auto f = [&] (const std::string & collname)
324+
{
325+
std::set<index_info> s;
326+
irods::query q {&comm,
327+
boost::str(boost::format("select META_COLL_ATTR_NAME,META_COLL_ATTR_VALUE,META_COLL_ATTR_UNITS where"
328+
" COLL_NAME = '%s' and META_COLL_ATTR_NAME = '%s'") % collname % config_.index)};
329+
for (const auto &row: q) {
330+
std::string idx_name, idx_type, idx_tech;
331+
std::tie(idx_name, idx_type) = irods::indexing::parse_indexer_string (row[1]);
332+
idx_tech = row[2];
333+
s.insert ( { idx_name, idx_type, idx_tech } );
334+
}
335+
return s;
336+
};
337+
path_calc_and_cache <index_info> idx_info_cache {f};
338+
339+
/* DWM - delete
340+
rodsLog( LOG_NOTICE, "DWM - index_type [%s]" , _index_type.c_str() );
341+
rodsLog( LOG_NOTICE, "DWM - index_name [%s]" , _index_name.c_str() );
342+
rodsLog( LOG_NOTICE, "DWM - indexer [%s]" , _indexer.c_str());
343+
*/
344+
bool Search_parent_tags = (_indexer.empty() && _index_name.empty());
260345

261346
const auto indexing_resources = get_indexing_resource_names();
262347
const auto policy_name = operation_and_index_types_to_policy_name(
@@ -315,73 +400,103 @@ namespace irods {
315400
: std::runtime_error{e} {}
316401
};
317402

403+
try {
404+
for (auto path = start_path; ; ++iter) {
405+
const auto s = fsvr::status(comm,path);
406+
bool is_collection = fsvr::is_collection(s);
407+
bool is_data_object = fsvr::is_data_object(s);
408+
if (is_data_object || is_collection) {
409+
try {
410+
std::string resc_name;
411+
if (is_data_object) {
412+
resc_name = get_indexing_resource_name_for_object(
413+
path.string(),
414+
indexing_resources);
415+
}
318416

319-
for (auto path = start_path; ; ++iter) {
320-
const auto s = fsvr::status(comm,path);
321-
bool is_collection = fsvr::is_collection(s);
322-
bool is_data_object = fsvr::is_data_object(s);
323-
if (is_data_object || is_collection) {
324-
try {
325-
std::string resc_name;
326-
if (is_data_object) {
327-
resc_name = get_indexing_resource_name_for_object(
328-
path.string(),
329-
indexing_resources);
330-
}
331-
332-
if (job_limit > 0 && n_jobs >= job_limit) {
333-
// The job limit parameter should be a large number, in the thousands or more perhaps, but small
334-
// enough so that indexing your largest collections doesn't fill up all of virtual memory.
335-
for(;;) {
336-
query<rsComm_t> qobj{comm_, JOB_QUERY_STRING, 1};
337-
for (const auto & row: qobj) {
338-
auto count = std::stol( row[0] );
339-
if (count > job_max) { throw job_limit_precision{}; }
340-
n_jobs = count;
341-
break;
342-
}
343-
// The approach to throttling is simply to wait until the number of delayed tasks falls
344-
// down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again.
345-
// Because we're already in a delayed task, this does not impact the plugin's response time.
346-
if (n_jobs > LOW_WATER_MARK) {
347-
sleep(1);
348-
}
349-
else {
350-
break;
417+
// if job limit is exceeded, wait before spawning more jobs
418+
419+
if (job_limit > 0 && n_jobs >= job_limit) {
420+
// The job limit parameter should be a large number, in the thousands or more perhaps, but small
421+
// enough so that indexing your largest collections doesn't fill up all of virtual memory.
422+
for(;;) {
423+
query<rsComm_t> qobj{comm_, JOB_QUERY_STRING, 1};
424+
for (const auto & row: qobj) {
425+
auto count = std::stol( row[0] );
426+
if (count > job_max) { throw job_limit_precision{}; }
427+
n_jobs = count;
428+
break;
429+
}
430+
// The approach to throttling is simply to wait until the number of delayed tasks falls
431+
// down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again.
432+
// Because we're already in a delayed task, this does not impact the plugin's response time.
433+
if (n_jobs > LOW_WATER_MARK) {
434+
sleep(1);
435+
}
436+
else {
437+
break;
438+
}
351439
}
352440
}
353-
}
354-
355-
if ( ! (is_collection && _index_type == "full_text" )) {
356-
schedule_policy_event_for_object(
357-
policy_name,
358-
path.string(),
359-
_user_name,
360-
EMPTY_RESOURCE_NAME,
361-
_indexer,
362-
_index_name,
363-
_index_type,
364-
generate_delay_execution_parameters(),
365-
{},{},{},
366-
{{ "job_category_tag", unique_key }} );
367441

368-
++n_jobs;
442+
if ( Search_parent_tags ) {
443+
//if (_index_type == "metadata" ) {
444+
auto info_set = idx_info_cache.accum( path.string() );
445+
for (auto info: info_set) {
446+
irods::log( LOG_NOTICE, fmt::format("DWM - reindexing for perms - name/type/tech=[{}][{}][{}]",
447+
info.index_name(), info.index_type(), info.index_tech()) );
448+
schedule_policy_event_for_object(
449+
policy_name,
450+
path.string(),
451+
_user_name,
452+
EMPTY_RESOURCE_NAME,
453+
info.index_tech(),
454+
info.index_name(),
455+
info.index_type(),
456+
generate_delay_execution_parameters(),
457+
{},{},{},
458+
{{ "job_category_tag", unique_key }} );
459+
++n_jobs;
460+
}
461+
//} else { throw std::runtime_error {"Parent tags search mode but index type is not 'metadata'"}; }
462+
}
463+
else if ( ! (is_collection && _index_type == "full_text" )) { // full_text is meaningless for collection objects
464+
schedule_policy_event_for_object(
465+
policy_name,
466+
path.string(),
467+
_user_name,
468+
EMPTY_RESOURCE_NAME,
469+
_indexer,
470+
_index_name,
471+
_index_type,
472+
generate_delay_execution_parameters(),
473+
{},{},{},
474+
{{ "job_category_tag", unique_key }} );
475+
476+
++n_jobs;
477+
}
369478
}
370-
}
371-
catch(const exception& _e) {
372-
rodsLog(
373-
LOG_ERROR,
374-
"failed to find indexing resource (error code=[%ld]) for object [%s]",static_cast<long>(_e.code()),
375-
path.string().c_str());
376-
}
377-
catch (const std::runtime_error & e) {
378-
irods::log(LOG_ERROR,fmt::format("Abort indexing collection: {}",e.what()));
379-
break;
380-
}
381-
if (iter != iter_end) { path = iter->path(); }
382-
else { break; }
383-
} // if collection or data object
384-
} // for path
479+
catch(const exception& _e) {
480+
rodsLog(
481+
LOG_ERROR,
482+
"failed to find indexing resource (error code=[%ld]) for object [%s]",static_cast<long>(_e.code()),
483+
path.string().c_str());
484+
}
485+
catch (const std::runtime_error & e) {
486+
irods::log(LOG_ERROR,fmt::format("Abort indexing collection: {}",e.what()));
487+
break;
488+
}
489+
if (iter != iter_end) { path = iter->path(); }
490+
else { break; }
491+
} // if collection or data object
492+
} // for path
493+
}
494+
catch( const path_format_error & f ) {
495+
rodsLog(LOG_ERROR , "path_format_error DWM");
496+
}
497+
catch( const std::runtime_error & f ) {
498+
rodsLog(LOG_ERROR , "wrong index type DWM");
499+
}
385500
} // schedule_policy_events_for_collection
386501

387502
/*

0 commit comments

Comments
 (0)