Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions src/scr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1593,9 +1593,11 @@ static int scr_start_output(const char* name, int flags)
return SCR_SUCCESS;
}

/* detect files that have been registered by more than one process,
* drop filemap entries from all but one process */
static int scr_assign_ownership(scr_filemap* map, const scr_reddesc* rd)
/* rank ralative ownership of the filemap since multiple processes may
* have been writing to it. The rankings are used later for portions of
* the SCR implementation that require access to be made in the context of a
* single process. */
static int scr_rank_ownership(scr_filemap* map, const scr_reddesc* rd)
{
int rc = SCR_SUCCESS;

Expand Down Expand Up @@ -1667,7 +1669,14 @@ static int scr_assign_ownership(scr_filemap* map, const scr_reddesc* rd)
/* keep rank 0 for each file as its owner, remove any entry from the filemap
* for which we are not rank 0 */
int multiple_owner = 0;

for (i = 0; i < count; i++) {
scr_meta* meta = scr_meta_new();

scr_filemap_get_meta(map, mapfiles[i], meta);
scr_meta_set_group_ranks(meta, group_ranks[i]);
scr_meta_set_group_rank(meta, group_rank[i]);

/* check whether this file exists on multiple ranks */
if (group_ranks[i] > 1) {
/* found the same file on more than one rank */
Expand All @@ -1681,11 +1690,8 @@ static int scr_assign_ownership(scr_filemap* map, const scr_reddesc* rd)
}
}

/* only keep entry for this file in filemap if we're the
* first rank in the set of ranks that have this file */
if (group_rank[i] != 0) {
scr_filemap_remove_file(map, mapfiles[i]);
}
scr_filemap_set_meta(map, mapfiles[i], meta);
scr_meta_delete(&meta);
}

/* fatal error if any file is on more than one rank
Expand Down Expand Up @@ -1737,13 +1743,16 @@ static int scr_complete_output(int valid)
time_start = MPI_Wtime();
}

/* When using bypass mode or shared cache, we allow different procs to write to the same file,
* in which case, both should have registered the file in Route_file and thus
* have an entry in the file map. The proper thing to do here is to list the
* set of ranks that share a file, however, that requires fixing up lots of
* other parts of the code. For now, ensure that at most one rank lists the
* file in their file map. */
rc = scr_assign_ownership(scr_map, scr_rd);
/* When using bypass mode or shared cache, we allow different procs to write
* to the same file, in which case, both should have registered the file in
* Route_file and thus have an entry in the file map.
*
* Calling scr_rank_ownership allow us to distinguish between the files that
* were created by a single process versus the ones that are shared between
* many. Further, this function also designate a single process (rank) that
* may be used in the cases where there needs to be only once process
* doing work on the file. */
rc = scr_rank_ownership(scr_map, scr_rd);

/* count number of files, number of bytes, and record filesize for each file
* as written by this process */
Expand All @@ -1756,6 +1765,15 @@ static int scr_complete_output(int valid)
{
/* get the filename */
char* file = kvtree_elem_key(elem);

/*
* For now, we continue to process files as if they are only written by
* a single process. We will open this up soon once we have updated
* AXL to take advantage of it. */
if ( ! scr_leader_rank(scr_map, file) ) {
continue;
}

my_counts[0]++;

/* start with valid flag from caller for this file */
Expand Down Expand Up @@ -1787,6 +1805,7 @@ static int scr_complete_output(int valid)
/* fill in filesize and complete flag in the meta data for the file */
scr_meta* meta = scr_meta_new();
scr_filemap_get_meta(scr_map, file, meta);

scr_meta_set_filesize(meta, filesize);
scr_meta_set_complete(meta, file_valid);
if (stat_rc == 0) {
Expand Down Expand Up @@ -1879,6 +1898,7 @@ static int scr_complete_output(int valid)
}

/* apply redundancy scheme if we're still valid */

if (rc == SCR_SUCCESS) {
rc = scr_reddesc_apply(scr_map, scr_rd, scr_dataset_id);
}
Expand Down
8 changes: 8 additions & 0 deletions src/scr_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ int scr_cache_delete(scr_cache_index* cindex, int id)
/* get the filename */
char* file = kvtree_elem_key(file_elem);

if (! scr_leader_rank(map, file) ) {
continue;
}

/* verify that file mtime and ctime have not changed since scr_complete_output,
* which could idenitfy a bug in the user's code */
struct stat statbuf;
Expand Down Expand Up @@ -653,6 +657,10 @@ int scr_cache_check_files(const scr_cache_index* cindex, int id)
/* get the filename */
char* file = kvtree_elem_key(file_elem);

if ( ! scr_leader_rank(map, file) ) {
continue;
}

/* check that we can read the file */
if (scr_file_is_readable(file) != SCR_SUCCESS) {
failed_read = 1;
Expand Down
4 changes: 4 additions & 0 deletions src/scr_flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,10 @@ int scr_flush_prepare(const scr_cache_index* cindex, int id, kvtree* file_list)
/* get the filename */
char* file = kvtree_elem_key(elem);

if ( ! scr_leader_rank(map, file) ) {
continue;
}

/* read meta data for file and attach it to file list */
scr_meta* meta = scr_meta_new();
if (scr_filemap_get_meta(map, file, meta) == SCR_SUCCESS) {
Expand Down
2 changes: 2 additions & 0 deletions src/scr_keys.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ Define common hash key strings
#define SCR_META_KEY_CKPT ("CKPT")
#define SCR_META_KEY_RANKS ("RANKS")
#define SCR_META_KEY_RANK ("RANK")
#define SCR_META_KEY_GROUP_RANKS ("GROUP_RANKS")
#define SCR_META_KEY_GROUP_RANK ("GROUP_RANK")
#define SCR_META_KEY_ORIG ("ORIG")
#define SCR_META_KEY_PATH ("PATH")
#define SCR_META_KEY_NAME ("NAME")
Expand Down
28 changes: 28 additions & 0 deletions src/scr_meta.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,20 @@ int scr_meta_set_crc32(scr_meta* meta, uLong crc)
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

/* set the ranks in the group for file */
int scr_meta_set_group_ranks(scr_meta* meta, int group_ranks)
{
int rc = kvtree_util_set_int(meta, SCR_META_KEY_GROUP_RANKS, group_ranks);
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

/* set the rank relative to the group for file */
int scr_meta_set_group_rank(scr_meta* meta, int group_rank)
{
int rc = kvtree_util_set_int(meta, SCR_META_KEY_GROUP_RANK, group_rank);
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

static void scr_stat_get_atimes(const struct stat* sb, uint64_t* secs, uint64_t* nsecs)
{
*secs = (uint64_t) sb->st_atime;
Expand Down Expand Up @@ -306,6 +320,20 @@ int scr_meta_get_crc32(const scr_meta* meta, uLong* crc)
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

/* get the ranks in the group for file */
int scr_meta_get_group_ranks(const scr_meta* meta, int* group_ranks)
{
int rc = kvtree_util_get_int(meta, SCR_META_KEY_GROUP_RANKS, group_ranks);
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

/* get the rank relative to the group for file */
int scr_meta_get_group_rank(const scr_meta* meta, int* group_rank)
{
int rc = kvtree_util_get_int(meta, SCR_META_KEY_GROUP_RANK, group_rank);
return (rc == KVTREE_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
}

/*
=========================================
Check field values
Expand Down
12 changes: 12 additions & 0 deletions src/scr_meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ int scr_meta_set_stat(scr_meta* meta, struct stat* statbuf);
/* set the crc32 field on meta */
int scr_meta_set_crc32(scr_meta* meta, uLong crc);

/* set the ranks in the group for file */
int scr_meta_set_group_ranks(scr_meta* meta, int group_ranks);

/* set the rank relative to the group for file */
int scr_meta_set_group_rank(scr_meta* meta, int group_rank);

/*
=========================================
Get field values
Expand Down Expand Up @@ -130,6 +136,12 @@ int scr_meta_get_complete(const scr_meta* meta, int* complete);
/* get the crc32 field in meta data, returns SCR_SUCCESS if a field is set */
int scr_meta_get_crc32(const scr_meta* meta, uLong* crc);

/* get the ranks in the group for file */
int scr_meta_get_group_ranks(const scr_meta* meta, int* group_ranks);

/* get the rank relative to the group for file */
int scr_meta_get_group_rank(const scr_meta* meta, int* group_rank);

/*
=========================================
Check field values
Expand Down
10 changes: 10 additions & 0 deletions src/scr_reddesc.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,11 @@ int scr_reddesc_apply(
/* get the filename */
char* file = kvtree_elem_key(file_elem);

/* Skip over shared files that we are not leaders of */
if ( ! scr_leader_rank(map, file) ) {
continue;
}

/* check the file */
if (! scr_bool_have_file(map, file)) {
scr_dbg(2, "File determined to be invalid: %s", file);
Expand Down Expand Up @@ -638,6 +643,11 @@ int scr_reddesc_apply(
/* get the filename */
char* file = kvtree_elem_key(file_elem);

/* Skip over shared files that we are not leaders of */
if ( ! scr_leader_rank(map, file) ) {
continue;
}

/* add file to the set */
if (ER_Add(set_id, file) != ER_SUCCESS) {
scr_err("Failed to add file to ER set: %s @ %s:%d", file, __FILE__, __LINE__);
Expand Down
21 changes: 21 additions & 0 deletions src/scr_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "scr.h"
#include "scr_err.h"
#include "scr_io.h"
#include "scr_filemap.h"
#include "scr_util.h"

#include <stdlib.h>
Expand Down Expand Up @@ -389,3 +390,23 @@ spath* scr_get_prefix(const char* str)

return prefix_path;
}

/* TODO: This needs to be moved somewhere else and probabbly renamed as well
*/
int scr_leader_rank(scr_filemap* map, const char *file)
{
scr_meta* meta = scr_meta_new();
int group_ranks;
int group_rank;

scr_filemap_get_meta(map, file, meta);
scr_meta_get_group_ranks(meta, &group_ranks);
scr_meta_get_group_rank(meta, &group_rank);

scr_meta_delete(&meta);

int rc = group_ranks == 1 || group_rank == 0;

return rc;
}

4 changes: 4 additions & 0 deletions src/scr_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

#include "spath.h"
#include "kvtree.h"
#include "scr_filemap.h"

/* given a string, convert it to a double and write that value to val */
int scr_atod(const char* str, double* val);
Expand Down Expand Up @@ -82,4 +83,7 @@ int kvtree_write_path(const spath* path, const kvtree* tree);
* return spath of fully qualified path, user should free */
spath* scr_get_prefix(const char* prefix);

/* TODO: This needs to be moved somewhere else and probabbly renamed as well */
int scr_leader_rank(scr_filemap* map, const char *file);

#endif