diff --git a/tests/basic/ec/ec-inode-read-mask.t b/tests/basic/ec/ec-inode-read-mask.t new file mode 100644 index 0000000000..756fbdf4a4 --- /dev/null +++ b/tests/basic/ec/ec-inode-read-mask.t @@ -0,0 +1,166 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../ec.rc + +EC_READMASK_XATTR="glusterfs.ec.readmask" + +# Parsing XML file to get count of READ calls made on bricks +get_brick_reads() { + local xml_file="$1" + local -n result_array=$2 # Reference to output array + + local index=0 + local brick_reads=0 + local inside_fop=0 + local read_hits=0 + local brick_found=0 + + while IFS= read -r line; do + if [[ $line =~ "" ]]; then + if ((brick_found)); then + result_array[index]=$brick_reads + ((index++)) + fi + brick_found=1 + brick_reads=0 + fi + + if [[ $line =~ "" ]]; then + inside_fop=1 + read_hits=0 + fi + + if [[ $inside_fop -eq 1 && $line =~ "READ" ]]; then + read_hits=1 + fi + + if [[ $inside_fop -eq 1 && $line =~ "" ]]; then + if ((read_hits)); then + brick_reads=$(echo "$line" | sed -E 's/.*([0-9]+)<\/hits>.*/\1/') + fi + fi + + if [[ $line =~ "" ]]; then + inside_fop=0 + fi + done < "$xml_file" + + if ((brick_found)); then + result_array[index]=$brick_reads + fi +} + +#Function to compare read_count arrays and verify that only specified bricks have modified read values +compare_arrays() { + local -n arr1=$1 + local -n arr2=$2 + local -a indices=("${@:3}") + + local length1=${#arr1[@]} + local length2=${#arr2[@]} + + + if [[ $length1 -ne $length2 ]]; then + echo "Array lengths differ" + return 1 + fi + + + local -A changed_indices + + for i in "${!arr1[@]}"; do + if [[ "${arr1[i]}" -ne "${arr2[i]}" ]]; then + changed_indices[$i]=1 + fi + done + + for i in "${!changed_indices[@]}"; do + if [[ ! " ${indices[@]} " =~ " $i " ]]; then + echo "Unexpected change at index $i" + return 1 + fi + done + + echo "Only specified indices changed" + return 0 +} + +validate_read() { + local mask="$1" + local space_sep_values="${mask//:/ }" + + $CLI volume profile $V0 info --xml > $tmpdir/preread.xml + local -a brick_reads_array_old + get_brick_reads $tmpdir/preread.xml brick_reads_array_old + echo ${brick_reads_array_old[@]} + + # Set readmask to bricks 0, 1, 3, 5, 8, 9 + if ! setfattr -n "$EC_READMASK_XATTR" -v "$mask" "$M0/newfile"; then + echo "Failed to set readmask xattr" + return 1 + fi + + dd if="$M0/newfile" of=/dev/null iflag=direct bs=4M + + sleep 1 + + $CLI volume profile $V0 info --xml > "$tmpdir/after_mask.xml" + local -a brick_reads_array_new + get_brick_reads "$tmpdir/after_mask.xml" brick_reads_array_new + echo "After readmask: ${brick_reads_array_new[@]}" + + compare_arrays brick_reads_array_new brick_reads_array_old "${space_sep_values[@]}" + return $? +} + +#Setup +cleanup +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9} +TEST $CLI volume create $V0 disperse 10 redundancy 4 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '10' brick_count $V0 + +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status' + +# Mount FUSE with caching disabled +TEST $GFS -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0 + +TEST $CLI volume profile $V0 start + + +# Create file +TEST dd if=/dev/urandom of=$M0/newfile bs=4M count=5 + +# Read without setting readmask xattr should not fail +TEST dd if=$M0/newfile of=/dev/null iflag=direct bs=4M + +# Create temporary directory +tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX) + + +# Test 1: Read with mask set to bricks 0, 1, 3, 5, 8, 9 +TEST validate_read "0:1:3:5:8:9" + +# Test 2: Read with mask set to bricks 4, 5, 6, 7, 8, 9 +TEST validate_read "4:5:6:7:8:9" + +# Test 3: setfattr wont set invalid read_masks +TEST ! setfattr -n $EC_READMASK_XATTR -v "1:sm:snb:adi:as" $M0/newfile + +TEST ! setfattr -n $EC_READMASK_XATTR -v "0:1::2ab" $M0/newfile + +# Test 4: setfattr wont set read_mask in case insufficient bricks are provided +TEST ! setfattr -n $EC_READMASK_XATTR -v "0:1:2:3" $M0/newfile +TEST ! setfattr -n $EC_READMASK_XATTR -v "4:5" $M0/newfile + +rm -rf "$tmpdir" +cleanup; \ No newline at end of file diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 3124ebbad1..1cd5ef39c9 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -734,6 +734,40 @@ ec_inode_get(inode_t *inode, xlator_t *xl) return ctx; } +void +ec_inode_readmask_set(inode_t *inode, xlator_t *xl, uintptr_t read_mask) +{ + ec_inode_t *ctx = NULL; + + LOCK(&inode->lock); + + ctx = __ec_inode_get(inode, xl); + + if (ctx) + ctx->read_mask = read_mask; + + UNLOCK(&inode->lock); + +} + +uintptr_t +ec_inode_readmask_get(inode_t *inode, xlator_t *xl) +{ + ec_inode_t *ctx = NULL; + uintptr_t read_mask = 0; + + LOCK(&inode->lock); + + ctx = __ec_inode_get(inode, xl); + + if (ctx) + read_mask = ctx->read_mask; + + UNLOCK(&inode->lock); + + return read_mask; +} + ec_fd_t * __ec_fd_get(fd_t *fd, xlator_t *xl) { @@ -834,3 +868,71 @@ ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop) } return _gf_false; }*/ + +gf_boolean_t +ec_is_readmask_xattr(dict_t *dict) +{ + data_t *dict_data = NULL; + if (dict_lookup(dict, EC_XATTR_READMASK, &dict_data) == 0){ + return _gf_true; + } + return _gf_false; +} + +uintptr_t +ec_parse_read_mask(ec_t *ec, char *read_mask_str, uintptr_t *read_mask_ptr, int32_t *op_errno_ptr, uint64_t msgid) +{ + char *mask = NULL; + char *maskptr = NULL; + char *saveptr = NULL; + char *id_str = NULL; + uintptr_t read_mask = 0; + int id = 0; + int ret = -1; + + mask = gf_strdup(read_mask_str); + if (!mask) { + *op_errno_ptr = ENOMEM; + goto out; + } + maskptr = mask; + + for (;;) { + id_str = strtok_r(maskptr, ":", &saveptr); + if (id_str == NULL) + break; + if (gf_string2int(id_str, &id)) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid, + "In read-mask \"%s\" id %s is not a valid integer", + read_mask_str, id_str); + + *op_errno_ptr = EINVAL; + goto out; + } + + if ((id < 0) || (id >= ec->nodes)) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid, + "In read-mask \"%s\" id %d is not in range [0 - %d]", + read_mask_str, id, ec->nodes - 1); + + *op_errno_ptr = EINVAL; + goto out; + } + read_mask |= (1UL << id); + maskptr = NULL; + } + + if (gf_bits_count(read_mask) < ec->fragments) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, msgid, + "read-mask \"%s\" should contain at least %d ids", read_mask_str, + ec->fragments); + + *op_errno_ptr = EINVAL; + goto out; + } + *read_mask_ptr = read_mask; + ret = 0; +out: + GF_FREE(mask); + return ret; +} diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 3a461b26cc..9b6d8d3b7b 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -70,6 +70,11 @@ __ec_fd_get(fd_t *fd, xlator_t *xl); ec_fd_t * ec_fd_get(fd_t *fd, xlator_t *xl); +void +ec_inode_readmask_set(inode_t *inode, xlator_t *xl, uintptr_t read_mask); +uintptr_t +ec_inode_readmask_get(inode_t *inode, xlator_t *xl); + static inline uint32_t ec_adjust_size_down(ec_t *ec, uint64_t *value, gf_boolean_t scale) { @@ -192,4 +197,10 @@ ec_filter_internal_xattrs(dict_t *xattr); int32_t ec_launch_replace_heal(ec_t *ec); +gf_boolean_t +ec_is_readmask_xattr(dict_t *dict); + +uintptr_t +ec_parse_read_mask(ec_t * ec, char *read_mask_str, uintptr_t *read_mask_ptr, int32_t *op_errno_ptr, uint64_t msgid); + #endif /* __EC_HELPERS_H__ */ diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index dad5f4d701..0e177088b0 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -1372,9 +1372,14 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state) return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: - if (ec->read_mask) { + uintptr_t inode_read_mask = ec_inode_readmask_get(fop->fd->inode, fop->xl); + if (inode_read_mask != 0) { + fop->mask &= inode_read_mask; + } + else if (ec->read_mask) { fop->mask &= ec->read_mask; } + ec_dispatch_min(fop); return EC_STATE_PREPARE_ANSWER; diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h index 72e98f1128..aadb4d6ffd 100644 --- a/xlators/cluster/ec/src/ec-messages.h +++ b/xlators/cluster/ec/src/ec-messages.h @@ -56,6 +56,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, - EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD); + EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD, EC_MSG_INVALID_READMASK); #endif /* !_EC_MESSAGES_H_ */ diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index c89af3d355..f23fcd0567 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -187,6 +187,7 @@ struct _ec_inode { struct list_head heal; ec_stripe_list_t stripe_cache; uint64_t bad_version; + uintptr_t read_mask; }; typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 71c60ad72b..d1510fccc8 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -716,61 +716,22 @@ ec_statistics_init(ec_t *ec) static int ec_assign_read_mask(ec_t *ec, char *read_mask_str) { - char *mask = NULL; - char *maskptr = NULL; - char *saveptr = NULL; - char *id_str = NULL; - int id = 0; int ret = 0; + int op_errno = 0; uintptr_t read_mask = 0; if (!read_mask_str) { ec->read_mask = 0; - ret = 0; goto out; } - mask = gf_strdup(read_mask_str); - if (!mask) { - ret = -1; - goto out; - } - maskptr = mask; - - for (;;) { - id_str = strtok_r(maskptr, ":", &saveptr); - if (id_str == NULL) - break; - if (gf_string2int(id_str, &id)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, - "In read-mask \"%s\" id %s is not a valid integer", - read_mask_str, id_str); - ret = -1; - goto out; - } - - if ((id < 0) || (id >= ec->nodes)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, - "In read-mask \"%s\" id %d is not in range [0 - %d]", - read_mask_str, id, ec->nodes - 1); - ret = -1; - goto out; - } - read_mask |= (1UL << id); - maskptr = NULL; - } - - if (gf_bits_count(read_mask) < ec->fragments) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, - "read-mask \"%s\" should contain at least %d ids", read_mask_str, - ec->fragments); - ret = -1; + ret = ec_parse_read_mask(ec, read_mask_str, &read_mask, &op_errno, EC_MSG_XLATOR_INIT_FAIL); + if (ret < 0) { goto out; } + ec->read_mask = read_mask; - ret = 0; out: - GF_FREE(mask); return ret; } @@ -1321,14 +1282,57 @@ ec_gf_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +int32_t +ec_handle_readmask(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + ec_t* ec = this->private; + data_t *dict_data = NULL; + char *read_mask_str = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + uintptr_t read_mask = 0; + int ret = 0; + + if (!ec_is_readmask_xattr(dict)) { + return -1; + } + + ret = dict_get_str_sizen(dict, EC_XATTR_READMASK, &read_mask_str); + if (ret < 0) { + goto out; + } + + if (strcmp(read_mask_str, EC_DEFAULT_INODE_READ_MASK) == 0) { + ec_inode_readmask_set(loc->inode, this, read_mask); + op_ret = op_errno = 0; + goto out; + } + + ret = ec_parse_read_mask(ec, read_mask_str, &read_mask, &op_errno, EC_MSG_INVALID_READMASK); + if (ret < 0) { + op_ret = -1; + goto out; + } + ec_inode_readmask_set(loc->inode, this, read_mask); + op_ret = op_errno = 0; + +out: + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + int32_t ec_gf_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { int error = 0; - + EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out); + if (ec_handle_readmask(frame, this, loc, dict, flags, xdata) == 0) + return 0; + ec_setxattr(frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, NULL, loc, dict, flags, xdata); diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index 6f6de6d598..55c07a3053 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -20,9 +20,11 @@ #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" #define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" +#define EC_XATTR_READMASK "glusterfs.ec.readmask" #define EC_STRIPE_CACHE_MAX_SIZE 10 #define EC_VERSION_SIZE 2 #define EC_SHD_INODE_LRU_LIMIT 10 +#define EC_DEFAULT_INODE_READ_MASK ":" #define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS /* The maximum number of nodes is derived from the maximum allowed fragments