Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ parameter in spdk_bdev_get_opts function. Two fields `small_buf_pool_size` and
`large_buf_pool_size` were added into spdk_bdev_opts, which were used to determine
the small and large buffer pool size of the whole bdev module.

A new API, `spdk_bdev_get_caps`, has been added. It allows querying extended bdev module
capabilities.

New API functions `spdk_bdev_readv_blocks_with_md_ext` and `spdk_bdev_writev_blocks_with_md_ext`
have been added. These functions accept a `spdk_bdev_ext_io_opts` structure with extended IO request
options. The `opts_size` member of this structure must be set to a valid value.

### blob

An `opts_size` element was added in the `spdk_bs_opts` structure to solve the
Expand Down
141 changes: 141 additions & 0 deletions include/spdk/bdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,88 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
* Callback used to get a Memory Key per IO request.
*
* \param cb_arg Argument supplied together with the callback (see \b get_mkey_cb_arg
* in spdk_bdev_ext_io_opts_mem_type).
* \param address NOTE(review): presumably the start of the memory region the key is
* requested for - confirm with the implementation.
* \param length NOTE(review): presumably the length of that region in bytes - confirm.
* \param pd Input parameter, should point to a memory domain.
* \param mkey Output value, receives the Memory Key.
*
* \return NOTE(review): the meaning of the int return value is not specified here;
* presumably 0 on success - confirm.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comments for other arguments

*/
typedef int (*spdk_bdev_io_get_mkey)(void *cb_arg, void *address, size_t length, void *pd,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use forward declaration of ibv_pd structure

uint32_t *mkey);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return 2 keys - local and remote?
Ask if we need 2 keys


/** Kinds of memory description that can be attached to an extended IO request.
* The selected value is stored in the \b type member of spdk_bdev_ext_io_opts_mem_type. */
enum spdk_bdev_ext_io_opts_mem_types {
/** Memory in IO request belongs to another memory domain and it is described by Memory Key.
* If this value is set then the \b mkey structure in spdk_bdev_ext_io_opts_mem_type contains
* a callback and its argument that can be used to get a Memory Key. */
SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY = 0,
};

/** Describes the memory used by an extended IO request. */
struct spdk_bdev_ext_io_opts_mem_type {
/** This value determines which part of the union should be used. Provides extensibility of this structure. */
enum spdk_bdev_ext_io_opts_mem_types type;
union {
/** Used when \b type is SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY. */
struct {
/** Callback that can be used to get a Memory Key. */
spdk_bdev_io_get_mkey get_mkey_cb;
/** Argument for \b get_mkey_cb. */
void *get_mkey_cb_arg;
} mkey;
} u;
};

/** Bits that can be combined in the \b flags member of spdk_bdev_ext_io_opts. */
enum spdk_bdev_ext_io_opts_flags {
/** This flag determines the type of memory passed in IO request.
* Refer to \ref spdk_bdev_ext_io_opts_mem_types for more information.
* If this flag is set in spdk_bdev_ext_io_opts then the \b mem_type member of
* \b spdk_bdev_ext_io_opts should point to a structure that describes the memory buffer. */
SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE = 1u << 0,
};

/**
* Structure with optional IO request parameters.
*
* NOTE(review): the documentation of spdk_bdev_readv_blocks_with_md_ext and
* spdk_bdev_writev_blocks_with_md_ext mentions an \b opts_size value, but no such
* member is declared in this structure - confirm which one is intended.
*/
struct spdk_bdev_ext_io_opts {
/** Combination of bits defined in \b enum spdk_bdev_ext_io_opts_flags */
uint64_t flags;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flags -> comp_mask

/** Describes type of the memory used in IO request. Applicable for block devices that report
* SPDK_BDEV_CAP_EXT_MEMORY_TYPE_MKEY capability in \ref spdk_bdev_get_caps function.
* This structure must be filled by the user if \b SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE bit is set
* in \b flags member. */
struct spdk_bdev_ext_io_opts_mem_type *mem_type;
};

/**
* Submit a read request to the bdev on the given channel. This differs from
* spdk_bdev_read by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer into the buffers
* provided. In this case, the request may fail. This function uses a separate
* buffer for metadata transfer (valid only if bdev supports this mode).
* Compared to spdk_bdev_readv_blocks_with_md, this function additionally takes
* \b opts with extended IO request options.
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be read into.
* \param iovcnt The number of elements in iov.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to read.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options, see
* spdk_bdev_ext_io_opts. NOTE(review): presumably may be NULL since it is
* described as optional - confirm with the implementation.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported or opts_size is incorrect
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
*/
int spdk_bdev_readv_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);

/**
* Submit a write request to the bdev on the given channel.
*
Expand Down Expand Up @@ -1060,6 +1142,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
* Submit a write request to the bdev on the given channel. This differs from
* spdk_bdev_write by allowing the data buffer to be described in a scatter
* gather list. Some physical devices place memory alignment requirements on
* data or metadata and may not be able to directly transfer out of the buffers
* provided. In this case, the request may fail. This function uses a separate
* buffer for metadata transfer (valid only if bdev supports this mode).
* Compared to spdk_bdev_writev_blocks_with_md, this function additionally takes
* \b opts with extended IO request options.
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param iov A scatter gather list of buffers to be written from.
* \param iovcnt The number of elements in iov.
* \param md Metadata buffer.
* \param offset_blocks The offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to write.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options, see
* spdk_bdev_ext_io_opts. NOTE(review): presumably may be NULL since it is
* described as optional - confirm with the implementation.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
* metadata is not supported or opts_size is incorrect
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
* * -EBADF - desc not open for writing
*/
int spdk_bdev_writev_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);

/**
* Submit a compare request to the bdev on the given channel.
*
Expand Down Expand Up @@ -1733,6 +1850,30 @@ void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data
size_t spdk_bdev_get_media_events(struct spdk_bdev_desc *bdev_desc,
struct spdk_bdev_media_event *events, size_t max_events);

/** Capability bits that are combined in the \b flags member of spdk_bdev_capability. */
enum spdk_bdev_capability_type {
/** Bdev supports indirect memory access using Memory Key.
* That means that the user of ext bdev API can fill spdk_bdev_ext_io_opts_mem_type
* structure and set SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE flag in spdk_bdev_ext_io_opts structure. */
SPDK_BDEV_CAP_EXT_MEMORY_TYPE_MKEY = 1u << 0u,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/** Bdev supports indirect memory access using Memory Key.

  • That means that the user of ext bdev API can fill spdk_bdev_ext_io_opts_mem_type
  • structure and set SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE flag in spdk_bdev_ext_io_opts structure
  • That also means that bdev can work with regular memory buffers */

};

/** Describes capabilities of a block device, see \ref spdk_bdev_get_caps. */
struct spdk_bdev_capability {
/** Size of this structure in bytes, should be set by the user. */
size_t size;
/** Bitwise combination of \ref spdk_bdev_capability_type values. */
uint64_t flags;
};

/**
* Get bdev capabilities.
*
* \param bdev Block device.
* \param caps Capabilities of the block device to be filled by this function. Its \b size
* member should be set by the caller (see spdk_bdev_capability).
* \return 0 on success, negated errno on failure.
*/
int spdk_bdev_get_caps(struct spdk_bdev *bdev, struct spdk_bdev_capability *caps);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to
spdk_bdev_get_ext_caps


#ifdef __cplusplus
}
#endif
Expand Down
11 changes: 11 additions & 0 deletions include/spdk/bdev_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ struct spdk_bdev_fn_table {

/** Get bdev module context. */
void *(*get_module_ctx)(void *ctx);

/** Get block device capabilities */
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extended capabilities

void (*get_caps)(void *ctx, struct spdk_bdev_capability *caps);
};

/** bdev I/O completion status */
Expand Down Expand Up @@ -685,6 +688,14 @@ struct spdk_bdev_io {

/** Enables queuing parent I/O when no bdev_ios available for split children. */
struct spdk_bdev_io_wait_entry waitq_entry;

/** Copy of structure passed by the user in ext API */
struct spdk_bdev_ext_io_opts ext_opts;

/** Contains callbacks passed by the user in ext API. Content of this structure is valid if
* SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE flag is set in \b ext_opts */
struct spdk_bdev_ext_io_opts_mem_type mem_type;

} internal;

/**
Expand Down
121 changes: 121 additions & 0 deletions include/spdk/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -2545,6 +2545,90 @@ int spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qp
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag);

/**
* Callback used to get a Memory Key per IO request.
*
* \param cb_arg Argument passed to the callback. NOTE(review):
* spdk_nvme_ns_cmd_ext_io_opts_mem_type declares no member that carries this
* argument - confirm where it comes from.
* \param address NOTE(review): presumably the start of the memory region the key is
* requested for - confirm with the implementation.
* \param length NOTE(review): presumably the length of that region in bytes - confirm.
* \param pd Input parameter, should point to a memory domain.
* \param mkey Output value, receives the Memory Key.
*
* \return NOTE(review): the meaning of the int return value is not specified here;
* presumably 0 on success - confirm.
*/
typedef int (*spdk_nvme_ns_cmd_io_get_mkey)(void *cb_arg, void *address, size_t length, void *pd,
uint32_t *mkey);

/** Kinds of memory description that can be attached to an extended NVMe IO request.
* The selected value is stored in the \b type member of spdk_nvme_ns_cmd_ext_io_opts_mem_type. */
enum spdk_nvme_ns_cmd_ext_io_opts_mem_types {
/** Memory in IO request belongs to another memory domain and it is described by Memory Key.
* If this value is set then the \b mkey structure in spdk_nvme_ns_cmd_ext_io_opts_mem_type
* contains a callback that can be used to get a Memory Key.
* NOTE(review): this comment used to promise "a callback and its argument", but the
* \b mkey structure declares only the callback - confirm whether an argument member is missing. */
SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY = 0,
};

/** Describes the memory used by an extended NVMe IO request. */
struct spdk_nvme_ns_cmd_ext_io_opts_mem_type {
/** This value determines which part of the union should be used. Provides extensibility for this structure. */
enum spdk_nvme_ns_cmd_ext_io_opts_mem_types type;
union {
/** Used when \b type is SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY. */
struct {
/** Callback that can be used to get a Memory Key.
* NOTE(review): unlike the \b mkey structure of spdk_bdev_ext_io_opts_mem_type, no
* callback argument member is declared next to the callback. */
spdk_nvme_ns_cmd_io_get_mkey get_mkey_cb;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add void *cb_arg

} mkey;
} u;
};

/** Bits that can be combined in the \b flags member of spdk_nvme_ns_cmd_ext_io_opts. */
enum spdk_nvme_ns_cmd_ext_io_opts_flags {
/** This flag determines the type of memory passed in IO request.
* Refer to \ref spdk_nvme_ns_cmd_ext_io_opts_mem_types for more information.
* If this flag is set in spdk_nvme_ns_cmd_ext_io_opts then the \b mem_type member of
* \b spdk_nvme_ns_cmd_ext_io_opts should point to a structure that describes the memory buffer. */
SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE = 1u << 0,
};

/**
* Structure with optional IO request parameters, passed to the
* spdk_nvme_ns_cmd_readv_with_md_ext and spdk_nvme_ns_cmd_writev_with_md_ext functions.
*/
struct spdk_nvme_ns_cmd_ext_io_opts {
/** Combination of bits defined in \b enum spdk_nvme_ns_cmd_ext_io_opts_flags */
uint64_t flags;
/** Describes type of the memory used in IO request.
* This structure must be filled by the user if \b SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE bit is set
* in \b flags member. Used by RDMA transport, other transports ignore this extension. */
struct spdk_nvme_ns_cmd_ext_io_opts_mem_type *mem_type;
};

/**
* Submit a write I/O to the specified NVMe namespace.
*
* Compared to spdk_nvme_ns_cmd_writev_with_md, this function additionally takes
* \b opts with extended IO request options.
*
* The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
* The user must ensure that only one thread submits I/O on a given qpair at any
* given time.
*
* \param ns NVMe namespace to submit the write I/O
* \param qpair I/O queue pair to submit the request
* \param lba starting LBA to write the data
* \param lba_count length (in sectors) for the write operation
* \param cb_fn callback function to invoke when the I/O is completed
* \param cb_arg argument to pass to the callback function
* \param io_flags set flags, defined in nvme_spec.h, for this I/O
* \param reset_sgl_fn callback function to reset scattered payload
* \param next_sge_fn callback function to iterate each scattered
* payload memory segment
* \param metadata virtual address pointer to the metadata payload, the length
* of metadata is specified by spdk_nvme_ns_get_md_size()
* \param apptag_mask application tag mask.
* \param apptag application tag to use for end-to-end protection information.
* \param opts Optional structure with extended IO request options, see
* spdk_nvme_ns_cmd_ext_io_opts. NOTE(review): presumably may be NULL since it is
* described as optional - confirm with the implementation.
*
* \return 0 if successfully submitted, negated errnos on the following error conditions:
* -EINVAL: The request is malformed.
* -ENOMEM: The request cannot be allocated.
* -ENXIO: The qpair is failed at the transport level.
* -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
* with error status including dnr=1 in this case.
*/
int spdk_nvme_ns_cmd_writev_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
uint64_t lba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move apptag_mask, apptag and io_flags to ext_opts structure

struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
* Submit a write I/O to the specified NVMe namespace.
*
Expand Down Expand Up @@ -2725,6 +2809,43 @@ int spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag);

/**
* Submit a read I/O to the specified NVMe namespace.
*
* Compared to spdk_nvme_ns_cmd_readv_with_md, this function additionally takes
* \b opts with extended IO request options.
*
* The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
* The user must ensure that only one thread submits I/O on a given qpair at any given time.
*
* \param ns NVMe namespace to submit the read I/O
* \param qpair I/O queue pair to submit the request
* \param lba starting LBA to read the data
* \param lba_count length (in sectors) for the read operation
* \param cb_fn callback function to invoke when the I/O is completed
* \param cb_arg argument to pass to the callback function
* \param io_flags set flags, defined in nvme_spec.h, for this I/O
* \param reset_sgl_fn callback function to reset scattered payload
* \param next_sge_fn callback function to iterate each scattered
* payload memory segment
* \param metadata virtual address pointer to the metadata payload, the length
* of metadata is specified by spdk_nvme_ns_get_md_size()
* \param apptag_mask application tag mask.
* \param apptag application tag to use for end-to-end protection information.
* \param opts Optional structure with extended IO request options, see
* spdk_nvme_ns_cmd_ext_io_opts. NOTE(review): presumably may be NULL since it is
* described as optional - confirm with the implementation.
*
* \return 0 if successfully submitted, negated errnos on the following error conditions:
* -EINVAL: The request is malformed.
* -ENOMEM: The request cannot be allocated.
* -ENXIO: The qpair is failed at the transport level.
* -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
* with error status including dnr=1 in this case.
*/
int spdk_nvme_ns_cmd_readv_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
uint64_t lba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag,
struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
* Submits a read I/O to the specified NVMe namespace.
*
Expand Down
Loading