Skip to content

Commit 249910a

Browse files
author
mayastor-bors
committed
Merge #1902
1902: ci: update github runners to oci gh arc runners r=tiagolobocastro a=tiagolobocastro CNCF has hosted ephemeral GitHub runners on Oracle instead of the GitHub-hosted ones, which now incur a cost to use. Co-authored-by: mayastor-bors <[email protected]>
2 parents c94de54 + 603647e commit 249910a

File tree

18 files changed

+293
-28
lines changed

18 files changed

+293
-28
lines changed

.github/workflows/bdd.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ env:
99

1010
jobs:
1111
bdd-tests:
12-
runs-on: ubuntu-latest-16-cores
12+
runs-on: oracle-vm-16cpu-64gb-x86-64
1313
permissions:
1414
contents: read
1515
id-token: write
@@ -38,7 +38,7 @@ jobs:
3838

3939
- name: Pre-populate nix-shell
4040
run: |
41-
export NIX_PATH=nixpkgs=$(jq '.nixpkgs.url' nix/sources.json -r)
41+
export NIX_PATH=nixpkgs=$(jq '.nixpkgs.url' spdk-rs/nix/sources.json -r)
4242
echo "NIX_PATH=$NIX_PATH" >> $GITHUB_ENV
4343
nix-shell --run "echo" shell.nix
4444
@@ -61,6 +61,8 @@ jobs:
6161
done
6262
# for the coredump check
6363
sudo apt-get install gdb
64+
# Check if nvme config is valid, otherwise apply it
65+
sudo ./scripts/nvme-conf.sh --check --apply --overwrite
6466
6567
- name: Setup VENV
6668
run: nix-shell --run "./test/python/setup.sh"

.github/workflows/unit-int.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ env:
99

1010
jobs:
1111
int-tests:
12-
runs-on: ubuntu-latest-16-cores
12+
runs-on: oracle-vm-16cpu-64gb-x86-64
1313
permissions:
1414
contents: read
1515
id-token: write
@@ -38,7 +38,7 @@ jobs:
3838

3939
- name: Pre-populate nix-shell
4040
run: |
41-
export NIX_PATH=nixpkgs=$(jq '.nixpkgs.url' nix/sources.json -r)
41+
export NIX_PATH=nixpkgs=$(jq '.nixpkgs.url' spdk-rs/nix/sources.json -r)
4242
echo "NIX_PATH=$NIX_PATH" >> $GITHUB_ENV
4343
nix-shell --run "echo" shell.nix
4444
@@ -62,6 +62,8 @@ jobs:
6262
sudo modprobe $module
6363
done
6464
sudo apt-get install gdb
65+
# Check if nvme config is valid, otherwise apply it
66+
sudo ./scripts/nvme-conf.sh --check --apply --overwrite
6567
6668
- name: Run Rust Tests
6769
run: |

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

doc/test.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,29 @@ RUST_LOG=TRACE cargo test --features=io-engine-testing -- --test-threads 1 --noc
3838
> _**NOTE**_:
3939
> The flag --features=io-engine-testing ensures you run tests with features enabled only for testing purposes
4040
41+
Recent Linux kernel versions no longer allow mixing NVMe hostids with different NVMe hostnqns, so you should
42+
ensure your system is configured with default values.
43+
This is typically already the case if nvme-cli is installed on your host, but you can also ensure it by using our script:
44+
45+
```bash
46+
sudo ./scripts/nvme-conf.sh --check --apply
47+
NVME SYSCONFDIR : /etc/nvme
48+
| exists : true
49+
|= hostid : /etc/nvme/hostid
50+
| exists : true
51+
| content : 7b562a4c-61e8-ef10-a994-047c16834e32
52+
|= hostnqn : /etc/nvme/hostnqn
53+
| exists : true
54+
| content : nqn.2014-08.org.nvmexpress:uuid:7b562a4c-61e8-ef10-a994-047c16834e32
55+
56+
57+
INFO: /etc/nvme is valid
58+
```
59+
4160
## Testing your own SPDK version
4261

43-
To test your custom SPDK version please refer to the [spdk-rs documentation](https://github.com/openebs/spdk-rs/blob/develop/README.md#custom-spdk)
62+
To test your custom SPDK version please refer to
63+
the [spdk-rs documentation](https://github.com/openebs/spdk-rs/blob/develop/README.md#custom-spdk)
4464

4565
## Using PCIe NVMe devices in cargo tests while developing
4666

io-engine-tests/src/nvme.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,27 @@ pub fn nvme_connect(
8888
transport: &str,
8989
must_succeed: bool,
9090
) -> ExitStatus {
91-
let status = Command::new("nvme")
92-
.args(["connect"])
91+
let mut comand = Command::new("nvme");
92+
let mut comand = comand.args(["connect"]);
93+
94+
if !std::path::Path::new("/etc/nvme/hostid").exists()
95+
|| !std::path::Path::new("/etc/nvme/hostnqn").exists()
96+
{
97+
match (
98+
std::env::var("NVME_HOSTID").ok(),
99+
std::env::var("NVME_HOSTNQN").ok(),
100+
) {
101+
(Some(hid), Some(hnqn)) => {
102+
tracing::warn!("/etc/nvme is not present, using {hid} and {hnqn}");
103+
comand = comand.args(["-I", &hid]).args(["-q", &hnqn])
104+
}
105+
_ => {
106+
panic!("/etc/nvme is not present and no env var NVME_HOSTID available")
107+
}
108+
}
109+
}
110+
111+
let status = comand
93112
.args(["-t", transport])
94113
.args(["-a", target_addr])
95114
.args(["-s", "8420"])

io-engine/src/bdev/nexus/nexus_persistence.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ impl<'n> Nexus<'n> {
211211
None => self.uuid().to_string(),
212212
};
213213

214+
// these are actually tries, not retries!
214215
let mut retry = PersistentStore::retries();
215216
let mut logged = false;
216217
loop {

io-engine/tests/nexus_child_retire.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ async fn nexus_child_retire_persist_unresponsive_with_fio() {
273273
cluster.test.thaw("etcd").await.unwrap();
274274

275275
assert!(
276-
tokio::time::timeout(Duration::from_secs(1), &mut r1)
276+
tokio::time::timeout(Duration::from_secs(2), &mut r1)
277277
.await
278278
.is_ok(),
279279
"I/O to nexus must proceed when ETCD is thawed"
@@ -334,15 +334,16 @@ const NEXUS_NAME: &str = "nexus_0";
334334
const NEXUS_UUID: &str = "cdc2a7db-3ac3-403a-af80-7fadc1581c47";
335335

336336
#[tokio::test]
337+
#[ignore]
337338
/// Test ETCD misbehaviour during a child retirement: a nexus must not ack I/Os
338339
/// to a client if a persistent store cannot be updated while a child is being
339340
/// retired.
340341
///
341342
/// [1] Create etcd, pools, replicas, and nexus.
342-
/// [2] Inject an fault to a replica.
343+
/// [2] Inject a fault to a replica.
343344
/// [3] Pause ETCD container.
344345
/// [4] Write to the nexus. A replica fails due to injected fault, and I/O on
345-
/// nexus must stuck.
346+
/// nexus must remain stuck.
346347
/// [5] Thaw ETCD container.
347348
/// [6] I/Os must now be acknowledged to the client.
348349
async fn nexus_child_retire_persist_unresponsive_with_bdev_io() {
@@ -399,7 +400,7 @@ async fn nexus_child_retire_persist_unresponsive_with_bdev_io() {
399400
"I/O to nexus must proceed when ETCD is thawed"
400401
);
401402

402-
// Check that 1st child is fauled, and 2nd is open.
403+
// Check that 1st child is faulted, and 2nd is open.
403404
assert!(matches!(
404405
nex.child_at(0).state(),
405406
ChildState::Faulted(FaultReason::IoError)

libnvme-rs/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,7 @@ version = "1.0"
2828
[dependencies.udev]
2929
features = ["hwdb", "mio"]
3030
version = "^0.9.1"
31+
32+
[dependencies.uuid]
33+
features = ["v4"]
34+
version = "1.11.0"

libnvme-rs/src/nvme_uri.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::{convert::TryFrom, io, os::raw::c_char, time::Duration};
2-
32
use url::{ParseError, Url};
43

54
use mio::{Events, Interest, Poll, Token};
@@ -127,15 +126,28 @@ impl NvmeTarget {
127126
/// Returns Ok on successful connect
128127
pub fn connect(&self) -> Result<(), NvmeError> {
129128
let r = NvmeRoot::new(unsafe { crate::nvme_scan(std::ptr::null()) });
129+
130+
// Note: if the hostid and hostnqn are not present on the system, then the hostid is generated by
131+
// the kernel, but it becomes sticky and there seems to be no way of querying it.
130132
let hostid = NvmeStringWrapper::new(unsafe { crate::nvmf_hostid_from_file() });
133+
let hostid_gen = std::ffi::CString::new(uuid::Uuid::new_v4().to_string()).unwrap();
131134

132135
let hostnqn = match self.hostnqn_autogen {
133136
true => NvmeStringWrapper::new(unsafe { crate::nvmf_hostnqn_generate() }),
134137
false => NvmeStringWrapper::new(unsafe { crate::nvmf_hostnqn_from_file() }),
135138
};
136139

137-
let h =
138-
unsafe { crate::nvme_lookup_host(r.as_mut_ptr(), hostnqn.as_ptr(), hostid.as_ptr()) };
140+
let h = unsafe {
141+
crate::nvme_lookup_host(
142+
r.as_mut_ptr(),
143+
hostnqn.as_ptr(),
144+
if self.hostnqn_autogen {
145+
hostid_gen.as_ptr()
146+
} else {
147+
hostid.as_ptr()
148+
},
149+
)
150+
};
139151
if h.is_null() {
140152
return Err(NvmeError::LookupHostError { rc: -libc::ENOMEM });
141153
}

scripts/cargo-test.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ cleanup_handler
1717
trap cleanup_handler INT QUIT TERM HUP EXIT
1818

1919
export PATH=$PATH:${HOME}/.cargo/bin
20-
set -euxo pipefail
20+
set -euo pipefail
2121

2222
# Warn if rdma-rxe and nvme-rdma kernel modules are not
2323
# available. Absence of rdma-rxe can be ignored on hardware
@@ -30,6 +30,11 @@ if ! lsmod | grep -q nvme_rdma; then
3030
echo "Warning: nvme_rdma kernel module is not loaded. Please load it for rdma tests to work."
3131
fi
3232

33+
if ! "$SCRIPTDIR"/nvme-conf.sh --check; then
34+
echo "Warning: nvme configuration may not be valid, this can cause some test issues"
35+
exit 1
36+
fi
37+
3338
( cd jsonrpc && cargo test )
3439
# test dependencies
3540
cargo build --bins --features=io-engine-testing

0 commit comments

Comments
 (0)