Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e972c31
feat(ruby): replace Docker cluster with cluster_manager.py
alexr-bq Jun 3, 2026
450df43
fix(ci): build Valkey from source instead of using packages.valkey.io
alexr-bq Jun 3, 2026
dbe3d86
fix: resolve RuboCop offenses in test cluster infrastructure
alexr-bq Jun 3, 2026
3739a4f
refactor: rename test:valkey to test:standalone, run both by default
alexr-bq Jun 3, 2026
bb4eef3
refactor(ruby): unify test structure with shared ValkeyTests modules
alexr-bq Jun 4, 2026
d6e6dbd
fix(ruby): separate test classes for modules with custom setup methods
alexr-bq Jun 4, 2026
d9d3261
fix(ruby): add cluster-mode skips for cross-slot operations
alexr-bq Jun 4, 2026
c922ce6
chore: remove spec files and notes from git tracking
alexr-bq Jun 4, 2026
a2ac515
refactor(ruby): restore _test suffix to test/valkey module files
alexr-bq Jun 4, 2026
cd3433e
chore(ruby): exclude test/valkey and test/lint from lost_tests check
alexr-bq Jun 4, 2026
23a53b9
fix(ruby): add cluster-mode skips for MULTI/EXEC and cross-slot tests
alexr-bq Jun 4, 2026
12c61ec
fix(ruby): add flush delay in OpenTelemetry test setup
alexr-bq Jun 4, 2026
38a4471
fix(ruby): add cluster-mode skips for CrossSlot and destructive tests
alexr-bq Jun 4, 2026
ba844ca
style(ruby): fix rubocop offenses in test files
alexr-bq Jun 4, 2026
477f8f9
fix(ruby): use _new_client helper in test_client_kill tests
alexr-bq Jun 4, 2026
fa6d493
fix(ruby): skip CLIENT KILL tests in cluster mode
alexr-bq Jun 4, 2026
dd4c28e
Address PR feedback
alexr-bq Jun 9, 2026
42a48f2
chore(ruby): address PR review feedback
alexr-bq Jun 10, 2026
f814874
Merge branch 'main' into cluster-test-infra
alexr-bq Jun 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 24 additions & 69 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ jobs:
bundler-cache: true

- name: Run standalone tests
run: bundle exec rake test:valkey
run: bundle exec rake test:standalone
Comment thread
Aryex marked this conversation as resolved.

- name: Stop Valkey containers
if: always()
Expand All @@ -245,27 +245,27 @@ jobs:
- 3.1
- 3.0

services:
valkey-cluster:
image: grokzen/redis-cluster:7.0.15
ports:
- 7000:7000
- 7001:7001
- 7002:7002
- 7003:7003
- 7004:7004
- 7005:7005
options: >-
--health-cmd "redis-cli -p 7000 ping && redis-cli -p 7001 ping && redis-cli -p 7002 ping"
--health-interval 15s
--health-timeout 10s
--health-retries 15

steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Build and install Valkey from source
run: |
echo "Building Valkey 8.0.0 from source..."
git clone https://github.com/valkey-io/valkey.git
cd valkey
git checkout 8.0.0
make BUILD_TLS=yes -j4
sudo make install
echo "Verifying Valkey installation..."
valkey-server --version

- name: Download native library
uses: actions/download-artifact@v4
with:
Expand All @@ -278,59 +278,14 @@ jobs:
ruby-version: ${{ matrix.ruby }}
bundler-cache: true

- name: Configure cluster nodes
run: |
echo "=== Disabling protected mode on all nodes ==="
for port in 7000 7001 7002 7003 7004 7005; do
echo "Configuring port $port..."
docker exec $(docker ps -q | head -n1) redis-cli -p $port config set protected-mode no || echo "Failed to configure port $port"
done

- name: Wait for cluster to be ready
run: |
echo "=== Waiting for Redis cluster to be fully ready ==="
max_attempts=60
attempt=1

while [ $attempt -le $max_attempts ]; do
echo "Attempt $attempt/$max_attempts: Checking cluster state..."

# Check if all nodes are responding
all_nodes_up=true
for port in 7000 7001 7002 7003 7004 7005; do
if ! docker exec $(docker ps -q | head -n1) redis-cli -p $port ping > /dev/null 2>&1; then
echo " Node $port is not responding"
all_nodes_up=false
break
fi
done

if [ "$all_nodes_up" = true ]; then
# Check cluster state
cluster_state=$(docker exec $(docker ps -q | head -n1) redis-cli -p 7000 cluster info | grep "cluster_state" | cut -d: -f2 | tr -d '\r')
echo " Cluster state: $cluster_state"

if [ "$cluster_state" = "ok" ]; then
echo "✅ Cluster is ready!"
break
fi
fi

echo " Cluster not ready yet, waiting 5 seconds..."
sleep 5
attempt=$((attempt + 1))
done

if [ $attempt -gt $max_attempts ]; then
echo "❌ Cluster failed to become ready within timeout"
echo "=== Final cluster info ==="
docker exec $(docker ps -q | head -n1) redis-cli -p 7000 cluster info || echo "Failed to get cluster info"
echo "=== Cluster nodes ==="
docker exec $(docker ps -q | head -n1) redis-cli -p 7000 cluster nodes || echo "Failed to get cluster nodes"
exit 1
fi

- name: Run cluster tests
run: bundle exec rake test:cluster
timeout-minutes: 25

- name: Upload cluster logs on failure
if: failure()
uses: actions/upload-artifact@v4
with:
name: cluster-logs-${{ matrix.ruby }}
path: valkey-glide/utils/clusters/
retention-days: 7
2 changes: 2 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ Metrics/MethodLength:
- 'lib/valkey.rb'
- 'lib/valkey/opentelemetry.rb'
- 'lib/valkey/bindings.rb'
- 'lib/valkey/test_cluster.rb'
- 'test/**/*.rb'

Metrics/ClassLength:
Exclude:
- 'lib/valkey.rb'
- 'lib/valkey/test_cluster.rb'
- 'test/**/*.rb'

Metrics/BlockLength:
Expand Down
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ gem "minitest", "~> 5.16"
gem "minitest-reporters", "~> 1.4"

gem "rubocop", "~> 1.21"

gem "rantly", "~> 2.0"
Comment thread
prateek-kumar-improving marked this conversation as resolved.
Outdated
11 changes: 8 additions & 3 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ task native: "native:build"
# =============================================================================

namespace :test do
groups = %i[valkey cluster]
groups = %i[standalone cluster]
Comment thread
Aryex marked this conversation as resolved.
groups.each do |group|
Rake::TestTask.new(group) do |t|
t.libs << "test"
Expand All @@ -117,10 +117,15 @@ namespace :test do
end
end

lost_tests = Dir["test/**/*_test.rb"] - groups.map { |g| Dir["test/#{g}/**/*_test.rb"] }.flatten
# Exclude module directories (valkey/, lint/) from lost_tests check
# These contain reusable test modules, not standalone test files
module_dirs = %w[valkey lint]
lost_tests = Dir["test/**/*_test.rb"] -
groups.map { |g| Dir["test/#{g}/**/*_test.rb"] }.flatten -
module_dirs.map { |d| Dir["test/#{d}/**/*_test.rb"] }.flatten
abort "The following test files are in no group:\n#{lost_tests.join("\n")}" unless lost_tests.empty?
end

task test: ["test:valkey"]
task test: ["test:standalone", "test:cluster"]

task default: :test
187 changes: 187 additions & 0 deletions lib/valkey/test_cluster.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# frozen_string_literal: true

require "open3"

class Valkey
  # Starts and stops a local Valkey server (standalone or cluster mode) for the
  # test suite by shelling out to `cluster_manager.py` from the `valkey-glide`
  # git submodule.
  #
  # When constructed without +addresses+, a new cluster is started immediately
  # and a finalizer is registered so it is stopped even if {#close} is never
  # called. When +addresses+ is given, nothing is started and {#close} is a
  # no-op.
  class TestCluster
    # Raised when the valkey-glide submodule or cluster_manager.py is missing.
    class ScriptNotFoundError < StandardError; end

    # Raised when `python3` cannot be executed.
    class PythonNotFoundError < StandardError; end

    # Raised when cluster_manager.py exits with a non-zero status.
    class ClusterStartError < StandardError; end

    # Raised when cluster_manager.py output lacks the expected keys or contains
    # a malformed node entry.
    class OutputParseError < StandardError; end

    # Filesystem layout of the valkey-glide submodule, relative to the repo root
    # (two directories above this file's directory).
    SUBMODULE_DIR = File.join(File.expand_path("../..", __dir__), "valkey-glide")
    UTILS_DIR = File.join(SUBMODULE_DIR, "utils")
    SCRIPT_PATH = File.join(UTILS_DIR, "cluster_manager.py")
    TLS_CERTS_DIR = File.join(UTILS_DIR, "tls_crts")
    private_constant :SUBMODULE_DIR, :UTILS_DIR, :SCRIPT_PATH, :TLS_CERTS_DIR

    attr_reader :addresses, :cluster_folder, :tls_cert_path, :tls_key_path, :tls_ca_cert_path

    # @param cluster_mode [Boolean] pass `--cluster-mode` to the start command
    # @param tls [Boolean] pass `--tls` and expose the TLS certificate paths
    # @param shard_count [Integer] value for `-n`
    # @param replica_count [Integer] value for `-r`
    # @param load_module [String, Array<String>, nil] module path(s) for `--load-module`
    # @param addresses [Array<Hash>, nil] pre-existing node addresses; when given,
    #   no cluster is started
    # @raise [PythonNotFoundError, ScriptNotFoundError, ClusterStartError, OutputParseError]
    def initialize(
      cluster_mode: false,
      tls: false,
      shard_count: 3,
      replica_count: 1,
      load_module: nil,
      addresses: nil
    )
      @cluster_mode = cluster_mode
      @tls = tls
      @shard_count = shard_count
      @replica_count = replica_count
      @load_module = load_module
      @cluster_folder = nil
      @tls_cert_path = nil
      @tls_key_path = nil
      @tls_ca_cert_path = nil

      if addresses
        @addresses = addresses
      else
        start_cluster
        # The proc is built in a class method so it does not capture `self`,
        # which would keep this object alive and prevent finalization.
        ObjectSpace.define_finalizer(self, self.class.cleanup_proc(@cluster_folder, @tls))
      end
    end

    # Stops the cluster started by this instance. Does nothing when no cluster
    # was started (or it was already closed).
    #
    # @return [nil]
    # @raise [ScriptNotFoundError] if cluster_manager.py is no longer available
    def close
      return unless @cluster_folder

      ObjectSpace.undefine_finalizer(self)

      script_path # validates the submodule/script; raises ScriptNotFoundError if missing
      stop_cmd = self.class.build_stop_command(@cluster_folder, tls: @tls)
      system(*stop_cmd, out: File::NULL, err: File::NULL)

      @cluster_folder = nil
      @addresses = nil
    end

    # Builds the argv that stops the cluster living in +cluster_folder+.
    #
    # @param cluster_folder [String]
    # @param tls [Boolean]
    # @return [Array<String>]
    def self.build_stop_command(cluster_folder, tls:)
      cmd = ["python3", SCRIPT_PATH]
      cmd << "--tls" if tls
      cmd + ["stop", "--cluster-folder", cluster_folder]
    end

    # Builds the finalizer that stops a cluster at GC/exit time.
    #
    # @param cluster_folder [String, nil]
    # @param tls [Boolean]
    # @return [Proc]
    def self.cleanup_proc(cluster_folder, tls)
      proc do
        # `next`, not `return`: by the time a finalizer runs, this method has
        # already returned, so `return` inside the proc raises LocalJumpError.
        next unless cluster_folder
        next unless File.exist?(SCRIPT_PATH)

        system(*build_stop_command(cluster_folder, tls: tls), out: File::NULL, err: File::NULL)
      end
    end

    # Builds the argv that starts a cluster.
    #
    # @param script_path [String] path to cluster_manager.py
    # @param cluster_mode [Boolean]
    # @param tls [Boolean]
    # @param shard_count [Integer]
    # @param replica_count [Integer]
    # @param load_module [String, Array<String>, nil] one module path, a list, or nil
    # @return [Array<String>]
    def self.build_start_args(script_path:, cluster_mode:, tls:, shard_count:, replica_count:, load_module:)
      # Array() normalises nil / a single path / a list of paths.
      module_args = Array(load_module).flat_map { |mod| ["--load-module", mod.to_s] }

      [
        "python3",
        script_path,
        *(tls ? ["--tls"] : []),
        "start",
        *(cluster_mode ? ["--cluster-mode"] : []),
        "-n", shard_count.to_s,
        "-r", replica_count.to_s,
        *module_args
      ]
    end

    # Extracts the cluster folder and node addresses from cluster_manager.py stdout.
    #
    # @param output [String] stdout containing `CLUSTER_FOLDER=` and `CLUSTER_NODES=` lines
    # @return [Hash] `{ cluster_folder: String, addresses: Array<{host: String, port: Integer}> }`
    # @raise [OutputParseError] if a key is missing or a node entry is malformed
    def self.parse_output(output)
      cluster_folder = extract_output_value(output, "CLUSTER_FOLDER")
      cluster_nodes = extract_output_value(output, "CLUSTER_NODES")

      raise OutputParseError, "Missing CLUSTER_FOLDER in output" unless cluster_folder
      raise OutputParseError, "Missing CLUSTER_NODES in output" unless cluster_nodes

      { cluster_folder: cluster_folder, addresses: parse_cluster_nodes(cluster_nodes) }
    end

    class << self
      private

      # Returns the stripped value of the first `KEY=value` line, or nil when
      # the key is absent or its value is blank.
      def extract_output_value(output, key)
        value = output[/^#{Regexp.escape(key)}=(.+)$/, 1]&.strip
        value unless value.nil? || value.empty?
      end

      # Parses a comma-separated `host:port` list. The split is on the LAST
      # colon so hosts that themselves contain colons keep them.
      def parse_cluster_nodes(nodes_str)
        nodes_str.split(",").map do |node|
          host, _sep, port_str = node.strip.rpartition(":")
          raise OutputParseError, "Invalid node format: #{node}" if host.empty? || port_str.empty?

          # String#to_i would silently turn a garbage port into 0.
          port = port_str.match?(/\A\d+\z/) ? port_str.to_i : 0
          raise OutputParseError, "Invalid port in node: #{node}" unless port.between?(1, 65_535)

          { host: host, port: port }
        end
      end
    end

    private

    # Runs cluster_manager.py `start` and records the resulting folder,
    # addresses and (for TLS) certificate paths.
    def start_cluster
      check_python_available

      cmd = self.class.build_start_args(
        script_path: script_path,
        cluster_mode: @cluster_mode,
        tls: @tls,
        shard_count: @shard_count,
        replica_count: @replica_count,
        load_module: @load_module
      )

      stdout, stderr, status = Open3.capture3(*cmd)
      raise ClusterStartError, "cluster_manager.py failed: #{stderr}" unless status.success?

      result = self.class.parse_output(stdout)
      @cluster_folder = result[:cluster_folder]
      @addresses = result[:addresses]

      return unless @tls

      @tls_cert_path = File.join(TLS_CERTS_DIR, "server.crt")
      @tls_key_path = File.join(TLS_CERTS_DIR, "server.key")
      @tls_ca_cert_path = File.join(TLS_CERTS_DIR, "ca.crt")
    end

    # Returns the path to cluster_manager.py after verifying that the
    # submodule is checked out and the script exists.
    #
    # @raise [ScriptNotFoundError]
    def script_path
      unless Dir.exist?(SUBMODULE_DIR) && Dir.children(SUBMODULE_DIR).any?
        raise ScriptNotFoundError,
              "valkey-glide submodule not initialized at #{SUBMODULE_DIR}. " \
              "Run: git submodule update --init --recursive"
      end

      unless File.exist?(SCRIPT_PATH)
        raise ScriptNotFoundError,
              "cluster_manager.py not found at #{SCRIPT_PATH}. " \
              "Ensure the valkey-glide submodule is up to date: git submodule update --init --recursive"
      end

      SCRIPT_PATH
    end

    # Verifies that `python3 --version` can be executed.
    #
    # @raise [PythonNotFoundError]
    def check_python_available
      return if system("python3", "--version", out: File::NULL, err: File::NULL)

      raise PythonNotFoundError,
            "Python 3 is required but not found. " \
            "Please install Python 3 and ensure 'python3' is available in your PATH."
    end
  end
end
Loading
Loading