From a5521dec7b37aa05051ece39964aba0f9afbcf0b Mon Sep 17 00:00:00 2001 From: Yurii Koba Date: Fri, 15 Aug 2025 13:15:51 +0300 Subject: [PATCH 1/2] add doc about migration via sqlx --- configuration/src/configs/database.rs | 5 - database/README.md | 12 + database/src/postgres/mod.rs | 33 +-- database/src/postgres/tx_indexer.rs | 16 +- docs/DATABASE_MIGRATIONS.md | 388 ++++++++++++++++++++++++++ scripts/apply_all_migrations.sh | 144 ++++++++++ 6 files changed, 549 insertions(+), 49 deletions(-) create mode 100644 docs/DATABASE_MIGRATIONS.md create mode 100644 scripts/apply_all_migrations.sh diff --git a/configuration/src/configs/database.rs b/configuration/src/configs/database.rs index 815bc76d..5034484f 100644 --- a/configuration/src/configs/database.rs +++ b/configuration/src/configs/database.rs @@ -22,9 +22,6 @@ pub struct DatabaseConfig { pub shards_config: std::collections::HashMap, pub max_connections: u32, - // Migrations cannot be applied to read-only replicas - // We should run rpc-server only on read-only replicas - pub read_only: bool, pub shard_layout: Option, } @@ -34,7 +31,6 @@ impl DatabaseConfig { database_url: self.database_url.clone(), shards_config: self.shards_config.clone(), max_connections: self.max_connections, - read_only: true, shard_layout: self.shard_layout.clone(), } } @@ -77,7 +73,6 @@ impl From<CommonDatabaseConfig> for DatabaseConfig { max_connections: database_config .max_connections .unwrap_or_else(CommonDatabaseConfig::default_max_connections), - read_only: false, shard_layout: crate::shard_layout().ok(), } } diff --git a/database/README.md b/database/README.md index c06a0834..f4a6ac08 100644 --- a/database/README.md +++ b/database/README.md @@ -3,3 +3,15 @@ This is a helper crate that provides db manager. ## [Postgres DB](src/postgres/README.md) + +## [Database Migrations with SQLx](../docs/DATABASE_MIGRATIONS.md) + +Comprehensive guide for managing database migrations using SQLx CLI. This covers: +- Setting up and applying migrations for all database types (meta, shard, transaction details) +- Creating new migrations +- Migration best practices and troubleshooting +- Automated migration scripts + +## [Legacy Database Migration Scripts](database_migrations/README.md) + +Scripts for migrating data from existing databases (only needed for upgrading existing deployments).
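+For the SQLx-based migrations above, a minimal quickstart sketch run from the repository root (the connection URL is a placeholder; see the full guide for creating the databases and for the per-shard and transaction-details migrations):
+
+```bash
+# Install the SQLx CLI with PostgreSQL support
+cargo install sqlx-cli --no-default-features --features native-tls,postgres
+
+# Point SQLx at the target database (placeholder credentials) and apply the meta database migrations
+export DATABASE_URL="postgresql://username:password@localhost:5432/meta_db"
+sqlx migrate run --source database/src/postgres/migrations/meta_db
+```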
diff --git a/database/src/postgres/mod.rs b/database/src/postgres/mod.rs index 4c389255..059d720c 100644 --- a/database/src/postgres/mod.rs +++ b/database/src/postgres/mod.rs @@ -2,11 +2,6 @@ mod rpc_server; mod state_indexer; mod tx_indexer; -static META_DB_MIGRATOR: sqlx::migrate::Migrator = - sqlx::migrate!("src/postgres/migrations/meta_db"); -static SHARD_DB_MIGRATOR: sqlx::migrate::Migrator = - sqlx::migrate!("src/postgres/migrations/shard_db"); - #[derive(borsh::BorshSerialize, borsh::BorshDeserialize, Clone, Debug)] struct PageState { pub last_data_key: Option, @@ -45,31 +40,23 @@ pub struct PostgresDBManager { impl PostgresDBManager { async fn create_meta_db_pool( database_url: &str, - read_only: bool, max_connections: u32, ) -> anyhow::Result<sqlx::Pool<sqlx::Postgres>> { let pool = sqlx::postgres::PgPoolOptions::new() .max_connections(max_connections) .connect(database_url) .await?; - if !read_only { - Self::run_migrations(&META_DB_MIGRATOR, &pool).await?; - } Ok(pool) } async fn create_shard_db_pool( database_url: &str, - read_only: bool, max_connections: u32, ) -> anyhow::Result<sqlx::Pool<sqlx::Postgres>> { let pool = sqlx::postgres::PgPoolOptions::new() .max_connections(max_connections) .connect(database_url) .await?; - if !read_only { - Self::run_migrations(&SHARD_DB_MIGRATOR, &pool).await?; - } Ok(pool) } @@ -99,25 +86,13 @@ impl PostgresDBManager { ))?, }) } - - async fn run_migrations( - migrator: &sqlx::migrate::Migrator, - pool: &sqlx::Pool<sqlx::Postgres>, - ) -> anyhow::Result<()> { - migrator.run(pool).await?; - Ok(()) - } } #[async_trait::async_trait] impl crate::BaseDbManager for PostgresDBManager { async fn new(config: &configuration::DatabaseConfig) -> anyhow::Result<Box<Self>> { - let meta_db_pool = Self::create_meta_db_pool( - &config.database_url, - config.read_only, - config.max_connections, - ) - .await?; + let meta_db_pool = + Self::create_meta_db_pool(&config.database_url, config.max_connections).await?; let mut shards_pool = std::collections::HashMap::new(); let shard_layout = config .shard_layout @@ -128,9 +103,7 @@ impl crate::BaseDbManager for PostgresDBManager { .shards_config .get(&shard_id) .unwrap_or_else(|| panic!("Shard_{shard_id} - database config not found")); - let pool = - Self::create_shard_db_pool(database_url, config.read_only, config.max_connections) - .await?; + let pool = Self::create_shard_db_pool(database_url, config.max_connections).await?; shards_pool.insert(shard_id, pool); } Ok(Box::new(Self { diff --git a/database/src/postgres/tx_indexer.rs b/database/src/postgres/tx_indexer.rs index a783039b..2597a04b 100644 --- a/database/src/postgres/tx_indexer.rs +++ b/database/src/postgres/tx_indexer.rs @@ -4,23 +4,11 @@ use bigdecimal::num_traits::ToPrimitive; use bigdecimal::BigDecimal; use sqlx::QueryBuilder; -static META_RECEIPTS_AND_OUTCOMES_MIGRATOR: sqlx::migrate::Migrator = - sqlx::migrate!("src/postgres/migrations/tx_details/receipts_and_outcomes"); -static SHARDS_TRANSACTIONS_MIGRATOR: sqlx::migrate::Migrator = - sqlx::migrate!("src/postgres/migrations/tx_details/transactions"); - #[async_trait] impl crate::base::tx_indexer::TxIndexerDbManager for crate::postgres::PostgresDBManager { async fn create_tx_tables(&self) -> Result<()> { - // Transactions table and partitions on each shard - for pool in self.shards_pool.values() { - SHARDS_TRANSACTIONS_MIGRATOR.run(pool).await?; - } - - // Receipts and outcomes tables and partitions in meta_db_pool only - META_RECEIPTS_AND_OUTCOMES_MIGRATOR - .run(&self.meta_db_pool) - .await?; + // For Postgres, please read the `README.md` in the `database/src/postgres` directory.
+ // The tables are created by the migrations, so this method is not needed. Ok(()) } diff --git a/docs/DATABASE_MIGRATIONS.md b/docs/DATABASE_MIGRATIONS.md new file mode 100644 index 00000000..24858ea1 --- /dev/null +++ b/docs/DATABASE_MIGRATIONS.md @@ -0,0 +1,388 @@ +# Database Migrations with SQLx + +This document provides a comprehensive guide for managing database migrations in the NEAR Read RPC project using SQLx. + +## Overview + +The NEAR Read RPC project uses PostgreSQL databases with SQLx for migration management. The project has three distinct database types: + +1. **Meta Database** - Stores blocks, chunks, validator metadata, and receipts/outcomes +2. **Shard Databases** - Stores state changes, partition-specific data, and transactions +3. **Transaction Details** - Split between meta database (receipts/outcomes) and shard databases (transactions) + +## Database Structure + +``` +database/src/postgres/migrations/ +├── meta_db/ # Meta database migrations (blocks, chunks, validators, receipts/outcomes) +├── shard_db/ # Shard database migrations (state changes, transactions) +└── tx_details/ # Transaction details database migrations + ├── receipts_and_outcomes/ # Applied to meta database + └── transactions/ # Applied to shard databases +``` + +## Prerequisites + +### Install SQLx CLI + +```bash +cargo install sqlx-cli --no-default-features --features native-tls,postgres +``` + +### Environment Variables + +Set up the required database connection URLs: + +```bash +# Meta database +export META_DATABASE_URL="postgresql://username:password@localhost:5432/meta_db" + +# Shard databases (one per shard) +export SHARD_0_DATABASE_URL="postgresql://username:password@localhost:5432/shard_0_db" +export SHARD_1_DATABASE_URL="postgresql://username:password@localhost:5432/shard_1_db" +# ... additional shards as needed +``` + +## Migration Management + +### Creating New Migrations + +#### 1. Meta Database Migrations + +```bash +# Navigate to meta database migrations directory +cd database/src/postgres/migrations/meta_db + +# Create a new migration +sqlx migrate add -r +``` + +Example: +```bash +sqlx migrate add -r add_new_index_to_blocks +``` + +#### 2. Shard Database Migrations + +```bash +# Navigate to shard database migrations directory +cd database/src/postgres/migrations/shard_db + +# Create a new migration +sqlx migrate add -r +``` + +Example: +```bash +sqlx migrate add -r optimize_state_changes_table +``` + +#### 3. Transaction Details Migrations + +Transaction details are split between different databases: + +**For Receipts and Outcomes (Meta Database):** +```bash +# Navigate to receipts and outcomes migrations directory +cd database/src/postgres/migrations/tx_details/receipts_and_outcomes + +# Create a new migration for receipts and outcomes +sqlx migrate add -r +``` + +**For Transactions (Shard Databases):** +```bash +# Navigate to transactions migrations directory +cd database/src/postgres/migrations/tx_details/transactions + +# Create a new migration for transactions +sqlx migrate add -r +``` + +### Applying Migrations + +#### 1. Meta Database + +```bash +# Set the database URL +export DATABASE_URL=$META_DATABASE_URL + +# Run migrations +cd database/src/postgres/migrations/meta_db +sqlx migrate run +``` + +#### 2. 
Shard Databases + +Apply migrations to each shard database: + +```bash +# For each shard +export DATABASE_URL=$SHARD_0_DATABASE_URL +cd database/src/postgres/migrations/shard_db +sqlx migrate run + +export DATABASE_URL=$SHARD_1_DATABASE_URL +sqlx migrate run + +# Repeat for all shards... +``` + +#### 3. Transaction Details Migrations + +Transaction details migrations are applied to different databases: + +```bash +# Receipts and outcomes migrations (applied to META database) +export DATABASE_URL=$META_DATABASE_URL +cd database/src/postgres/migrations/tx_details/receipts_and_outcomes +sqlx migrate run + +# Transactions migrations (applied to SHARD databases) +export DATABASE_URL=$SHARD_0_DATABASE_URL +cd ../transactions +sqlx migrate run + +export DATABASE_URL=$SHARD_1_DATABASE_URL +sqlx migrate run + +# Repeat for all shards... +``` + +### Migration Scripts for Automation + +Create convenience scripts to manage all databases at once: + +#### Apply All Migrations Script + +```bash +#!/bin/bash +# save as: scripts/apply_all_migrations.sh + +set -e + +echo "Applying migrations to Meta Database..." +export DATABASE_URL=$META_DATABASE_URL +cd database/src/postgres/migrations/meta_db +sqlx migrate run + +# Apply receipts and outcomes migrations to Meta Database +echo "Applying receipts and outcomes migrations to Meta Database..." +cd ../tx_details/receipts_and_outcomes +sqlx migrate run + +echo "Applying migrations to Shard Databases..." +for shard_url in $SHARD_0_DATABASE_URL $SHARD_1_DATABASE_URL $SHARD_2_DATABASE_URL $SHARD_3_DATABASE_URL $SHARD_4_DATABASE_URL $SHARD_5_DATABASE_URL; do + if [ ! -z "$shard_url" ]; then + echo "Migrating shard: $shard_url" + export DATABASE_URL=$shard_url + + # Apply shard database migrations + cd ../../shard_db + sqlx migrate run + + # Apply transactions migrations to each shard + cd ../tx_details/transactions + sqlx migrate run + fi +done + +echo "All migrations applied successfully!" +``` + +### Checking Migration Status + +#### View Applied Migrations + +```bash +# Set appropriate DATABASE_URL for the target database +export DATABASE_URL=$META_DATABASE_URL + +# Navigate to migrations directory +cd database/src/postgres/migrations/meta_db + +# Check migration status +sqlx migrate info +``` + +#### View Migration History + +```bash +# Show detailed migration history +sqlx migrate info --verbose +``` + +### Rolling Back Migrations + +#### Revert Last Migration + +```bash +# Set appropriate DATABASE_URL +export DATABASE_URL=$META_DATABASE_URL + +# Navigate to migrations directory +cd database/src/postgres/migrations/meta_db + +# Revert the last migration +sqlx migrate revert +``` + +#### Revert to Specific Version + +```bash +# Revert to a specific migration version +sqlx migrate revert --target-version +``` + +## Database Setup from Scratch + +### 1. Create Databases + +```sql +-- Connect to PostgreSQL as superuser +CREATE DATABASE meta_db; +CREATE DATABASE shard_0_db; +CREATE DATABASE shard_1_db; +-- ... create additional shard databases as needed +``` + +### 2. Apply All Migrations + +```bash +# Use the apply_all_migrations.sh script +chmod +x scripts/apply_all_migrations.sh +./scripts/apply_all_migrations.sh +``` + +## Migration Best Practices + +### 1. Migration File Naming + +SQLx uses timestamp-based naming: +``` +YYYYMMDDHHMMSS_migration_name.up.sql +YYYYMMDDHHMMSS_migration_name.down.sql +``` + +### 2. 
Writing Safe Migrations + +- Always test migrations on a copy of production data +- Use `IF EXISTS` and `IF NOT EXISTS` clauses where appropriate +- Make migrations atomic and reversible +- Include proper indexes for performance + +Example migration: +```sql +-- up.sql +CREATE TABLE IF NOT EXISTS new_table ( + id BIGSERIAL PRIMARY KEY, + data JSONB NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_new_table_created_at ON new_table(created_at); + +-- down.sql +DROP INDEX IF EXISTS idx_new_table_created_at; +DROP TABLE IF EXISTS new_table; +``` + +### 3. Large Table Migrations + +For large tables, consider: +- Creating new tables and migrating data in batches +- Using `CONCURRENTLY` for index creation +- Planning for downtime if necessary + +### 4. Data Migrations + +When migrating data, use transactions and include rollback logic: + +```sql +-- up.sql +BEGIN; + +-- Migration logic here +UPDATE existing_table SET new_column = 'default_value' WHERE new_column IS NULL; + +COMMIT; +``` + +## Troubleshooting + +### Common Issues + +1. **Migration fails midway** + ```bash + # Check current state + sqlx migrate info + + # Fix the issue and retry + sqlx migrate run + ``` + +2. **Database connection issues** + ```bash + # Test connection + psql $DATABASE_URL -c "SELECT version();" + ``` + +3. **Migration version conflicts** + ```bash + # Reset migrations (DANGEROUS - only for development) + sqlx migrate reset + ``` + +### Recovery Procedures + +1. **Partial migration failure** + - Review the error logs + - Manually fix any partial changes + - Re-run the migration + +2. **Rollback when down migration fails** + - Manually revert changes using SQL + - Update the `_sqlx_migrations` table if necessary + +## Integration with Application + +The project's database module automatically handles connections. Ensure migrations are applied before starting the application: + +```bash +# In your deployment script +./scripts/apply_all_migrations.sh + +# Then start the application +cargo run --bin rpc-server +``` + +## Development Workflow + +1. Create feature branch +2. Add necessary migrations using `sqlx migrate add` +3. Test migrations on development database +4. Commit migration files with your changes +5. Include migration instructions in PR description +6. Apply migrations to staging/production after deployment + +## Environment-Specific Considerations + +### Development +- Use local PostgreSQL instance +- Apply migrations manually or via script + +### Production +- Always backup databases before migration +- Test migrations on staging first +- Plan for minimal downtime +- Have rollback plan ready + +### Docker Compose +The project includes Docker Compose setup. 
Migrations should be applied after containers are up: + +```bash +docker-compose up -d postgres +# Wait for PostgreSQL to be ready +./scripts/apply_all_migrations.sh +docker-compose up +``` diff --git a/scripts/apply_all_migrations.sh b/scripts/apply_all_migrations.sh new file mode 100644 index 00000000..ebeeb843 --- /dev/null +++ b/scripts/apply_all_migrations.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Apply all database migrations for NEAR Read RPC project +# +# This script applies migrations to all database types: +# - Meta database +# - Shard databases +# - Transaction details databases +# +# Prerequisites: +# - SQLx CLI installed: cargo install sqlx-cli --no-default-features --features native-tls,postgres +# - Environment variables set for database URLs +# +# Usage: ./scripts/apply_all_migrations.sh + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Function to print colored output +print_status() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if SQLx CLI is installed +if ! command -v sqlx &> /dev/null; then + print_error "SQLx CLI is not installed. Please install it with:" + echo "cargo install sqlx-cli --no-default-features --features native-tls,postgres" + exit 1 +fi + +# Get the project root directory +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +MIGRATIONS_DIR="$PROJECT_ROOT/database/src/postgres/migrations" + +# Check if migrations directory exists +if [ ! -d "$MIGRATIONS_DIR" ]; then + print_error "Migrations directory not found: $MIGRATIONS_DIR" + exit 1 +fi + +print_status "Starting database migrations for NEAR Read RPC..." +print_status "Project root: $PROJECT_ROOT" + +# Function to run migrations for a specific database and path +run_migrations() { + local db_url="$1" + local migration_path="$2" + local db_name="$3" + + if [ -z "$db_url" ]; then + print_warning "Skipping $db_name - database URL not set" + return 0 + fi + + print_status "Applying migrations to $db_name..." + print_status "Database URL: $db_url" + print_status "Migration path: $migration_path" + + export DATABASE_URL="$db_url" + + if [ ! -d "$migration_path" ]; then + print_warning "Migration directory not found: $migration_path" + return 0 + fi + + # Check if there are any migration files + if [ -z "$(find "$migration_path" -name "*.sql" -type f)" ]; then + print_warning "No migration files found in $migration_path" + return 0 + fi + + # Change to migration directory and run migrations + cd "$migration_path" + + # Test database connection first + if ! sqlx database create 2>/dev/null; then + print_status "Database already exists or connection successful" + fi + + # Run migrations + if sqlx migrate run; then + print_status "✓ Successfully applied migrations to $db_name" + else + print_error "✗ Failed to apply migrations to $db_name" + return 1 + fi + + echo "" +} + +# 1. Apply Meta Database migrations +print_status "=== Meta Database Migrations ===" +run_migrations "$META_DATABASE_URL" "$MIGRATIONS_DIR/meta_db" "Meta Database" + +# Apply receipts and outcomes migrations to Meta Database +if [ ! -z "$META_DATABASE_URL" ]; then + run_migrations "$META_DATABASE_URL" "$MIGRATIONS_DIR/tx_details/receipts_and_outcomes" "Meta Database - Receipts and Outcomes" +fi + +# 2. 
Apply Shard Database migrations +print_status "=== Shard Database Migrations ===" + +# List of shard database environment variables +SHARD_DBS=( + "SHARD_0_DATABASE_URL" + "SHARD_1_DATABASE_URL" + "SHARD_2_DATABASE_URL" + "SHARD_3_DATABASE_URL" + "SHARD_4_DATABASE_URL" + "SHARD_5_DATABASE_URL" +) + +for shard_var in "${SHARD_DBS[@]}"; do + shard_url="${!shard_var}" + if [ ! -z "$shard_url" ]; then + run_migrations "$shard_url" "$MIGRATIONS_DIR/shard_db" "Shard Database ($shard_var)" + # Apply transactions migrations to each shard database + run_migrations "$shard_url" "$MIGRATIONS_DIR/tx_details/transactions" "Shard Database ($shard_var) - Transactions" + fi +done + +# 3. Summary +print_status "=== Migration Summary ===" +print_status "All database migrations completed successfully!" +print_status "" +print_status "Next steps:" +print_status "1. Verify migrations with: sqlx migrate info" +print_status "2. Start your application services" +print_status "3. Check application logs for any issues" + +# Return to original directory +cd "$PROJECT_ROOT" From 2afdc1cb86b17cf24894b6491cd7754fe6a512c1 Mon Sep 17 00:00:00 2001 From: Yurii Koba Date: Fri, 15 Aug 2025 13:58:01 +0300 Subject: [PATCH 2/2] update readme --- README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/README.md b/README.md index d2729432..9e2fb160 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,46 @@ The indexer built on top of Lake Framework that watches the network and stores t The configuration module is responsible for managing the configuration settings of the NEAR ReadRPC project. +### [database](database/README.md) + +The database module provides database abstractions and implementations for storing and retrieving data. + +### [cache-storage](cache-storage/README.md) + +The cache storage module provides caching functionality for improved performance. + +### [tx-details-storage](tx-details-storage/README.md) + +The transaction details storage module handles storage of detailed transaction information. + +### [logic-state-indexer](logic-state-indexer/README.md) + +The logic state indexer module provides state indexing functionality. + +### [readnode-primitives](readnode-primitives/README.md) + +The readnode primitives module contains common data structures and utilities. + +### [perf-testing](perf-testing/README.md) + +The performance testing module provides tools for testing and benchmarking. + +## Documentation + +### Project Documentation +- [CHANGELOG.md](CHANGELOG.md) - Project changelog and version history +- [Examples](examples/README.md) - Usage examples and sample configurations + +### Technical Documentation +- [RPC Methods](docs/RPC_METHODS.md) - Available RPC methods and their specifications +- [Custom RPC Methods](docs/CUSTOM_RPC_METHODS.md) - Custom RPC methods specific to Read RPC +- [Database Migrations](docs/DATABASE_MIGRATIONS.md) - Database migration procedures and guidelines +- [Tracing](docs/TRACING.md) - Distributed tracing setup and configuration + +### Database Documentation +- [PostgreSQL Setup](database/src/postgres/README.md) - PostgreSQL-specific configuration and setup +- [Database Migrations](docs/DATABASE_MIGRATIONS.md) - Database migration procedures and guidelines + ## Docker compose **Note!** The docker compose is not fully ready yet. It's still in progress. However, you can run the entire project to play around with it. It is still not convenient for development or debugging purposes. We are working on improving it.
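For reference, a typical invocation of the `scripts/apply_all_migrations.sh` script introduced in this patch series, run from the repository root (the connection URLs are placeholders; any `SHARD_*_DATABASE_URL` variables left unset are simply skipped by the script):

```bash
# Placeholder connection URLs; adjust to your environment
export META_DATABASE_URL="postgresql://username:password@localhost:5432/meta_db"
export SHARD_0_DATABASE_URL="postgresql://username:password@localhost:5432/shard_0_db"
export SHARD_1_DATABASE_URL="postgresql://username:password@localhost:5432/shard_1_db"

# Apply meta, shard, and transaction-details migrations in one pass
chmod +x scripts/apply_all_migrations.sh
./scripts/apply_all_migrations.sh
```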