diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..7928024d6 --- /dev/null +++ b/.env.example @@ -0,0 +1,45 @@ +# Spyglass Docker Compose Configuration +# +# This file shows available configuration options with their defaults. +# Docker Compose works fine without a .env file - defaults work for most users. +# +# To customize (optional): +# 1. Copy this file: cp .env.example .env +# 2. Edit .env with your preferred settings +# 3. Run: docker compose up -d +# +# IMPORTANT: If you change MYSQL_PORT or MYSQL_ROOT_PASSWORD, you must also +# update your DataJoint configuration file (dj_local_conf.json) to match. +# See docs/DATABASE.md for details on DataJoint configuration. + +# ============================================================================= +# Database Configuration (Required) +# ============================================================================= + +# MySQL root password (default: tutorial) +# For local development, 'tutorial' is fine +# For production, use a strong password +MYSQL_ROOT_PASSWORD=tutorial + +# MySQL port (default: 3306) +# Change this if port 3306 is already in use +MYSQL_PORT=3306 + +# MySQL Docker image (default: datajoint/mysql:8.0) +# You can specify a different version if needed +MYSQL_IMAGE=datajoint/mysql:8.0 + +# ============================================================================= +# Optional Configuration +# ============================================================================= + +# Database name to create on startup (optional) +# Leave empty to skip database creation +MYSQL_DATABASE= + +# ============================================================================= +# Notes +# ============================================================================= +# - Don't commit .env file to git (it contains passwords) +# - Port range: 1024-65535 +# - If you change MYSQL_PORT, update your DataJoint config accordingly diff --git a/.gitignore b/.gitignore index d6984345b..6a04bcb21 
100644 --- a/.gitignore +++ b/.gitignore @@ -115,6 +115,9 @@ ENV/ env.bak/ venv.bak/ +# Docker Compose +docker-compose.override.yml + # Spyder project settings .spyderproject .spyproject @@ -165,6 +168,7 @@ temp_nwb/*s *.pem dj_local_conf* !dj_local_conf_example.json +!/config_schema.json !/.vscode/extensions.json !/.vscode/settings.json diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 000000000..8d57b09df --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,140 @@ +# Spyglass Quickstart + +Get from zero to analyzing neural data with Spyglass in just a few commands. + +## Choose Your Path + +### ๐Ÿ‘ฅ Joining an Existing Lab? (Recommended) + +If you received database credentials from your lab admin, this is you! +The installer will: + +- Set up your development environment +- Connect you to your lab's existing database +- Prompt you to change your temporary password +- Configure all necessary directories + +**Time**: ~5 minutes | **Database**: Connect to lab's existing database + +### ๐Ÿงช Trying Spyglass Locally? + +Want to explore Spyglass features without joining a lab? +The installer can: + +- Set up a local trial database using Docker +- Create an isolated test environment +- Let you experiment with sample data + +**Time**: ~10 minutes | **Database**: Local Docker container +(requires [Docker Desktop](https://docs.docker.com/get-docker/)) + +--- + +## Prerequisites + +- **Python**: Version 3.9 or higher +- **Disk Space**: ~10GB for installation + data storage +- **Operating System**: macOS or Linux (Windows experimental) +- **Package Manager**: [conda](https://docs.conda.io/) (23.10.0+ recommended) or [mamba](https://mamba.readthedocs.io/) + +If you don't have mamba/conda, install [miniforge](https://github.com/conda-forge/miniforge#install) first. 
+ +## Installation (2 steps) + +### Step 1: Run the installer + +```bash +# Clone the repository +git clone https://github.com/LorenFrankLab/spyglass.git +cd spyglass + +# Run interactive installer +python scripts/install.py +``` + +The installer will prompt you to choose: + +1. **Installation type**: Minimal (recommended) or Full +2. **Database setup**: + - **Remote** (recommended for lab members) - Connect to lab's existing database + - **Docker** - Local trial database for testing + - **Skip** - Configure manually later + +If joining a lab, you'll be prompted to change your password during installation. + +### Step 2: Validate installation + +```bash +# Activate the environment +conda activate spyglass + +# Run validation +python scripts/validate.py -v +``` + +**That's it!** Setup complete in ~5-10 minutes. + +## Next Steps + +### Run first tutorial +```bash +cd notebooks +jupyter notebook 01_Concepts.ipynb +``` + +### Configure for your data +- Place NWB files in `~/spyglass_data/raw/` +- See [Data Import Guide](https://lorenfranklab.github.io/spyglass/latest/notebooks/01_Insert_Data/) for details + +### Join community +- ๐Ÿ“– [Documentation](https://lorenfranklab.github.io/spyglass/) +- ๐Ÿ’ฌ [GitHub Discussions](https://github.com/LorenFrankLab/spyglass/discussions) +- ๐Ÿ› [Report Issues](https://github.com/LorenFrankLab/spyglass/issues) + +--- + +## Installation Options + +Need something different? 
The installer supports these options: + +```bash +python scripts/install.py --full # All optional dependencies +python scripts/install.py --pipeline=dlc # DeepLabCut pipeline +python scripts/install.py --no-database # Skip database setup +python scripts/install.py --help # See all options +``` + +## What Gets Installed + +The installer creates: + +- **Conda environment** with Spyglass and core dependencies +- **Database connection** (remote lab database OR local Docker container) +- **Data directories** in `~/spyglass_data/` +- **Jupyter environment** for running tutorials + +## Troubleshooting + +### Installation fails? +```bash +# Remove environment and retry +conda env remove -n spyglass +python scripts/install.py +``` + +### Validation fails? + +1. Check error messages for specific issues +2. If using Docker database, ensure Docker Desktop is running +3. If database connection fails, verify credentials with your lab admin +4. Try skipping database: `python scripts/install.py --no-database` + +### Need help? 
+ +- Check [Advanced Setup Guide](https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/) for manual installation +- Ask questions in [GitHub Discussions](https://github.com/LorenFrankLab/spyglass/discussions) + +--- + +**Next tutorial**: [01_Concepts.ipynb](notebooks/01_Concepts.ipynb) +**Full documentation**: [lorenfranklab.github.io/spyglass](https://lorenfranklab.github.io/spyglass/) \ No newline at end of file diff --git a/README.md b/README.md index 79478c361..4bf187fb9 100644 --- a/README.md +++ b/README.md @@ -65,10 +65,67 @@ Documentation can be found at - ## Installation -For installation instructions see - -[https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/](https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/) +### Quick Start (Recommended) -Typical installation time is: 5-10 minutes +Get started with Spyglass in 5 minutes using our automated installer: + +```bash +# Clone the repository +git clone https://github.com/LorenFrankLab/spyglass.git +cd spyglass + +# Run automated installer +python scripts/install.py + +# Activate environment +conda activate spyglass +``` + +The installer will: +- โœ… Create conda environment with all dependencies +- โœ… Set up local MySQL database (Docker) or connect to remote +- โœ… Validate installation +- โœ… Provide clear next steps + +**Installation Options:** +```bash +# Minimal installation (recommended for new users) +python scripts/install.py --minimal + +# Full installation (all features) +python scripts/install.py --full + +# With Docker database +python scripts/install.py --docker + +# Connect to remote database +python scripts/install.py --remote + +# Non-interactive with environment variables +export SPYGLASS_BASE_DIR=/path/to/data +python scripts/install.py --minimal --docker + +# Non-interactive remote database setup +python scripts/install.py --remote \ + --db-host db.lab.edu \ + --db-user myuser \ + --db-password mysecret + +# Or use environment variable for 
password +export SPYGLASS_DB_PASSWORD=mysecret +python scripts/install.py --remote --db-host db.lab.edu --db-user myuser +``` + +**Troubleshooting:** +- See [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) for common issues +- Run `python scripts/validate.py` to check your installation +- For database help, see [docs/DATABASE.md](docs/DATABASE.md) + +### Manual Installation + +For manual installation and advanced configuration: +- [Setup Documentation](https://lorenfranklab.github.io/spyglass/latest/notebooks/00_Setup/) +- [Database Setup Guide](docs/DATABASE.md) ## Tutorials diff --git a/config_schema.json b/config_schema.json new file mode 100644 index 000000000..d4a3639c7 --- /dev/null +++ b/config_schema.json @@ -0,0 +1,46 @@ +{ + "_schema_version": "1.0.0", + "_comment": "Single source of truth for Spyglass directory structure", + "_critical": "This must match src/spyglass/settings.py SpyglassConfig.relative_dirs", + "_note": "If you modify this file, update settings.py to match (or vice versa)", + "_version_history": { + "1.0.0": "Initial DRY architecture - JSON schema replaces hard-coded directory structure" + }, + + "directory_schema": { + "spyglass": { + "raw": "raw", + "analysis": "analysis", + "recording": "recording", + "sorting": "spikesorting", + "waveforms": "waveforms", + "temp": "tmp", + "video": "video", + "export": "export" + }, + "kachery": { + "cloud": ".kachery-cloud", + "storage": "kachery_storage", + "temp": "tmp" + }, + "dlc": { + "project": "projects", + "video": "video", + "output": "output" + }, + "moseq": { + "project": "projects", + "video": "video" + } + }, + + "tls": { + "_description": "TLS (Transport Layer Security) encrypts database traffic", + "auto_enable_for_remote": true, + "localhost_addresses": [ + "localhost", + "127.0.0.1", + "::1" + ] + } +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..716684a46 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,68 @@ +# Spyglass Database 
Setup with Docker Compose +# +# Quick start (no setup needed - defaults work for most users): +# docker compose up -d +# +# This starts a MySQL database for Spyglass with: +# - Persistent data storage (survives container restart) +# - Health checks (ensures database is ready) +# - Standard configuration (matches manual Docker setup) +# +# Common tasks: +# Start: docker compose up -d +# Stop: docker compose stop +# Logs: docker compose logs mysql +# Restart: docker compose restart +# Remove: docker compose down -v # WARNING: Deletes all data! +# +# Customization (optional): +# Create .env file from .env.example to customize settings +# See .env.example for available configuration options +# +# Troubleshooting: +# Port 3306 in use: Create .env file and change MYSQL_PORT +# Services won't start: Run 'docker compose logs' to see errors +# Can't connect: Ensure Docker Desktop is running + +services: + mysql: + image: ${MYSQL_IMAGE:-datajoint/mysql:8.0} + + # Container name MUST be 'spyglass-db' to match existing code + container_name: spyglass-db + + ports: + - "${MYSQL_PORT:-3306}:3306" + + environment: + MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD:-tutorial} + # Optional: Create database on startup + MYSQL_DATABASE: ${MYSQL_DATABASE:-} + + volumes: + # Named volume for persistent storage + # Data survives 'docker compose down' but is removed by 'down -v' + - spyglass-db-data:/var/lib/mysql + + healthcheck: + # Check if MySQL is ready without exposing password in process list + test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "--silent"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + + restart: unless-stopped + + networks: + - spyglass-network + +volumes: + spyglass-db-data: + # Explicit name for predictability + name: spyglass-db-data + +networks: + spyglass-network: + name: spyglass-network + driver: bridge diff --git a/docs/DATABASE.md b/docs/DATABASE.md new file mode 100644 index 000000000..b172a3e73 --- /dev/null +++ b/docs/DATABASE.md @@ 
-0,0 +1,588 @@ +# Spyglass Database Setup Guide + +Spyglass requires a MySQL database backend for storing experimental data and analysis results. This guide covers all setup options from quick local development to production deployments. + +## Quick Start (Recommended) + +The easiest way to set up a database is using the installer with Docker Compose: + +```bash +cd spyglass +python scripts/install.py +# Choose option 1 (Docker Compose) when prompted +``` + +This automatically: +- Pulls the MySQL 8.0 Docker image +- Creates and starts a container named `spyglass-db` +- Waits for MySQL to be ready +- Creates configuration file with credentials + +**Or use Docker Compose directly:** +```bash +cd spyglass +docker compose up -d +``` + +## Setup Options + +### Option 1: Docker Compose (Recommended for Local Development) + +**Pros:** +- One-command setup (~2 minutes) +- Infrastructure as code (version controlled) +- Easy to customize via .env file +- Industry-standard tool +- Persistent data storage +- Health checks built-in + +**Cons:** +- Requires Docker Desktop with Compose plugin +- Uses system resources when running + +#### Prerequisites + +1. **Install Docker Desktop:** + - macOS: https://docs.docker.com/desktop/install/mac-install/ + - Windows: https://docs.docker.com/desktop/install/windows-install/ + - Linux: https://docs.docker.com/desktop/install/linux-install/ + +2. **Start Docker Desktop** and ensure it's running + +3. 
**Verify Compose is available:** + ```bash + docker compose version + # Should show: Docker Compose version v2.x.x + ``` + +#### Setup + +**Using installer (recommended):** +```bash +python scripts/install.py --docker # Will auto-detect and use Compose +``` + +**Using Docker Compose directly:** +```bash +# From spyglass repository root +docker compose up -d +``` + +The default configuration uses: +- Port: 3306 +- Password: tutorial +- Container name: spyglass-db +- Persistent storage: spyglass-db-data volume + +#### Customization (Optional) + +Create a `.env` file to customize settings: + +```bash +# Copy example +cp .env.example .env + +# Edit settings +nano .env +``` + +Available options: +```bash +# Change port if 3306 is in use +MYSQL_PORT=3307 + +# Change root password (for production) +MYSQL_ROOT_PASSWORD=your-secure-password + +# Use different MySQL version +MYSQL_IMAGE=datajoint/mysql:8.4 +``` + +**Important:** If you change port or password, update your DataJoint config accordingly. + +#### Management + +**Start/stop services:** +```bash +# Start +docker compose up -d + +# Stop (keeps data) +docker compose stop + +# Stop and remove containers (keeps data) +docker compose down + +# Stop and remove everything including data +docker compose down -v # WARNING: Deletes all data! 
+``` + +**View logs:** +```bash +docker compose logs mysql +docker compose logs -f mysql # Follow mode +``` + +**Check status:** +```bash +docker compose ps +``` + +**Access MySQL shell:** +```bash +docker compose exec mysql mysql -uroot -ptutorial +``` + +**Restart services:** +```bash +docker compose restart +``` + +### Option 2: Remote Database (Lab/Cloud Setup) + +**Pros:** +- Shared across team members +- Production-ready +- Professional backup/monitoring +- Persistent storage + +**Cons:** +- Requires existing MySQL server +- Network configuration needed +- May need VPN/SSH tunnel + +#### Prerequisites + +- MySQL 8.0+ server accessible over network +- Database credentials (host, port, user, password) +- Firewall rules allowing connection + +#### Setup + +**Using installer (interactive):** +```bash +python scripts/install.py --remote +# Enter connection details when prompted +``` + +**Using installer (non-interactive for automation):** +```bash +# Using CLI arguments +python scripts/install.py --remote \ + --db-host db.mylab.edu \ + --db-user myusername \ + --db-password mypassword + +# Using environment variables (recommended for CI/CD) +export SPYGLASS_DB_PASSWORD=mypassword +python scripts/install.py --remote \ + --db-host db.mylab.edu \ + --db-user myusername +``` + +**Manual configuration:** + +Create `~/.datajoint_config.json`: +```json +{ + "database.host": "db.mylab.edu", + "database.port": 3306, + "database.user": "myusername", + "database.password": "mypassword", + "database.use_tls": true +} +``` + +**Test connection:** +```python +import datajoint as dj +dj.conn().ping() # Should succeed +``` + +#### SSH Tunnel (For Remote Access) + +If database is behind firewall, use SSH tunnel: + +```bash +# Create tunnel (keep running in terminal) +ssh -L 3306:localhost:3306 user@remote-server + +# In separate terminal, configure as localhost +cat > ~/.datajoint_config.json << EOF +{ + "database.host": "localhost", + "database.port": 3306, + "database.user": 
"root", + "database.password": "password", + "database.use_tls": false +} +EOF +``` + +Or use autossh for persistent tunnel: +```bash +autossh -M 0 -L 3306:localhost:3306 user@remote-server +``` + +### Option 3: Local MySQL Installation + +**Pros:** +- No Docker required +- Direct system integration +- Full control over configuration + +**Cons:** +- More complex setup +- Platform-specific installation +- Harder to reset/clean + +#### macOS (Homebrew) + +```bash +# Install MySQL +brew install mysql + +# Start MySQL service +brew services start mysql + +# Secure installation +mysql_secure_installation + +# Create user +mysql -uroot -p +``` + +In MySQL shell: +```sql +CREATE USER 'spyglass'@'localhost' IDENTIFIED BY 'spyglass_password'; +GRANT ALL PRIVILEGES ON *.* TO 'spyglass'@'localhost'; +FLUSH PRIVILEGES; +EXIT; +``` + +Configure DataJoint: +```json +{ + "database.host": "localhost", + "database.port": 3306, + "database.user": "spyglass", + "database.password": "spyglass_password", + "database.use_tls": false +} +``` + +#### Linux (Ubuntu/Debian) + +```bash +# Install MySQL +sudo apt-get update +sudo apt-get install mysql-server + +# Start service +sudo systemctl start mysql +sudo systemctl enable mysql + +# Secure installation +sudo mysql_secure_installation + +# Create user +sudo mysql +``` + +In MySQL shell: +```sql +CREATE USER 'spyglass'@'localhost' IDENTIFIED BY 'spyglass_password'; +GRANT ALL PRIVILEGES ON *.* TO 'spyglass'@'localhost'; +FLUSH PRIVILEGES; +EXIT; +``` + +#### Windows + +1. Download MySQL Installer: https://dev.mysql.com/downloads/installer/ +2. Run installer and select "Developer Default" +3. Follow setup wizard +4. 
Create spyglass user with full privileges + +## Configuration Reference + +### DataJoint Configuration File + +Location: `~/.datajoint_config.json` + +**Full configuration example:** +```json +{ + "database.host": "localhost", + "database.port": 3306, + "database.user": "root", + "database.password": "tutorial", + "database.use_tls": false, + "database.charset": "utf8mb4", + "connection.init_function": null, + "loglevel": "INFO", + "safemode": true, + "fetch_format": "array" +} +``` + +**Key settings:** + +- `database.host`: MySQL server hostname or IP +- `database.port`: MySQL port (default: 3306) +- `database.user`: MySQL username +- `database.password`: MySQL password +- `database.use_tls`: Use TLS/SSL encryption (recommended for remote) + +### TLS/SSL Configuration + +**When to use TLS:** +- โœ… Remote database connections +- โœ… Production environments +- โœ… When connecting over untrusted networks +- โŒ localhost connections +- โŒ Docker containers on same machine + +**Enable TLS:** +```json +{ + "database.use_tls": true +} +``` + +**Custom certificate:** +```json +{ + "database.use_tls": { + "ssl": { + "ca": "/path/to/ca-cert.pem", + "cert": "/path/to/client-cert.pem", + "key": "/path/to/client-key.pem" + } + } +} +``` + +## Security Best Practices + +### Development + +For local development, simple credentials are acceptable: +- User: `root` or dedicated user +- Password: Simple but unique +- TLS: Disabled for localhost + +### Production + +For shared/production databases: + +1. **Strong passwords:** + ```bash + # Generate secure password + openssl rand -base64 32 + ``` + +2. **User permissions:** + ```sql + -- Create user with specific database access + -- (MySQL database wildcards use %/_ LIKE patterns, not *) + CREATE USER 'spyglass'@'%' IDENTIFIED BY 'strong_password'; + GRANT ALL PRIVILEGES ON `spyglass\_%`.* TO 'spyglass'@'%'; + FLUSH PRIVILEGES; + ``` + +3. **Enable TLS:** + ```json + { + "database.use_tls": true + } + ``` + +4.
**Network security:** + - Use firewall rules + - Consider VPN for remote access + - Use SSH tunnels when appropriate + +5. **Credential management:** + - Never commit config files to git + - Use environment variables for CI/CD + - Consider secrets management tools + +### File Permissions + +Protect configuration file: +```bash +chmod 600 ~/.datajoint_config.json +``` + +## Multi-User Setup + +For lab environments with shared database: + +### Server-Side Setup + +```sql +-- Create database prefix for lab +CREATE DATABASE spyglass_common; + +-- Create users +CREATE USER 'alice'@'%' IDENTIFIED BY 'alice_password'; +CREATE USER 'bob'@'%' IDENTIFIED BY 'bob_password'; + +-- Grant permissions (database wildcards use %/_ LIKE patterns, not *) +GRANT ALL PRIVILEGES ON `spyglass\_%`.* TO 'alice'@'%'; +GRANT ALL PRIVILEGES ON `spyglass\_%`.* TO 'bob'@'%'; +FLUSH PRIVILEGES; +``` + +### Client-Side Setup + +Each user creates their own config: + +**Alice's config:** +```json +{ + "database.host": "lab-db.university.edu", + "database.user": "alice", + "database.password": "alice_password", + "database.use_tls": true +} +``` + +**Bob's config:** +```json +{ + "database.host": "lab-db.university.edu", + "database.user": "bob", + "database.password": "bob_password", + "database.use_tls": true +} +``` + +## Troubleshooting + +### Cannot Connect + +**Check MySQL is running:** +```bash +# Docker +docker ps | grep spyglass-db + +# System service (Linux) +systemctl status mysql + +# Homebrew (macOS) +brew services list | grep mysql +``` + +**Test connection:** +```bash +# With mysql client +mysql -h HOST -P PORT -u USER -p + +# With Python +python -c "import datajoint as dj; dj.conn().ping()" +``` + +### Permission Denied + +```sql +-- Grant missing privileges +GRANT ALL PRIVILEGES ON *.* TO 'user'@'host'; +FLUSH PRIVILEGES; +``` + +### Port Already in Use + +```bash +# Find what's using port 3306 +lsof -i :3306 +netstat -an | grep 3306 + +# Use different port +docker run -p 3307:3306 ...
+# Update config with port 3307 +``` + +### TLS Errors + +```python +# Disable TLS for localhost +config = { + "database.host": "localhost", + "database.use_tls": False +} +``` + +For more troubleshooting help, see [TROUBLESHOOTING.md](TROUBLESHOOTING.md). + +## Advanced Topics + +### Database Backup + +**Docker database:** +```bash +# Backup +docker exec spyglass-db mysqldump -uroot -ptutorial --all-databases > backup.sql + +# Restore +docker exec -i spyglass-db mysql -uroot -ptutorial < backup.sql +``` + +**System MySQL:** +```bash +# Backup +mysqldump -u USER -p --all-databases > backup.sql + +# Restore +mysql -u USER -p < backup.sql +``` + +### Performance Tuning + +**Increase buffer pool (Docker):** +```bash +docker run -d \ + --name spyglass-db \ + -p 3306:3306 \ + -e MYSQL_ROOT_PASSWORD=tutorial \ + datajoint/mysql:8.0 \ + --innodb-buffer-pool-size=2G +``` + +**Optimize tables:** +```sql +OPTIMIZE TABLE tablename; +``` + +### Migration + +**Moving from Docker to Remote:** +1. Backup Docker database +2. Restore to remote server +3. Update config to point to remote +4. Test connection + +**Example:** +```bash +# Backup from Docker +docker exec spyglass-db mysqldump -uroot -ptutorial --all-databases > backup.sql + +# Restore to remote +mysql -h remote-host -u user -p < backup.sql + +# Update config +cat > ~/.datajoint_config.json << EOF +{ + "database.host": "remote-host", + "database.user": "user", + "database.password": "password", + "database.use_tls": true +} +EOF +``` + +## Getting Help + +- **Issues:** https://github.com/LorenFrankLab/spyglass/issues +- **Docs:** See main Spyglass documentation +- **DataJoint:** https://docs.datajoint.org/ diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md new file mode 100644 index 000000000..c59cd5c74 --- /dev/null +++ b/docs/TROUBLESHOOTING.md @@ -0,0 +1,448 @@ +# Spyglass Installation Troubleshooting + +This guide helps resolve common installation issues with Spyglass. 
+ +## Quick Diagnosis + +Run the validation script to identify issues: + +```bash +python scripts/validate.py +``` + +The validator will check: +- โœ“ Python version compatibility +- โœ“ Conda/Mamba availability +- โœ“ Spyglass import +- โš  SpyglassConfig (optional) +- โš  Database connection (optional) + +## Common Issues + +### Environment Creation Fails + +**Symptoms:** +- `conda env create` hangs or fails +- Package conflict errors +- Timeout during solving environment + +**Solutions:** + +1. **Update conda/mamba:** + ```bash + conda update conda + # or + mamba update mamba + ``` + +2. **Clear package cache:** + ```bash + conda clean --all + ``` + +3. **Try mamba (faster, better at resolving conflicts):** + ```bash + conda install mamba -c conda-forge + mamba env create -f environment.yml + ``` + +4. **Use minimal installation first:** + ```bash + python scripts/install.py --minimal + ``` + +5. **Check disk space:** + - Minimal: ~10 GB required + - Full: ~25 GB required + ```bash + df -h + ``` + +### Docker Database Issues + +**Symptoms:** +- "Docker not available" +- Container fails to start +- MySQL timeout waiting for readiness + +**Solutions:** + +1. **Verify Docker is installed and running:** + ```bash + docker --version + docker ps + ``` + +2. **Start Docker Desktop** (macOS/Windows) + - Check system tray for Docker icon + - Ensure Docker Desktop is running + +3. **Check Docker permissions** (Linux): + ```bash + sudo usermod -aG docker $USER + # Then log out and back in + ``` + +4. **Container already exists:** + ```bash + # Check if container exists + docker ps -a | grep spyglass-db + + # Remove old container + docker rm -f spyglass-db + + # Try installation again + python scripts/install.py --docker + ``` + +5. **Port 3306 already in use:** + ```bash + # Check what's using port 3306 + lsof -i :3306 + # or + netstat -an | grep 3306 + + # Stop conflicting service or use different port + ``` + +6. 
**Container starts but MySQL times out:** + ```bash + # Check container logs + docker logs spyglass-db + + # Wait longer and check again + docker exec spyglass-db mysqladmin -uroot -ptutorial ping + ``` + +### Remote Database Connection Fails + +**Symptoms:** +- "Connection refused" +- "Access denied for user" +- TLS/SSL errors + +**Solutions:** + +1. **Verify credentials:** + - Double-check host, port, username, password + - Try connecting with mysql CLI: + ```bash + mysql -h HOST -P PORT -u USER -p + ``` + +2. **Check network/firewall:** + ```bash + # Test if port is open + telnet HOST PORT + # or + nc -zv HOST PORT + ``` + +3. **TLS configuration:** + - For `localhost`, TLS should be disabled + - For remote hosts, TLS should be enabled + - If TLS errors occur, verify server certificate + +4. **Database permissions:** + ```sql + -- Run on MySQL server + -- (MySQL 8.0 removed GRANT ... IDENTIFIED BY; create the user first) + CREATE USER IF NOT EXISTS 'user'@'%' IDENTIFIED BY 'password'; + GRANT ALL PRIVILEGES ON *.* TO 'user'@'%'; + FLUSH PRIVILEGES; + ``` + +### Python Version Issues + +**Symptoms:** +- "Python 3.9+ required, found 3.8" +- Import errors for newer Python features + +**Solutions:** + +1. **Check Python version:** + ```bash + python --version + ``` + +2. **Install correct Python version:** + ```bash + # Using conda + conda install python=3.10 + + # Or create new environment + conda create -n spyglass python=3.10 + ``` + +3. **Verify conda environment:** + ```bash + # Check active environment + conda info --envs + + # Activate spyglass environment + conda activate spyglass + ``` + +### Spyglass Import Fails + +**Symptoms:** +- `ModuleNotFoundError: No module named 'spyglass'` +- Import errors for spyglass submodules + +**Solutions:** + +1. **Verify installation:** + ```bash + conda activate spyglass + pip show spyglass + ``` + +2. **Reinstall in development mode:** + ```bash + cd /path/to/spyglass + pip install -e . + ``` + +3.
**Check sys.path:** + ```python + import sys + print(sys.path) + # Should include spyglass source directory + ``` + +### SpyglassConfig Issues + +**Symptoms:** +- "Cannot find configuration file" +- Base directory errors + +**Solutions:** + +1. **Check config file location:** + ```bash + ls -la ~/.spyglass/config.yaml + # or + ls -la $SPYGLASS_BASE_DIR/config.yaml + ``` + +2. **Set base directory:** + ```bash + export SPYGLASS_BASE_DIR=/path/to/data + ``` + +3. **Create default config:** + ```python + from spyglass.settings import SpyglassConfig + config = SpyglassConfig() # Auto-creates if missing + ``` + +### DataJoint Configuration Issues + +**Symptoms:** +- "Could not connect to database" +- Configuration file not found + +**Solutions:** + +1. **Check DataJoint config:** + ```bash + cat ~/.datajoint_config.json + ``` + +2. **Manually create config:** + ```json + { + "database.host": "localhost", + "database.port": 3306, + "database.user": "root", + "database.password": "tutorial", + "database.use_tls": false + } + ``` + +3. **Test connection:** + ```python + import datajoint as dj + dj.conn().ping() + ``` + +### M1/M2 Mac Issues + +**Symptoms:** +- Architecture mismatch errors +- Rosetta warnings +- Package installation failures + +**Solutions:** + +1. **Use native ARM environment:** + ```bash + # Ensure using ARM conda + conda config --env --set subdir osx-arm64 + ``` + +2. **Some packages may require Rosetta:** + ```bash + # Install Rosetta 2 if needed + softwareupdate --install-rosetta + ``` + +3. **Use mamba for better ARM support:** + ```bash + conda install mamba -c conda-forge + mamba env create -f environment.yml + ``` + +### Insufficient Disk Space + +**Symptoms:** +- Installation fails partway through +- "No space left on device" + +**Solutions:** + +1. **Check available space:** + ```bash + df -h + ``` + +2. **Clean conda cache:** + ```bash + conda clean --all + ``` + +3. 
**Choose different installation directory:** + ```bash + python scripts/install.py --base-dir /path/with/more/space + ``` + +4. **Use minimal installation:** + ```bash + python scripts/install.py --minimal + ``` + +### Permission Errors + +**Symptoms:** +- "Permission denied" during installation +- Cannot write to directory + +**Solutions:** + +1. **Check directory permissions:** + ```bash + ls -la /path/to/directory + ``` + +2. **Create directory with correct permissions:** + ```bash + mkdir -p ~/spyglass_data + chmod 755 ~/spyglass_data + ``` + +3. **Don't use sudo with conda:** + - Conda environments should be user-owned + - Never run `sudo conda` or `sudo pip` + +### Git Issues + +**Symptoms:** +- Cannot clone repository +- Git not found + +**Solutions:** + +1. **Install git:** + ```bash + # macOS + xcode-select --install + + # Linux (Ubuntu/Debian) + sudo apt-get install git + + # Linux (CentOS/RHEL) + sudo yum install git + ``` + +2. **Clone with HTTPS instead of SSH:** + ```bash + git clone https://github.com/LorenFrankLab/spyglass.git + ``` + +## Platform-Specific Issues + +### macOS + +**Issue: Xcode Command Line Tools missing** +```bash +xcode-select --install +``` + +**Issue: Homebrew conflicts** +```bash +# Use conda-installed tools instead of homebrew +conda activate spyglass +which python # Should show conda path +``` + +### Linux + +**Issue: Missing system libraries** +```bash +# Ubuntu/Debian +sudo apt-get install build-essential libhdf5-dev + +# CentOS/RHEL +sudo yum groupinstall "Development Tools" +sudo yum install hdf5-devel +``` + +**Issue: Docker permissions** +```bash +sudo usermod -aG docker $USER +# Log out and back in +``` + +### Windows (WSL) + +**Issue: WSL not set up** +```bash +# Install WSL 2 from PowerShell (admin): +wsl --install +``` + +**Issue: Docker Desktop integration** +- Enable WSL 2 integration in Docker Desktop settings +- Ensure Docker is running before installation + +## Still Having Issues? + +1. 
**Check GitHub Issues:** + https://github.com/LorenFrankLab/spyglass/issues + +2. **Ask for Help:** + - Include output from `python scripts/validate.py` + - Include relevant error messages + - Mention your OS and Python version + +3. **Manual Installation:** + See `docs/DATABASE.md` and main documentation for manual setup steps + +## Reset and Start Fresh + +If all else fails, completely reset your installation: + +```bash +# Remove conda environment +conda env remove -n spyglass + +# Remove configuration files +rm ~/.datajoint_config.json +rm -rf ~/.spyglass + +# Remove Docker container +docker rm -f spyglass-db + +# Start fresh +git clone https://github.com/LorenFrankLab/spyglass.git +cd spyglass +python scripts/install.py +``` diff --git a/environment-min.yml b/environment-min.yml new file mode 100644 index 000000000..01e32b1ed --- /dev/null +++ b/environment-min.yml @@ -0,0 +1,30 @@ +# Minimal Spyglass Environment - Core Dependencies Only +# 1. Install: `mamba env create -f environment-min.yml` +# 2. Activate: `conda activate spyglass` +# 3. For full features, use environment.yml instead +# +# This environment includes only the essential dependencies needed for basic +# spyglass functionality. Optional dependencies for specific pipelines +# (LFP analysis, spike sorting, etc.) are excluded. + +name: spyglass +channels: + - conda-forge + - franklab + - edeno +dependencies: + # Core Python scientific stack + - python>=3.9,<3.13 + - pip + - numpy + - matplotlib + - bottleneck + - seaborn + + # Core Jupyter environment for notebooks + - jupyterlab>=3.* + - ipympl + + # Essential spyglass dependencies (installed via pip for development mode) + - pip: + - . \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000..0051f0707 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,396 @@ +# Spyglass Installation Scripts + +This directory contains streamlined installation and validation scripts for Spyglass. 
+ +## Quick Start + +Install Spyglass in one command: + +```bash +python scripts/install.py +``` + +This interactive installer will: +1. Check prerequisites (Python version, conda/mamba) +2. Create conda environment +3. Install Spyglass package +4. Optionally set up local database with Docker +5. Validate installation + +## Scripts + +### `install.py` - Main Installer + +Cross-platform installation script that automates the setup process. + +**Interactive Mode:** +```bash +python scripts/install.py +``` + +**Non-Interactive Mode:** +```bash +# Minimal installation +python scripts/install.py --minimal + +# Full installation with database +python scripts/install.py --full --docker + +# Custom environment name +python scripts/install.py --env-name my-spyglass + +# Custom data directory +python scripts/install.py --base-dir /data/spyglass +``` + +**Environment Variables:** +```bash +# Set base directory (skips prompt) +export SPYGLASS_BASE_DIR=/data/spyglass +python scripts/install.py +``` + +**Options:** +- `--minimal` - Install minimal dependencies only (~5 min, ~8 GB) +- `--full` - Install all dependencies (~15 min, ~18 GB) +- `--docker` - Set up local Docker database +- `--remote` - Connect to remote database (interactive or with CLI args) +- `--db-host HOST` - Database host for remote setup (non-interactive) +- `--db-port PORT` - Database port (default: 3306) +- `--db-user USER` - Database user (default: root) +- `--db-password PASS` - Database password (or use SPYGLASS_DB_PASSWORD env var) +- `--skip-validation` - Skip validation checks after installation +- `--env-name NAME` - Custom conda environment name (default: spyglass) +- `--base-dir PATH` - Base directory for data storage +- `--force` - Overwrite existing environment without prompting +- `--dry-run` - Show what would be done without making changes (coming soon) + +### `validate.py` - Health Check + +Validates that Spyglass is properly installed and configured. 
+ +**Usage:** +```bash +python scripts/validate.py +``` + +**Checks:** +1. Python version โ‰ฅ3.9 +2. Conda/mamba available +3. Spyglass can be imported +4. SpyglassConfig loads correctly +5. Database connection (if configured) + +**Exit Codes:** +- `0` - All checks passed +- `1` - One or more checks failed + +## Installation Types + +### Minimal Installation +- Core dependencies only +- Suitable for basic usage +- Disk space: ~8 GB +- Install time: ~5 minutes + +### Full Installation +- All pipeline dependencies +- Includes LFP, position, spikesorting +- Disk space: ~18 GB +- Install time: ~15 minutes + +Note: DeepLabCut, Moseq, and Decoding require separate installation. + +## Requirements + +**System Requirements:** +- Python 3.9 or later +- conda or mamba package manager +- Git (recommended) +- Docker (optional, for local database) + +**Platform Support:** +- macOS (Intel & Apple Silicon) +- Linux +- Windows (via WSL or native) + +## Database Setup + +The installer supports three database setup options: + +### Option 1: Docker Compose (Recommended for Local Development) + +Automatically set up a local MySQL database using Docker Compose: + +```bash +python scripts/install.py --docker # Auto-uses Compose +``` + +Or directly: +```bash +docker compose up -d +``` + +This creates a container named `spyglass-db` with: +- Host: localhost +- Port: 3306 +- User: root +- Password: tutorial +- TLS: Disabled +- Persistent storage via Docker volume + +**Benefits:** +- One-command setup +- Infrastructure as code (version controlled) +- Easy to customize via `.env` file +- Built-in health checks + +**Customization:** +```bash +# Create .env file to customize settings +cp .env.example .env +nano .env # Edit MYSQL_PORT, MYSQL_ROOT_PASSWORD, etc. +``` + +See `docker-compose.yml` and `.env.example` in the repository root. 
+ +### Option 2: Remote Database + +**Interactive mode:** + +```bash +python scripts/install.py --remote +``` + +You'll be prompted to enter: +- Host (e.g., db.example.com) +- Port (default: 3306) +- User (default: root) +- Password (hidden input) +- TLS settings (automatically enabled for non-localhost hosts) + +**Non-interactive mode (for automation/CI/CD):** + +```bash +# Using CLI arguments +python scripts/install.py --remote \ + --db-host db.lab.edu \ + --db-user myuser \ + --db-password mysecret + +# Using environment variable for password (recommended) +export SPYGLASS_DB_PASSWORD=mysecret +python scripts/install.py --remote \ + --db-host db.lab.edu \ + --db-user myuser +``` + +**Security Notes:** +- Passwords are hidden during interactive input (using `getpass`) +- For automation, use `SPYGLASS_DB_PASSWORD` env var instead of `--db-password` +- TLS is automatically enabled for remote hosts +- Configuration is saved to `~/.datajoint_config.json` +- Use `--force` to overwrite existing configuration + +### Option 3: Interactive Choice + +Without flags, the installer presents an interactive menu: + +```bash +python scripts/install.py + +Database setup: + 1. Docker Compose (Recommended) - One-command setup + 2. Remote - Connect to existing database + 3. Skip - Configure later + +Choice [1-3]: +``` + +The installer will auto-detect if Docker Compose is available and recommend it. + +### Option 4: Manual Setup + +Skip database setup during installation and configure manually later: + +```bash +python scripts/install.py --skip-validation +# Then configure manually: see docs/DATABASE.md +``` + +## Configuration + +The installer respects the following configuration priority: + +1. **CLI arguments** (highest priority) + ```bash + python scripts/install.py --base-dir /custom/path + ``` + +2. **Environment variables** + ```bash + export SPYGLASS_BASE_DIR=/custom/path + python scripts/install.py + ``` + +3. 
**Interactive prompts** (lowest priority) + - Installer will ask for configuration if not provided + +## Troubleshooting + +### Environment Already Exists + +If the installer detects an existing environment: +``` +Environment 'spyglass' exists. Overwrite? [y/N]: +``` + +**Options:** +- Answer `n` to use the existing environment (installation continues) +- Answer `y` to remove and recreate the environment +- Use `--env-name different-name` to create a separate environment +- Use `--force` to automatically overwrite without prompting + +### Environment Creation Fails + +```bash +# Update conda +conda update conda + +# Clear cache +conda clean --all + +# Try with mamba (faster) +mamba env create -f environment.yml +``` + +### Docker Issues + +Check Docker is running: +```bash +docker info +``` + +If Docker is not available: +- Install from https://docs.docker.com/get-docker/ +- Or configure database manually (see docs/DATABASE.md) + +### Database Connection Fails + +Verify configuration: +```bash +# Check config file exists +ls ~/.datajoint_config.json + +# Test connection +python -c "import datajoint as dj; dj.conn().ping(); print('โœ“ Connected')" +``` + +### Import Errors + +Ensure environment is activated: +```bash +conda activate spyglass +python -c "import spyglass; print(spyglass.__version__)" +``` + +## Development + +### Testing the Installer + +```bash +# Create test environment +python scripts/install.py --env-name spyglass-test --minimal --skip-validation + +# Validate installation +conda activate spyglass-test +python scripts/validate.py + +# Clean up +conda deactivate +conda env remove -n spyglass-test +``` + +### Running Unit Tests + +```bash +# Direct testing (bypasses pytest conftest issues) +python -c " +import sys +from pathlib import Path +sys.path.insert(0, str(Path.cwd() / 'scripts')) +from install import get_required_python_version, get_conda_command + +version = get_required_python_version() +print(f'Python version: {version}') +assert 
version[0] == 3 and version[1] >= 9 + +cmd = get_conda_command() +print(f'Conda command: {cmd}') +assert cmd in ['conda', 'mamba'] + +print('โœ“ All tests passed') +" +``` + +## Architecture + +### Design Principles + +1. **Self-contained** - Minimal dependencies (stdlib only) +2. **Cross-platform** - Works on Windows, macOS, Linux +3. **Single source of truth** - Reads versions from `pyproject.toml` +4. **Explicit configuration** - Clear priority: CLI > env var > prompt +5. **Graceful degradation** - Works even if optional components fail + +### Critical Execution Order + +The installer must follow this order to avoid circular dependencies: + +1. **Prerequisites check** (no spyglass imports) +2. **Create conda environment** (no spyglass imports) +3. **Install spyglass package** (`pip install -e .`) +4. **Setup database** (inline code, no spyglass imports) +5. **Validate** (runs in new environment, CAN import spyglass) + +### Why Inline Docker Code? + +The installer uses inline Docker operations instead of importing from `spyglass.utils.docker` because: +- Spyglass is not installed yet when the installer runs +- Cannot create circular dependency (installer โ†’ spyglass โ†’ installer) +- Must be self-contained with stdlib only + +The reusable Docker utilities are in `src/spyglass/utils/docker.py` for: +- Testing infrastructure (`tests/container.py`) +- Post-installation database management +- Other spyglass code + +## Comparison with Original Setup + +| Aspect | Old Setup | New Installer | +|--------|-----------|---------------| +| Steps | ~30 manual | 1 command | +| Time | Hours | 5-15 minutes | +| Lines of code | ~6,000 | ~500 | +| Platforms | Manual per platform | Unified cross-platform | +| Validation | Manual | Automatic | +| Error recovery | Debug manually | Clear messages + guidance | + +## Related Files + +- `environment-min.yml` - Minimal dependencies +- `environment.yml` - Full dependencies +- `src/spyglass/utils/docker.py` - Reusable Docker utilities +- 
`tests/setup/test_install.py` - Unit tests +- `pyproject.toml` - Python version requirements (single source of truth) + +## Support + +For issues: +1. Check validation output: `python scripts/validate.py` +2. See docs/TROUBLESHOOTING.md (coming soon) +3. File issue at https://github.com/LorenFrankLab/spyglass/issues + +## License + +Same as Spyglass main package. diff --git a/scripts/install.py b/scripts/install.py new file mode 100755 index 000000000..e33c1fdc3 --- /dev/null +++ b/scripts/install.py @@ -0,0 +1,2598 @@ +#!/usr/bin/env python3 +"""Cross-platform Spyglass installer. + +This script automates the Spyglass installation process, reducing setup from +~30 manual steps to 2-3 interactive prompts. + +Usage: + python scripts/install.py # Interactive mode + python scripts/install.py --minimal # Minimal install + python scripts/install.py --full # Full install + python scripts/install.py --docker # Include database setup + python scripts/install.py --help # Show help + +Environment Variables: + SPYGLASS_BASE_DIR - Set base directory (skips prompt) + +Exit codes: + 0 - Installation successful + 1 - Installation failed +""" + +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +from pathlib import Path +from typing import Any, Dict, NamedTuple, Optional, Tuple + +# Color codes for cross-platform output +COLORS = ( + { + "green": "\033[92m", + "yellow": "\033[93m", + "red": "\033[91m", + "blue": "\033[94m", + "reset": "\033[0m", + } + if sys.platform != "win32" + else {k: "" for k in ["green", "yellow", "red", "blue", "reset"]} +) + +# System constants +BYTES_PER_GB = 1024**3 +LOCALHOST_ADDRESSES = frozenset(["localhost", "127.0.0.1", "::1"]) +CURRENT_SCHEMA_VERSION = "1.0.0" # Config schema version compatibility + +# Disk space requirements (GB) +DISK_SPACE_REQUIREMENTS = { + "minimal": 10, + "full": 25, +} + +# MySQL health check configuration +MYSQL_HEALTH_CHECK_INTERVAL = 2 # seconds +MYSQL_HEALTH_CHECK_ATTEMPTS = 
30  # 60 seconds total
MYSQL_HEALTH_CHECK_TIMEOUT = (
    MYSQL_HEALTH_CHECK_ATTEMPTS * MYSQL_HEALTH_CHECK_INTERVAL
)

# Docker configuration
DOCKER_IMAGE_PULL_TIMEOUT = 300  # 5 minutes
DOCKER_STARTUP_TIMEOUT = 60  # 1 minute
DEFAULT_MYSQL_PORT = 3306
DEFAULT_MYSQL_PASSWORD = "tutorial"


# Named tuple for database menu options
class DatabaseOption(NamedTuple):
    """One entry in the database-setup menu.

    Attributes
    ----------
    number : str
        Menu option number (e.g., "1", "2")
    name : str
        Short name of option (e.g., "Docker", "Remote")
    status : str
        Availability status with icon (e.g., "โœ“ Available", "โœ— Not available")
    description : str
        Detailed description for user
    """

    number: str
    name: str
    status: str
    description: str


def _print_with_icon(color: str, icon: str, msg: str) -> None:
    """Print *msg* behind a colored status icon (internal helper)."""
    print(f"{COLORS[color]}{icon}{COLORS['reset']} {msg}")


def print_step(msg: str) -> None:
    """Print installation step message.

    Parameters
    ----------
    msg : str
        Message to display
    """
    _print_with_icon("blue", "โ–ถ", msg)


def print_success(msg: str) -> None:
    """Print success message.

    Parameters
    ----------
    msg : str
        Success message to display
    """
    _print_with_icon("green", "โœ“", msg)


def print_warning(msg: str) -> None:
    """Print warning message.

    Parameters
    ----------
    msg : str
        Warning message to display
    """
    _print_with_icon("yellow", "โš ", msg)


def print_error(msg: str) -> None:
    """Print error message.

    Parameters
    ----------
    msg : str
        Error message to display
    """
    _print_with_icon("red", "โœ—", msg)


def show_progress_message(operation: str, estimated_minutes: int) -> None:
    """Announce a long-running operation with an estimated duration.

    Prints the step plus a time estimate so users do not assume the
    installer has frozen during slow operations.

    Parameters
    ----------
    operation : str
        Description of the operation being performed
    estimated_minutes : int
        Estimated completion time in minutes

    Returns
    -------
    None

    Examples
    --------
    >>> show_progress_message("Installing packages", 10)
    """
    print_step(operation)
    print(f" Estimated time: ~{estimated_minutes} minute(s)")
    print(" This may take a while - please be patient...")
    # Extra reassurance for the really long waits.
    if estimated_minutes > 10:
        print(" Tip: This is a good time for a coffee break")
+ """ + try: + import tomllib # Python 3.11+ + except ImportError: + try: + import tomli as tomllib # Fallback for Python 3.9-3.10 + except ImportError: + return (3, 9) # Safe fallback + + try: + pyproject_path = Path(__file__).parent.parent / "pyproject.toml" + with pyproject_path.open("rb") as f: + data = tomllib.load(f) + + # Parse ">=3.9,<3.13" format + requires_python = data["project"]["requires-python"] + match = re.search(r">=(\d+)\.(\d+)", requires_python) + if match: + return (int(match.group(1)), int(match.group(2))) + except (FileNotFoundError, KeyError, AttributeError, ValueError): + # Expected errors during parsing - use safe fallback + pass + + return (3, 9) # Safe fallback + + +def check_disk_space(required_gb: int, path: Path) -> Tuple[bool, int]: + """Check available disk space at given path. + + Walks up directory tree to find existing parent if path doesn't + exist yet, then checks available disk space. + + Parameters + ---------- + required_gb : int + Required disk space in gigabytes + path : pathlib.Path + Path to check. If doesn't exist, checks nearest existing parent. + + Returns + ------- + sufficient : bool + True if available space >= required space + available_gb : int + Available disk space in gigabytes + + Examples + -------- + >>> sufficient, available = check_disk_space(10, Path("/tmp")) + >>> if sufficient: + ... print(f"OK: {available} GB available") + """ + # Find existing path to check + check_path = path + while not check_path.exists() and check_path != check_path.parent: + check_path = check_path.parent + + # Get disk usage + usage = shutil.disk_usage(check_path) + available_gb = usage.free / BYTES_PER_GB + + return available_gb >= required_gb, int(available_gb) + + +def check_prerequisites( + install_type: str = "minimal", base_dir: Optional[Path] = None +) -> None: + """Check system prerequisites before installation. 
+ + Verifies Python version, conda/mamba availability, and sufficient + disk space for the selected installation type. + + Parameters + ---------- + install_type : str, optional + Installation type - either 'minimal' or 'full' (default: 'minimal') + base_dir : pathlib.Path, optional + Base directory where Spyglass data will be stored + + Raises + ------ + RuntimeError + If prerequisites are not met (insufficient disk space, etc.) + + Examples + -------- + >>> check_prerequisites("minimal", Path("/tmp/spyglass_data")) + """ + print_step("Checking prerequisites...") + + # Get Python version requirement from pyproject.toml + min_version = get_required_python_version() + + # Python version + if sys.version_info < min_version: + raise RuntimeError( + f"Python {min_version[0]}.{min_version[1]}+ required, " + f"found {sys.version_info.major}.{sys.version_info.minor}" + ) + print_success(f"Python {sys.version_info.major}.{sys.version_info.minor}") + + # Conda/Mamba + conda_cmd = get_conda_command() + print_success(f"Package manager: {conda_cmd}") + + # Git (optional but recommended) + if not shutil.which("git"): + print_warning("Git not found (recommended for development)") + else: + print_success("Git available") + + # Disk space check (if base_dir provided) + if base_dir: + # Add buffer: minimal needs ~10GB (8 + 2), full needs ~25GB (18 + 7) + required_space = {"minimal": 10, "full": 25} + required_gb = required_space.get(install_type, 10) + + sufficient, available_gb = check_disk_space(required_gb, base_dir) + + if sufficient: + print_success( + f"Disk space: {available_gb} GB available (need {required_gb} GB)" + ) + else: + print_error( + "Insufficient disk space - installation cannot continue" + ) + print(f" Checking: {base_dir}") + print(f" Available: {available_gb} GB") + print( + f" Required: {required_gb} GB ({install_type} installation)" + ) + print() + print(" To fix:") + print(" 1. Free up disk space in this location") + print( + f" 2. 
Choose different directory: python scripts/install.py --base-dir /other/path" + ) + print( + " 3. Use minimal install (needs 10 GB): python scripts/install.py --minimal" + ) + raise RuntimeError("Insufficient disk space") + + +def get_conda_command() -> str: + """Get conda or mamba command. + + Returns: + 'mamba' if available, else 'conda' + + Raises: + RuntimeError: If neither conda nor mamba found + """ + if shutil.which("mamba"): + return "mamba" + elif shutil.which("conda"): + return "conda" + else: + raise RuntimeError( + "conda or mamba not found. Install from:\n" + " https://github.com/conda-forge/miniforge" + ) + + +def get_base_directory(cli_arg: Optional[str] = None) -> Path: + """Get base directory for Spyglass data with write permission validation. + + Determines base directory using priority: CLI argument > environment + variable > interactive prompt. Validates that directory can be created + and written to before returning. + + Parameters + ---------- + cli_arg : str, optional + Base directory path from CLI argument. If provided, takes highest + priority over environment variables and prompts. + + Returns + ------- + pathlib.Path + Validated base directory path that is writable + + Raises + ------ + RuntimeError + If directory cannot be created or is not writable due to permissions + + Examples + -------- + >>> # From CLI argument + >>> base_dir = get_base_directory("/data/spyglass") + + >>> # From environment or prompt + >>> base_dir = get_base_directory() + """ + + def validate_and_test_write(path: Path) -> Path: + """Validate directory and test write permissions. 
+ + Parameters + ---------- + path : pathlib.Path + Directory path to validate + + Returns + ------- + pathlib.Path + Validated directory path + + Raises + ------ + RuntimeError + If directory cannot be created or written to + """ + try: + # Check if we can create the directory + path.mkdir(parents=True, exist_ok=True) + + # Test write access + test_file = path / ".spyglass_write_test" + test_file.touch() + test_file.unlink() + + return path + + except PermissionError: + raise RuntimeError( + f"Cannot write to base directory: {path}\n" + f" Check permissions or choose a different location" + ) + except OSError as e: + raise RuntimeError( + f"Cannot create base directory: {path}\n" f" Error: {e}" + ) + + # 1. CLI argument (highest priority) + if cli_arg: + base_path = Path(cli_arg).expanduser().resolve() + validated_path = validate_and_test_write(base_path) + print_success(f"Using base directory from CLI: {validated_path}") + return validated_path + + # 2. Environment variable (second priority) + if base_env := os.getenv("SPYGLASS_BASE_DIR"): + base_path = Path(base_env).expanduser().resolve() + validated_path = validate_and_test_write(base_path) + print_success( + f"Using base directory from environment: {validated_path}" + ) + return validated_path + + # 3. 
Interactive prompt + print("\nWhere should Spyglass store data?") + default = Path.home() / "spyglass_data" + print(f" Default: {default}") + print( + " Tip: Set SPYGLASS_BASE_DIR environment variable to skip this prompt" + ) + + while True: + response = input(f"\nData directory [{default}]: ").strip() + + if not response: + try: + validated_path = validate_and_test_write(default) + print_success(f"Base directory validated: {validated_path}") + return validated_path + except RuntimeError as e: + print_error(str(e)) + continue + + try: + base_path = Path(response).expanduser().resolve() + + # Validate parent exists + if not base_path.parent.exists(): + print_error( + f"Parent directory does not exist: {base_path.parent}" + ) + print( + " Please create parent directory first or choose another location" + ) + continue + + # Warn if directory already exists + if base_path.exists(): + if not base_path.is_dir(): + print_error( + f"Path exists but is not a directory: {base_path}" + ) + continue + + response = ( + input("Directory exists. Use it? [Y/n]: ").strip().lower() + ) + if response in ["n", "no"]: + continue + + # Validate write permissions + validated_path = validate_and_test_write(base_path) + print_success(f"Base directory validated: {validated_path}") + return validated_path + + except RuntimeError as e: + print_error(str(e)) + continue + except (ValueError, OSError) as e: + print_error(f"Invalid path: {e}") + + +def prompt_install_type() -> Tuple[str, str]: + """Interactive prompt for installation type. + + Displays menu of installation options (minimal vs full) and prompts + user to select one. Returns appropriate environment file and type. 
def create_conda_environment(
    env_file: str, env_name: str, force: bool = False
) -> None:
    """Create conda environment from file.

    Parameters
    ----------
    env_file : str
        Path to environment.yml file
    env_name : str
        Name for the environment
    force : bool, optional
        If True, overwrite existing environment without prompting
        (default: False)

    Raises
    ------
    RuntimeError
        If environment creation fails
    """
    # Full environment pulls far more packages than the minimal one.
    estimated_time = 5 if "min" in env_file else 15

    show_progress_message(
        f"Creating environment '{env_name}' from {env_file}", estimated_time
    )

    # Use the same launcher everywhere (mamba-only installs have no
    # 'conda' executable, so hard-coding 'conda' here would fail).
    conda_cmd = get_conda_command()

    # Detect an existing environment by exact name. Substring matching on
    # raw output would false-positive (e.g. 'spyglass' vs 'spyglass-test').
    listing = subprocess.run(
        [conda_cmd, "env", "list"], capture_output=True, text=True
    )
    existing_names = {
        line.split()[0]
        for line in listing.stdout.splitlines()
        if line.strip() and not line.startswith("#")
    }

    if env_name in existing_names:
        if not force:
            response = input(
                f"Environment '{env_name}' exists. Overwrite? [y/N]: "
            )
            if response.lower() not in ["y", "yes"]:
                print_success(f"Using existing environment '{env_name}'")
                print(
                    " Package installation will continue (updates if needed)"
                )
                print(" To use a different name, run with: --env-name ")
                return  # Skip environment creation, use existing

        print_step(f"Removing existing environment '{env_name}'...")
        subprocess.run(
            [conda_cmd, "env", "remove", "-n", env_name, "-y"], check=True
        )

    print(" Installing packages... (this will take several minutes)")

    try:
        # Popen so we can surface real-time progress instead of blocking.
        process = subprocess.Popen(
            [conda_cmd, "env", "create", "-f", env_file, "-n", env_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )

        # Show dots to indicate progress
        for line in process.stdout:
            if (
                "Solving environment" in line
                or "Downloading" in line
                or "Extracting" in line
            ):
                print(".", end="", flush=True)

        process.wait()
        print()  # New line after dots

        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, conda_cmd)

        print_success(f"Environment '{env_name}' created")

    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to create environment. Try:\n"
            f" 1. Update conda: conda update conda\n"
            f" 2. Clear cache: conda clean --all\n"
            f" 3. Check {env_file} for conflicts"
        ) from e


def install_spyglass_package(env_name: str) -> None:
    """Install spyglass package in development mode.

    Parameters
    ----------
    env_name : str
        Name of the conda environment

    Raises
    ------
    RuntimeError
        If pip installation inside the environment fails
    """
    show_progress_message("Installing spyglass package", 1)

    # Same launcher as environment creation (works on mamba-only setups).
    conda_cmd = get_conda_command()

    try:
        subprocess.run(
            [conda_cmd, "run", "-n", env_name, "pip", "install", "-e", "."],
            check=True,
        )
        print_success("Spyglass installed")
    except subprocess.CalledProcessError as e:
        raise RuntimeError("Failed to install spyglass package") from e
+ """ + if not shutil.which("docker"): + return False + + try: + result = subprocess.run( + ["docker", "info"], capture_output=True, timeout=5 + ) + return result.returncode == 0 + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): + return False + + +def is_docker_compose_available_inline() -> bool: + """Check if Docker Compose is installed (inline, no imports). + + Returns + ------- + bool + True if 'docker compose' command is available, False otherwise + + Notes + ----- + This is self-contained because spyglass isn't installed yet. + Checks for modern 'docker compose' (not legacy 'docker-compose'). + """ + try: + result = subprocess.run( + ["docker", "compose", "version"], + capture_output=True, + timeout=5, + ) + return result.returncode == 0 + except (subprocess.TimeoutExpired, FileNotFoundError): + return False + + +def get_compose_command_inline() -> list[str]: + """Get the appropriate Docker Compose command (inline, no imports). + + Returns + ------- + list[str] + Command prefix for Docker Compose (e.g., ['docker', 'compose']) + + Notes + ----- + This is self-contained because spyglass isn't installed yet. + Always returns modern 'docker compose' format. + """ + return ["docker", "compose"] + + +def generate_env_file_inline( + mysql_port: int = 3306, + mysql_password: str = "tutorial", + mysql_image: str = "datajoint/mysql:8.0", + env_path: str = ".env", +) -> None: + """Generate .env file for Docker Compose (inline, no imports). + + Parameters + ---------- + mysql_port : int, optional + MySQL port number (default: 3306) + mysql_password : str, optional + MySQL root password (default: 'tutorial') + mysql_image : str, optional + Docker image to use (default: 'datajoint/mysql:8.0') + env_path : str, optional + Path to write .env file (default: '.env') + + Returns + ------- + None + + Raises + ------ + OSError + If file cannot be written + + Notes + ----- + This is self-contained because spyglass isn't installed yet. 
+ Only writes non-default values to keep .env file minimal. + """ + env_lines = ["# Spyglass Docker Compose Configuration", ""] + + # Only write non-default values + if mysql_password != "tutorial": + env_lines.append(f"MYSQL_ROOT_PASSWORD={mysql_password}") + if mysql_port != 3306: + env_lines.append(f"MYSQL_PORT={mysql_port}") + if mysql_image != "datajoint/mysql:8.0": + env_lines.append(f"MYSQL_IMAGE={mysql_image}") + + # If all defaults, don't create file (compose will use defaults) + if len(env_lines) == 2: # Only header lines + return + + env_path_obj = Path(env_path) + with env_path_obj.open("w") as f: + f.write("\n".join(env_lines) + "\n") + + +def validate_env_file_inline(env_path: str = ".env") -> bool: + """Validate .env file exists and is readable (inline, no imports). + + Parameters + ---------- + env_path : str, optional + Path to .env file (default: '.env') + + Returns + ------- + bool + True if file exists and is readable (or doesn't exist, which is OK), + False if file exists but has issues + + Notes + ----- + This is self-contained because spyglass isn't installed yet. + Missing .env file is NOT an error (defaults will be used). + """ + import os + + # Missing .env is fine - compose uses defaults + if not os.path.exists(env_path): + return True + + # If it exists, make sure it's readable + try: + env_path_obj = Path(env_path) + with env_path_obj.open("r") as f: + f.read() + return True + except (OSError, PermissionError): + return False + + +# ============================================================================ +# JSON Schema Loading Functions (DRY Architecture) +# ============================================================================ +# These functions read from config_schema.json at repository root to ensure +# the installer and settings.py use the same directory structure (single +# source of truth). This avoids code duplication and ensures consistency. 
+ + +def validate_schema(schema: Dict[str, Any]) -> None: + """Validate config schema structure. + + Raises + ------ + ValueError + If schema is invalid or missing required keys + """ + if "directory_schema" not in schema: + raise ValueError("Schema missing 'directory_schema' key") + + required_prefixes = {"spyglass", "kachery", "dlc", "moseq"} + actual_prefixes = set(schema["directory_schema"].keys()) + + if required_prefixes != actual_prefixes: + missing = required_prefixes - actual_prefixes + extra = actual_prefixes - required_prefixes + msg = [] + if missing: + msg.append(f"Missing prefixes: {missing}") + if extra: + msg.append(f"Extra prefixes: {extra}") + raise ValueError("; ".join(msg)) + + # Validate each prefix has expected keys (matches settings.py exactly) + required_keys = { + "spyglass": { + "raw", + "analysis", + "recording", + "sorting", + "waveforms", + "temp", + "video", + "export", + }, + "kachery": {"cloud", "storage", "temp"}, + "dlc": {"project", "video", "output"}, + "moseq": {"project", "video"}, + } + + for prefix, expected_keys in required_keys.items(): + actual_keys = set(schema["directory_schema"][prefix].keys()) + if expected_keys != actual_keys: + missing = expected_keys - actual_keys + extra = actual_keys - expected_keys + msg = [f"Invalid keys for '{prefix}':"] + if missing: + msg.append(f"missing {missing}") + if extra: + msg.append(f"extra {extra}") + raise ValueError(" ".join(msg)) + + +def load_full_schema() -> Dict[str, Any]: + """Load complete schema including TLS config. + + Returns + ------- + Dict[str, Any] + Complete schema with directory_schema and tls sections + + Raises + ------ + FileNotFoundError + If config_schema.json not found + ValueError + If schema is invalid + """ + import json + + schema_path = Path(__file__).parent.parent / "config_schema.json" + + if not schema_path.exists(): + raise FileNotFoundError( + f"Config schema not found: {schema_path}\n" + f"This file should exist at repository root." 
+ ) + + try: + with open(schema_path) as f: + schema = json.load(f) + except (OSError, IOError) as e: + raise ValueError(f"Cannot read {schema_path}: {e}") + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in {schema_path}: {e}") + + if not isinstance(schema, dict): + raise ValueError(f"Schema should be a dict, got {type(schema)}") + + # Check schema version for compatibility + schema_version = schema.get("_schema_version") + if schema_version and schema_version != CURRENT_SCHEMA_VERSION: + print_warning( + f"Schema version mismatch: expected {CURRENT_SCHEMA_VERSION}, " + f"got {schema_version}. This may cause compatibility issues." + ) + + # Validate schema + validate_schema(schema) + + return schema + + +def load_directory_schema() -> Dict[str, Dict[str, str]]: + """Load directory schema from JSON file (single source of truth). + + Returns + ------- + Dict[str, Dict[str, str]] + Directory schema with prefixes (spyglass, kachery, dlc, moseq) + + Raises + ------ + FileNotFoundError + If config_schema.json not found at repository root + ValueError + If schema is invalid or missing required keys + """ + full_schema = load_full_schema() + return full_schema["directory_schema"] + + +def build_directory_structure( + base_dir: Path, + schema: Optional[Dict[str, Dict[str, str]]] = None, + create: bool = True, + verbose: bool = True, +) -> Dict[str, Path]: + """Build Spyglass directory structure from base directory. + + Parameters + ---------- + base_dir : Path + Base directory for Spyglass data + schema : Dict[str, Dict[str, str]], optional + Pre-loaded directory schema. If None, will load from file. 
+ create : bool, optional + Whether to create directories if they don't exist, by default True + verbose : bool, optional + Whether to print progress feedback, by default True + + Returns + ------- + Dict[str, Path] + Mapping of directory names to full paths + """ + if schema is None: + schema = load_directory_schema() + + # Validate schema loaded successfully + if not schema: + raise ValueError( + "Directory schema could not be loaded. " + "Check config_schema.json exists at repository root." + ) + + directories = {} + + if verbose and create: + print(f"Creating Spyglass directory structure in {base_dir}") + print(" Creating:") + + for prefix, dir_map in schema.items(): + for key, rel_path in dir_map.items(): + full_path = base_dir / rel_path + directories[f"{prefix}_{key}"] = full_path + + if create: + full_path.mkdir(parents=True, exist_ok=True) + if verbose: + print(f" โ€ข {rel_path}") + + if verbose and create: + print(f" โœ“ Created {len(directories)} directories") + + return directories + + +def determine_tls(host: str, schema: Optional[Dict[str, Any]] = None) -> bool: + """Automatically determine if TLS should be used. + + Uses smart defaults - no user prompt needed. + + Parameters + ---------- + host : str + Database hostname + schema : Dict[str, Any], optional + Pre-loaded schema. If None, will load from file. 
+ + Returns + ------- + bool + Whether to use TLS + """ + if schema is None: + schema = load_full_schema() + + tls_config = schema.get("tls", {}) + localhost_addresses = tls_config.get( + "localhost_addresses", ["localhost", "127.0.0.1", "::1"] + ) + + # Automatic decision: enable for remote, disable for local + is_local = host in localhost_addresses + use_tls = not is_local + + # User-friendly messaging (plain language instead of technical terms) + if is_local: + print( + f"{COLORS['blue']}โœ“ Connecting to local database at {host}{COLORS['reset']}" + ) + print(" Security: Using unencrypted connection (safe for localhost)") + else: + print( + f"{COLORS['blue']}โœ“ Connecting to remote database at {host}{COLORS['reset']}" + ) + print( + " Security: Using encrypted connection (TLS) to protect your data" + ) + print(" This is required when connecting over a network") + + return use_tls + + +def create_database_config( + host: str = "localhost", + port: int = 3306, + user: str = "root", + password: str = "tutorial", + use_tls: Optional[bool] = None, + base_dir: Optional[Path] = None, +) -> None: + """Create complete Spyglass configuration with database and directories. + + This creates a complete DataJoint + Spyglass configuration including: + - Database connection settings + - DataJoint external stores + - Spyglass directory structure (all 16 directories) + + Parameters + ---------- + host : str, optional + Database host (default: "localhost") + port : int, optional + Database port (default: 3306) + user : str, optional + Database user (default: "root") + password : str, optional + Database password (default: "tutorial") + use_tls : bool or None, optional + Whether to use TLS/SSL. If None, automatically determined based on host. + base_dir : Path or None, optional + Base directory for Spyglass data. If None, will prompt user. + + Notes + ----- + Uses JSON for safety (no code injection vulnerability). 
+ Reads directory structure from config_schema.json (DRY principle). + """ + # Get base directory if not provided + if base_dir is None: + base_dir = get_base_directory() + + # Load schema once for efficiency (used by both TLS and directory creation) + full_schema = load_full_schema() + dir_schema = full_schema["directory_schema"] + + # Auto-determine TLS if not explicitly provided + if use_tls is None: + use_tls = determine_tls(host, schema=full_schema) + + # Build directory structure from JSON schema + print_step("Setting up Spyglass directories...") + dirs = build_directory_structure( + base_dir, schema=dir_schema, create=True, verbose=True + ) + + # Create complete configuration + config = { + # Database connection settings + "database.host": host, + "database.port": port, + "database.user": user, + "database.password": password, + "database.use_tls": use_tls, + # DataJoint performance settings + "filepath_checksum_size_limit": 1 * 1024**3, # 1 GB + "enable_python_native_blobs": True, + # DataJoint stores for external file storage + "stores": { + "raw": { + "protocol": "file", + "location": str(dirs["spyglass_raw"]), + "stage": str(dirs["spyglass_raw"]), + }, + "analysis": { + "protocol": "file", + "location": str(dirs["spyglass_analysis"]), + "stage": str(dirs["spyglass_analysis"]), + }, + }, + # Spyglass custom configuration + "custom": { + "debug_mode": False, + "test_mode": False, + "kachery_zone": "franklab.default", + "spyglass_dirs": { + "base": str(base_dir), + "raw": str(dirs["spyglass_raw"]), + "analysis": str(dirs["spyglass_analysis"]), + "recording": str(dirs["spyglass_recording"]), + "sorting": str(dirs["spyglass_sorting"]), + "waveforms": str(dirs["spyglass_waveforms"]), + "temp": str(dirs["spyglass_temp"]), + "video": str(dirs["spyglass_video"]), + "export": str(dirs["spyglass_export"]), + }, + "kachery_dirs": { + "cloud": str(dirs["kachery_cloud"]), + "storage": str(dirs["kachery_storage"]), + "temp": str(dirs["kachery_temp"]), + }, + 
"dlc_dirs": { + "base": str(base_dir / "deeplabcut"), + "project": str(dirs["dlc_project"]), + "video": str(dirs["dlc_video"]), + "output": str(dirs["dlc_output"]), + }, + "moseq_dirs": { + "base": str(base_dir / "moseq"), + "project": str(dirs["moseq_project"]), + "video": str(dirs["moseq_video"]), + }, + }, + } + + config_file = Path.home() / ".datajoint_config.json" + + # Handle existing config file with better UX + if config_file.exists(): + print_warning(f"Configuration file already exists: {config_file}") + print("\nExisting database settings:") + try: + with config_file.open() as f: + existing = json.load(f) + existing_host = existing.get("database.host", "unknown") + existing_port = existing.get("database.port", "unknown") + existing_user = existing.get("database.user", "unknown") + print(f" Database: {existing_host}:{existing_port}") + print(f" User: {existing_user}") + except (OSError, IOError, json.JSONDecodeError, KeyError) as e: + print(f" (Unable to read existing config: {e})") + + print("\nOptions:") + print( + " [b] Backup and create new (saves to .datajoint_config.json.backup)" + ) + print(" [o] Overwrite with new settings") + print(" [k] Keep existing (cancel installation)") + + choice = input("\nChoice [B/o/k]: ").strip().lower() or "b" + + if choice in ["k", "keep"]: + print_warning( + "Keeping existing configuration. Installation cancelled." + ) + print("\nTo install with different settings:") + print( + " 1. Backup your config: cp ~/.datajoint_config.json ~/.datajoint_config.json.backup" + ) + print(" 2. 
Run installer again") + return + elif choice in ["b", "backup"]: + backup_file = config_file.with_suffix(".json.backup") + shutil.copy2(config_file, backup_file) + print_success(f"Backed up existing config to {backup_file}") + elif choice not in ["o", "overwrite"]: + print_error("Invalid choice") + return + + # Save configuration with atomic write and secure permissions + import tempfile + + # Security warning before saving + print_warning( + "Database password will be stored in plain text in config file.\n" + " For production environments:\n" + " 1. Use environment variable SPYGLASS_DB_PASSWORD\n" + " 2. File permissions will be restricted automatically\n" + " 3. Consider database roles with limited privileges" + ) + + # Atomic write: write to temp file, then move + config_dir = config_file.parent + with tempfile.NamedTemporaryFile( + mode="w", + dir=config_dir, + delete=False, + prefix=".datajoint_config.tmp", + suffix=".json", + ) as tmp_file: + json.dump(config, tmp_file, indent=2) + tmp_path = Path(tmp_file.name) + + # Set restrictive permissions (Unix/Linux/macOS only) + try: + tmp_path.chmod(0o600) # Owner read/write only + except (AttributeError, OSError): + # Windows doesn't support chmod - permissions handled differently + pass + + # Atomic move (on same filesystem) + shutil.move(str(tmp_path), str(config_file)) + print_success(f"Configuration saved to: {config_file}") + print(f" Permissions: Owner read/write only (secure)") + + # Enhanced success message with next steps + print() + print_success("โœ“ Spyglass configuration complete!") + print() + print("Database connection:") + print(f" โ€ข Server: {host}:{port}") + print(f" โ€ข User: {user}") + tls_status = "Yes" if use_tls else "No (localhost)" + print(f" โ€ข Encrypted: {tls_status}") + print() + print("Data directories:") + print(f" โ€ข Base: {base_dir}") + print(f" โ€ข Raw data: {config['custom']['spyglass_dirs']['raw']}") + print(f" โ€ข Analysis: {config['custom']['spyglass_dirs']['analysis']}") + 
print(f" โ€ข ({len(dirs)} directories total)") + print() + print("Next steps:") + print(" 1. Activate environment: conda activate spyglass") + print(" 2. Test your installation: python scripts/validate.py") + print(" 3. Start using Spyglass: python -c 'import spyglass'") + print() + print("Need help? See: https://lorenfranklab.github.io/spyglass/") + + +def validate_hostname(hostname: str) -> bool: + """Validate hostname format to prevent common typos. + + Performs basic validation to catch obvious errors like spaces, + control characters, multiple consecutive dots, or invalid length. + + Parameters + ---------- + hostname : str + Hostname or IP address to validate + + Returns + ------- + bool + True if hostname appears valid, False otherwise + + Examples + -------- + >>> validate_hostname("localhost") + True + >>> validate_hostname("db.example.com") + True + >>> validate_hostname("host with spaces") + False + >>> validate_hostname("..invalid") + False + + Notes + ----- + This is intentionally permissive - only catches obvious typos. + DNS resolution will be the final validation. + """ + if not hostname: + return False + + # Reject hostnames with whitespace or control characters + if any(c.isspace() or ord(c) < 32 for c in hostname): + return False + + # Reject obvious typos (multiple dots, leading/trailing dots) + if hostname.startswith(".") or hostname.endswith(".") or ".." in hostname: + return False + + # Check length (DNS hostname max is 253 characters) + if len(hostname) > 253: + return False + + return True + + +def is_port_available(host: str, port: int) -> Tuple[bool, str]: + """Check if port is available or reachable. 
+ + For localhost: Checks if port is free (available for binding) + For remote hosts: Checks if port is reachable (something listening) + + Parameters + ---------- + host : str + Hostname or IP address to check + port : int + Port number to check + + Returns + ------- + available : bool + True if port is available/reachable, False if blocked/in-use + message : str + Description of port status + + Examples + -------- + >>> available, msg = is_port_available("localhost", 3306) + >>> if not available: + ... print(f"Port issue: {msg}") + + Notes + ----- + The interpretation differs for localhost vs remote: + - localhost: False = port in use (good for remote, bad for Docker) + - remote: False = port unreachable (bad - firewall/wrong port) + """ + import socket + + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.settimeout(1) # 1 second timeout + result = sock.connect_ex((host, port)) + + # For localhost, we want the port to be FREE (not in use) + # For remote, we want the port to be IN USE (something listening) + + if host in LOCALHOST_ADDRESSES: + # Checking if local port is free for Docker/services + if result == 0: + # Port is in use + return False, f"Port {port} is already in use on {host}" + else: + # Port is free + return True, f"Port {port} is available on {host}" + else: + # Checking if remote port is reachable + if result == 0: + # Port is reachable (good!) + return True, f"Port {port} is reachable on {host}" + else: + # Port is not reachable + return ( + False, + f"Cannot reach {host}:{port} (firewall/wrong port?)", + ) + + except socket.gaierror: + # DNS resolution failed + return False, f"Cannot resolve hostname: {host}" + except socket.error as e: + # Other socket errors + return False, f"Socket error: {e}" + + +def prompt_remote_database_config() -> Optional[Dict[str, Any]]: + """Prompt user for remote database connection details. + + Interactively asks for host, port, user, and password. 
Uses getpass + for secure password input. Automatically enables TLS for remote hosts. + Validates hostname format to prevent typos. + + Parameters + ---------- + None + + Returns + ------- + dict or None + Dictionary with keys: 'host', 'port', 'user', 'password', 'use_tls' + Returns None if user cancels (Ctrl+C) + + Examples + -------- + >>> config = prompt_remote_database_config() + >>> if config: + ... print(f"Connecting to {config['host']}:{config['port']}") + """ + print("\nRemote database configuration:") + print(" Your lab admin should have provided these credentials.") + print(" Check your welcome email or contact your admin if unsure.") + print(" (Press Ctrl+C to cancel)") + + try: + host = input(" Host [localhost]: ").strip() or "localhost" + + # Validate hostname format + if not validate_hostname(host): + print_error(f"Invalid hostname: {host}") + print(" Hostname cannot contain spaces or invalid characters") + return None + port_str = input(" Port [3306]: ").strip() or "3306" + user = input(" User [root]: ").strip() or "root" + + # Use getpass for password to hide input + import getpass + + password = getpass.getpass(" Password: ") + + # Parse port + try: + port = int(port_str) + if not (1 <= port <= 65535): + raise ValueError("Port must be between 1 and 65535") + except ValueError as e: + print_error(f"Invalid port: {e}") + return None + + # Check if port is reachable + print(f" Testing connection to {host}:{port}...") + port_reachable, port_msg = is_port_available(host, port) + + if host not in LOCALHOST_ADDRESSES and not port_reachable: + # Remote host, port not reachable + print_warning(port_msg) + print("\n Possible causes:") + print(" โ€ข Wrong port number (MySQL usually uses 3306)") + print(" โ€ข Firewall blocking connections") + print(" โ€ข Database server not running") + print(" โ€ข Wrong hostname") + print("\n Common MySQL ports:") + print(" โ€ข Standard MySQL: 3306") + print(" โ€ข SSH tunnel: Check your tunnel configuration") + + retry = 
input("\n Continue anyway? [y/N]: ").strip().lower() + if retry not in ["y", "yes"]: + return None + elif port_reachable: + print(" โœ“ Port is reachable") + + # Determine TLS based on host (use TLS for non-localhost) + use_tls = host not in LOCALHOST_ADDRESSES + + if use_tls: + print_warning(f"TLS will be enabled for remote host '{host}'") + tls_response = input(" Disable TLS? [y/N]: ").strip().lower() + if tls_response in ["y", "yes"]: + use_tls = False + print_warning( + "TLS disabled (not recommended for remote connections)" + ) + + return { + "host": host, + "port": port, + "user": user, + "password": password, + "use_tls": use_tls, + } + + except KeyboardInterrupt: + print("\n") + print_warning("Database configuration cancelled") + return None + + +def get_database_options() -> Tuple[list[DatabaseOption], bool]: + """Get available database options based on system capabilities. + + Checks Docker Compose availability and returns menu options. + + Parameters + ---------- + None + + Returns + ------- + options : list of DatabaseOption + List of database option objects for menu display + compose_available : bool + True if Docker Compose is available + + Examples + -------- + >>> options, compose_avail = get_database_options() + >>> for opt in options: + ... print(f"{opt.number}. 
{opt.name} - {opt.status}") + """ + options = [] + + # Check Docker Compose availability + compose_available = is_docker_compose_available_inline() + + # Option 1: Remote database (primary use case - joining existing lab) + options.append( + DatabaseOption( + number="1", + name="Remote", + status="โœ“ Available (Recommended for lab members)", + description="Connect to lab's existing database", + ) + ) + + # Option 2: Docker (trial/development use case) + if compose_available: + options.append( + DatabaseOption( + number="2", + name="Docker", + status="โœ“ Available", + description="Local trial database (for testing)", + ) + ) + else: + options.append( + DatabaseOption( + number="2", + name="Docker", + status="โœ— Not available", + description="Requires Docker Desktop", + ) + ) + + # Option 3: Skip setup + options.append( + DatabaseOption( + number="3", + name="Skip", + status="โœ“ Available", + description="Configure manually later", + ) + ) + + return options, compose_available + + +def prompt_database_setup() -> str: + """Ask user about database setup preference. + + Displays menu of database setup options with availability status + and prompts user to choose. + + Parameters + ---------- + None + + Returns + ------- + str + One of: 'compose' (Docker Compose), 'remote' (existing database), + or 'skip' (configure later) + + Examples + -------- + >>> choice = prompt_database_setup() + >>> if choice == "compose": + ... setup_database_compose() + """ + print("\n" + "=" * 60) + print("Database Setup") + print("=" * 60) + + options, compose_available = get_database_options() + + print("\nOptions:") + for opt in options: + # Color status based on availability + status_color = COLORS["green"] if "โœ“" in opt.status else COLORS["red"] + print( + f" {opt.number}. 
{opt.name:20} {status_color}{opt.status}{COLORS['reset']}" + ) + print(f" {opt.description}") + + # If Docker not available, guide user + if not compose_available: + print(f"\n{COLORS['yellow']}โš {COLORS['reset']} Docker is not available") + print(" To enable Docker setup:") + print( + " 1. Install Docker Desktop: https://docs.docker.com/get-docker/" + ) + print(" 2. Start Docker Desktop") + print(" 3. Verify: docker compose version") + print(" 4. Re-run installer") + + # Map choices to actions (updated order: Remote first, then Docker) + choice_map = { + "1": "remote", + "2": "compose", + "3": "skip", + } + + # Get valid choices + valid_choices = ["1", "3"] # Remote and Skip always available + if compose_available: + valid_choices.insert(1, "2") # Insert Docker as option 2 if available + + while True: + choice = input(f"\nChoice [{'/'.join(valid_choices)}]: ").strip() + + if choice not in choice_map: + print_error(f"Please enter {' or '.join(valid_choices)}") + continue + + # Handle Docker unavailability + if choice == "2" and not compose_available: + print_error("Docker is not available") + continue + + return choice_map[choice] + + +def cleanup_failed_compose_setup_inline() -> None: + """Clean up after failed Docker Compose setup (inline, no imports). + + Stops and removes containers created by Docker Compose if setup fails. + This ensures a clean state for retry attempts. + + Returns + ------- + None + + Notes + ----- + This is self-contained because spyglass isn't installed yet. + Silently handles errors - cleanup is best-effort. + """ + try: + compose_cmd = get_compose_command_inline() + subprocess.run( + compose_cmd + ["down", "-v"], + capture_output=True, + timeout=30, + ) + except (subprocess.TimeoutExpired, FileNotFoundError): + pass # Best-effort cleanup + + +def setup_database_compose() -> Tuple[bool, str]: + """Set up database using Docker Compose. 
+ + Checks Docker Compose availability, generates .env if needed, + starts services, waits for readiness, and creates configuration file. + + Returns + ------- + success : bool + True if database setup succeeded, False otherwise + reason : str + Reason for failure or "success" + + Notes + ----- + This function cannot import from spyglass because spyglass hasn't been + installed yet. All operations must be inline. + + Uses docker-compose.yml in repository root for configuration. + Creates .env file only if non-default values are needed. + + Examples + -------- + >>> success, reason = setup_database_compose() + >>> if success: + ... print("Database ready") + """ + import time + + print_step("Setting up database with Docker Compose...") + + # Check Docker Compose availability + if not is_docker_compose_available_inline(): + return False, "compose_unavailable" + + # Check if port 3306 is available + port = 3306 # Default port (could be customized via .env) + port_available, port_msg = is_port_available("localhost", port) + if not port_available: + print_error(port_msg) + print("\n Port 3306 is already in use. Solutions:") + + # Platform-specific guidance + if sys.platform == "darwin": # macOS + print(" 1. Stop existing MySQL (if installed):") + print(" brew services stop mysql") + print( + " # or: sudo launchctl unload -w /Library/LaunchDaemons/com.mysql.mysql.plist" + ) + print(" 2. Find what's using the port:") + print(" lsof -i :3306") + elif sys.platform.startswith("linux"): # Linux + print(" 1. Stop existing MySQL service:") + print(" sudo systemctl stop mysql") + print(" # or: sudo service mysql stop") + print(" 2. Find what's using the port:") + print(" sudo lsof -i :3306") + print(" # or: sudo netstat -tulpn | grep 3306") + elif sys.platform == "win32": # Windows + print(" 1. Stop existing MySQL service:") + print(" net stop MySQL") + print(" # or use Services app (services.msc)") + print(" 2. 
Find what's using the port:") + print(" netstat -ano | findstr :3306") + + print(" Alternative: Use a different port:") + print(" Create .env file with: MYSQL_PORT=3307") + print(" (and update DataJoint config to match)") + return False, "port_in_use" + + # Show what will happen + print("\n" + "=" * 60) + print("Docker Database Setup") + print("=" * 60) + print("\nThis will:") + print(" โ€ข Download MySQL 8.0 Docker image (~200 MB)") + print(" โ€ข Create a container named 'spyglass-db'") + print(" โ€ข Start MySQL on localhost:3306") + print(" โ€ข Save credentials to ~/.datajoint_config.json") + print("\nEstimated time: 2-3 minutes") + print("=" * 60) + + try: + # Generate .env file (only if customizations needed) + # For now, use all defaults - no .env file needed + # Future: could prompt for port/password customization + generate_env_file_inline() + + # Validate .env if it exists + if not validate_env_file_inline(): + return False, "env_file_invalid" + + # Get compose command + compose_cmd = get_compose_command_inline() + + # Pull images first (better UX - shows progress) + show_progress_message("Pulling Docker images", 2) + result = subprocess.run( + compose_cmd + ["pull"], + capture_output=True, + timeout=300, # 5 minutes for image pull + ) + if result.returncode != 0: + print_error(f"Failed to pull images: {result.stderr.decode()}") + return False, "pull_failed" + + # Start services + print_step("Starting services...") + result = subprocess.run( + compose_cmd + ["up", "-d"], + capture_output=True, + timeout=60, + ) + if result.returncode != 0: + error_msg = result.stderr.decode() + print_error(f"Failed to start services: {error_msg}") + cleanup_failed_compose_setup_inline() + return False, "start_failed" + + print_success("Services started") + + # Wait for MySQL readiness using health check + print_step("Waiting for MySQL to be ready...") + print(" Checking connection", end="", flush=True) + + for attempt in range(30): # 60 seconds max + try: + # Check if 
service is healthy + result = subprocess.run( + compose_cmd + ["ps", "--format", "json"], + capture_output=True, + timeout=5, + ) + + if result.returncode == 0: + # Parse JSON output to check health + import json + + try: + services = json.loads(result.stdout.decode()) + # Handle both single dict and list of dicts + if isinstance(services, dict): + services = [services] + + mysql_service = next( + ( + s + for s in services + if "mysql" in s.get("Service", "") + ), + None, + ) + + if mysql_service and "healthy" in mysql_service.get( + "Health", "" + ): + print() # New line after dots + print_success("MySQL is ready") + break + except json.JSONDecodeError: + pass + + except subprocess.TimeoutExpired: + pass + + if attempt < 29: + print(".", end="", flush=True) + time.sleep(2) + else: + # Timeout - provide debug info + print() + print_error("MySQL did not become ready within 60 seconds") + print("\n Check logs:") + print(" docker compose logs mysql") + cleanup_failed_compose_setup_inline() + return False, "timeout" + + # Read actual port/password from .env if it exists + import os + + actual_port = port + actual_password = "tutorial" + + env_path = Path(".env") + if env_path.exists(): + # Parse .env file to check for custom values + try: + with env_path.open("r") as f: + for line in f: + line = line.strip() + if line.startswith("MYSQL_PORT="): + actual_port = int(line.split("=", 1)[1]) + elif line.startswith("MYSQL_ROOT_PASSWORD="): + actual_password = line.split("=", 1)[1] + except (OSError, ValueError): + pass # Use defaults if .env parsing fails + + # Create configuration file matching .env values + create_database_config( + host="localhost", + port=actual_port, + user="root", + password=actual_password, + use_tls=False, + ) + + # Warn if .env exists with custom values + if os.path.exists(".env"): + print_warning( + "Using custom settings from .env file. " + "DataJoint config updated to match." 
+ ) + + return True, "success" + + except subprocess.CalledProcessError as e: + print_error(f"Docker Compose command failed: {e}") + cleanup_failed_compose_setup_inline() + return False, str(e) + except subprocess.TimeoutExpired: + print_error("Docker Compose command timed out") + cleanup_failed_compose_setup_inline() + return False, "timeout" + except Exception as e: + print_error(f"Unexpected error: {e}") + cleanup_failed_compose_setup_inline() + return False, str(e) + + +def test_database_connection( + host: str, + port: int, + user: str, + password: str, + use_tls: bool, + timeout: int = 10, +) -> Tuple[bool, Optional[str]]: + """Test database connection before saving configuration. + + Attempts to connect to MySQL database and execute a simple query + to verify connectivity. Handles graceful fallback if pymysql not + yet installed. + + Parameters + ---------- + host : str + Database hostname or IP address + port : int + Database port number (typically 3306) + user : str + Database username for authentication + password : str + Database password for authentication + use_tls : bool + Whether to enable TLS/SSL encryption + timeout : int, optional + Connection timeout in seconds (default: 10) + + Returns + ------- + success : bool + True if connection succeeded, False otherwise + error_message : str or None + Error message if connection failed, None if successful + """ + try: + import pymysql + + print_step("Testing database connection...") + + connection = pymysql.connect( + host=host, + port=port, + user=user, + password=password, + connect_timeout=timeout, + ssl={"ssl": True} if use_tls else None, + ) + + # Test basic operation + with connection.cursor() as cursor: + cursor.execute("SELECT VERSION()") + version = cursor.fetchone() + print(f" MySQL version: {version[0]}") + + connection.close() + print_success("Database connection successful!") + return True, None + + except ImportError: + # pymysql not available yet (before pip install) + print_warning("Cannot 
test connection (pymysql not available)") + print(" Connection will be tested during validation") + return True, None # Allow to proceed + + except Exception as e: + error_msg = str(e) + print_error(f"Database connection failed: {error_msg}") + return False, error_msg + + +def handle_database_setup_interactive(env_name: str) -> None: + """Interactive database setup with retry logic. + + Allows user to try different database options if one fails, + without restarting the entire installation. + + Parameters + ---------- + env_name : str + Name of conda environment where DataJoint is installed + + Returns + ------- + None + """ + while True: + db_choice = prompt_database_setup() + + if db_choice == "compose": + success, reason = setup_database_compose() + if success: + break + else: + print_error("Docker setup failed") + if reason == "compose_unavailable": + print("\nDocker is not available.") + print(" Option 1: Install Docker Desktop and restart") + print(" Option 2: Choose remote database") + print(" Option 3: Skip for now") + else: + print(f" Error: {reason}") + + retry = input("\nTry different option? [Y/n]: ").strip().lower() + if retry in ["n", "no"]: + print_warning("Skipping database setup") + print(" Configure later: docker compose up -d") + print(" Or manually: see docs/DATABASE.md") + break + # Loop continues to show menu again + + elif db_choice == "remote": + success = setup_database_remote(env_name) + if success: + break + # If remote setup returns False (cancelled), loop to menu + + else: # skip + print_warning("Skipping database setup") + print(" Configure later: docker compose up -d") + print(" Or manually: see docs/DATABASE.md") + break + + +def handle_database_setup_cli( + env_name: str, + db_type: str, + db_host: Optional[str] = None, + db_port: Optional[int] = None, + db_user: Optional[str] = None, + db_password: Optional[str] = None, +) -> None: + """Handle database setup from CLI arguments. 
+ + Parameters + ---------- + env_name : str + Name of conda environment where DataJoint is installed + db_type : str + One of: "compose", "docker" (alias for compose), or "remote" + db_host : str, optional + Database host for remote connection + db_port : int, optional + Database port for remote connection + db_user : str, optional + Database user for remote connection + db_password : str, optional + Database password for remote connection + + Returns + ------- + None + """ + # Treat 'docker' as alias for 'compose' for backward compatibility + if db_type == "docker": + db_type = "compose" + + if db_type == "compose": + success, reason = setup_database_compose() + if not success: + print_error("Docker setup failed") + if reason == "compose_unavailable": + print_warning("Docker not available") + print(" Install from: https://docs.docker.com/get-docker/") + else: + print_error(f"Error: {reason}") + print(" You can configure manually later") + elif db_type == "remote": + success = setup_database_remote( + env_name=env_name, + host=db_host, + port=db_port, + user=db_user, + password=db_password, + ) + if not success: + print_warning("Remote database setup cancelled") + print(" You can configure manually later") + + +def change_database_password( + host: str, + port: int, + user: str, + old_password: str, + use_tls: bool, + env_name: str, +) -> Optional[str]: + """Prompt user to change their database password using DataJoint. + + Interactive password change flow for new lab members who received + temporary credentials from their admin. Runs inside the conda environment + where DataJoint is installed. 
+ + Parameters + ---------- + host : str + Database hostname + port : int + Database port + user : str + Database username + old_password : str + Current password (temporary from admin) + use_tls : bool + Whether TLS is enabled + env_name : str + Name of conda environment where DataJoint is installed + + Returns + ------- + str or None + New password if changed, None if user skipped or error occurred + + Notes + ----- + Prompts user for new password, then uses DataJoint (running in conda env) + to change password on MySQL server. + """ + import getpass + + print("\n" + "=" * 60) + print("Password Change (Recommended for lab members)") + print("=" * 60) + print("\nIf you received temporary credentials from your lab admin,") + print("you should change your password now for security.") + print() + + change = input("Change password? [Y/n]: ").strip().lower() + if change in ["n", "no"]: + print_warning("Keeping current password") + return None + + # Prompt for new password with confirmation + while True: + print() + new_password = getpass.getpass(" New password: ") + if not new_password: + print_error("Password cannot be empty") + continue + + confirm_password = getpass.getpass(" Confirm password: ") + if new_password != confirm_password: + print_error("Passwords do not match") + retry = input(" Try again? 
[Y/n]: ").strip().lower() + if retry in ["n", "no"]: + return None + continue + + break + + # Change password using DataJoint in conda environment + print_step("Changing password on database server...") + + # Build Python code to run inside conda environment + # New password is passed via environment variable for security + python_code = f""" +import sys +import os +import datajoint as dj + +# Configure connection +dj.config['database.host'] = {repr(host)} +dj.config['database.port'] = {port} +dj.config['database.user'] = {repr(user)} +dj.config['database.password'] = {repr(old_password)} +{'dj.config["database.use_tls"] = True' if use_tls else ''} + +# Get new password from environment variable (passed securely) +new_password = os.environ.get('SPYGLASS_NEW_PASSWORD') +if not new_password: + print("ERROR: SPYGLASS_NEW_PASSWORD not provided", file=sys.stderr) + sys.exit(1) + +try: + # Connect to database + dj.conn() + + # Change password using ALTER USER with proper parameterization + conn = dj.conn() + with conn.cursor() as cursor: + cursor.execute("ALTER USER %s@'%%' IDENTIFIED BY %s", + (dj.config['database.user'], new_password)) + conn.commit() + + print("SUCCESS") +except Exception as e: + print(f"ERROR: {{e}}", file=sys.stderr) + sys.exit(1) +""" + + try: + # Pass new password via environment variable for security + import os + env = os.environ.copy() + env['SPYGLASS_NEW_PASSWORD'] = new_password + + result = subprocess.run( + ["conda", "run", "-n", env_name, "python", "-c", python_code], + env=env, + capture_output=True, + text=True, + timeout=30, + ) + + if result.returncode == 0 and "SUCCESS" in result.stdout: + print_success("Password changed successfully!") + return new_password + else: + print_error(f"Failed to change password: {result.stderr}") + print("\nYou can change it manually later:") + print(f" conda activate {env_name}") + print(" python -c 'import datajoint as dj; dj.set_password()'") + return None + + except subprocess.TimeoutExpired: + 
print_error("Password change timed out") + print("\nYou can change it manually later:") + print(f" conda activate {env_name}") + print(" python -c 'import datajoint as dj; dj.set_password()'") + return None + + except Exception as e: + print_error(f"Failed to change password: {e}") + print("\nYou can change it manually later:") + print(f" conda activate {env_name}") + print(" python -c 'import datajoint as dj; dj.set_password()'") + return None + + +def setup_database_remote( + env_name: str, + host: Optional[str] = None, + port: Optional[int] = None, + user: Optional[str] = None, + password: Optional[str] = None, +) -> bool: + """Set up remote database connection. + + Prompts for connection details (if not provided), tests the connection, + optionally changes password for new lab members, and creates configuration + file if connection succeeds. + + Parameters + ---------- + env_name : str + Name of conda environment where DataJoint is installed + host : str, optional + Database host (prompts if not provided) + port : int, optional + Database port (prompts if not provided) + user : str, optional + Database user (prompts if not provided) + password : str, optional + Database password (prompts if not provided, checks env var) + + Returns + ------- + bool + True if configuration was created, False if cancelled + + Examples + -------- + >>> if setup_database_remote(): + ... print("Remote database configured") + >>> if setup_database_remote(host="db.example.com", user="myuser"): + ... 
print("Non-interactive setup succeeded") + """ + print_step("Setting up remote database connection...") + + # If any parameters are missing, prompt interactively + if host is None or user is None or password is None: + config = prompt_remote_database_config() + if config is None: + return False + else: + # Non-interactive mode - use provided parameters + import os + + # Validate hostname format + if not validate_hostname(host): + print_error(f"Invalid hostname: {host}") + print(" Hostname cannot contain spaces or invalid characters") + return False + + # Check environment variable for password if not provided + if password is None: + password = os.environ.get("SPYGLASS_DB_PASSWORD") + if password is None: + print_error( + "Password required: use --db-password or SPYGLASS_DB_PASSWORD env var" + ) + return False + + # Use defaults for optional parameters + if port is None: + port = 3306 + + # Check if port is reachable (for remote hosts only) + if host not in LOCALHOST_ADDRESSES: + print(f" Testing connection to {host}:{port}...") + port_reachable, port_msg = is_port_available(host, port) + if not port_reachable: + print_warning(port_msg) + print(" Port may be blocked by firewall or wrong port number") + print(" Continuing anyway (connection test will verify)...") + else: + print(" โœ“ Port is reachable") + + # Determine TLS based on host + use_tls = host not in LOCALHOST_ADDRESSES + + config = { + "host": host, + "port": port, + "user": user, + "password": password, + "use_tls": use_tls, + } + + print(f" Connecting to {host}:{port} as {user}") + if use_tls: + print(" TLS: enabled") + + # Test connection before saving + success, _error = test_database_connection(**config) + + if not success: + print_error(f"Cannot connect to database: {_error}") + print() + print("Most common causes (in order):") + print(" 1. Wrong password - Double check credentials") + print(" 2. Firewall blocking connection") + print(" 3. Database not running") + print(" 4. 
TLS mismatch") + print() + print("Diagnostic steps:") + print(f" Test port: nc -zv {host} {port}") + print(f" Test MySQL: mysql -h {host} -P {port} -u {user} -p") + print() + print( + "Need help? See: docs/TROUBLESHOOTING.md#database-connection-fails" + ) + print() + + retry = input("Retry with different settings? [y/N]: ").strip().lower() + if retry in ["y", "yes"]: + return setup_database_remote(env_name) # Recursive retry + else: + print_warning("Database setup cancelled") + return False + + # Offer password change for new lab members (only for non-localhost) + if config["host"] not in LOCALHOST_ADDRESSES: + new_password = change_database_password( + host=config["host"], + port=config["port"], + user=config["user"], + old_password=config["password"], + use_tls=config["use_tls"], + env_name=env_name, + ) + # Update config with new password if changed + if new_password is not None: + config["password"] = new_password + + # Save configuration + create_database_config(**config) + return True + + +def validate_installation(env_name: str) -> bool: + """Run validation checks. + + Executes validate.py script in the specified conda environment to + verify installation success. + + Parameters + ---------- + env_name : str + Name of the conda environment to validate + + Returns + ------- + bool + True if all critical checks passed, False if any failed + + Notes + ----- + Prints warnings if validation fails but does not raise exceptions. + """ + print_step("Validating installation...") + + validate_script = Path(__file__).parent / "validate.py" + + try: + subprocess.run( + ["conda", "run", "-n", env_name, "python", str(validate_script)], + check=True, + ) + print_success("Validation passed") + return True + except subprocess.CalledProcessError: + print_warning("Some validation checks failed") + print(" Review errors above and see docs/TROUBLESHOOTING.md") + return False + + +def run_installation(args) -> None: + """Main installation flow. 

    Orchestrates the complete installation process in a specific order
    to avoid import issues and ensure proper setup.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments containing installation options

    Returns
    -------
    None

    Notes
    -----
    CRITICAL ORDER:
    1. Get base directory (for disk space check)
    2. Check prerequisites including disk space (no spyglass imports)
    3. Create conda environment (no spyglass imports)
    4. Install spyglass package (pip install -e .)
    5. Setup database (inline code, NO spyglass imports)
    6. Validate (runs IN the new environment, CAN import spyglass)
    """
    # NOTE(review): args.dry_run is accepted by the CLI parser but is never
    # consulted anywhere in this flow -- implement it or drop the flag.
    print(f"\n{COLORS['blue']}{'='*60}{COLORS['reset']}")
    print(f"{COLORS['blue']}  Spyglass Installation{COLORS['reset']}")
    print(f"{COLORS['blue']}{'='*60}{COLORS['reset']}\n")

    # Determine installation type
    if args.minimal:
        env_file = "environment-min.yml"
        install_type = "minimal"
    elif args.full:
        env_file = "environment.yml"
        install_type = "full"
    else:
        env_file, install_type = prompt_install_type()

    # 1. Get base directory first (CLI arg > env var > prompt)
    base_dir = get_base_directory(args.base_dir)

    # 2. Check prerequisites with disk space validation (no spyglass imports)
    check_prerequisites(install_type, base_dir)

    # 3. Create environment (no spyglass imports)
    create_conda_environment(env_file, args.env_name, force=args.force)

    # 4. Install package (pip install makes spyglass available)
    install_spyglass_package(args.env_name)

    # 5. Database setup (INLINE CODE - no spyglass imports!)
    # This happens AFTER spyglass is installed but doesn't use it
    # because docker operations are self-contained
    if args.docker:
        # Docker explicitly requested via CLI
        handle_database_setup_cli(args.env_name, "docker")
    elif args.remote:
        # Remote database explicitly requested via CLI
        # Support non-interactive mode with CLI args or env vars
        import os

        db_password = args.db_password or os.environ.get("SPYGLASS_DB_PASSWORD")
        handle_database_setup_cli(
            args.env_name,
            "remote",
            db_host=args.db_host,
            db_port=args.db_port,
            db_user=args.db_user,
            db_password=db_password,
        )
    else:
        # Interactive prompt with retry logic
        handle_database_setup_interactive(args.env_name)

    # 6. Validation (runs in new environment, CAN import spyglass)
    validation_passed = True
    if not args.skip_validation:
        validation_passed = validate_installation(args.env_name)

    # Success message - conditional based on validation
    print(f"\n{COLORS['green']}{'='*60}{COLORS['reset']}")
    if validation_passed:
        print(f"{COLORS['green']}✓ Installation complete!{COLORS['reset']}")
        print(f"{COLORS['green']}{'='*60}{COLORS['reset']}\n")
    else:
        print(
            f"{COLORS['yellow']}⚠ Installation complete with warnings{COLORS['reset']}"
        )
        print(f"{COLORS['yellow']}{'='*60}{COLORS['reset']}\n")
        print("Core installation succeeded but some features may not work.")
        print("Review warnings above and see: docs/TROUBLESHOOTING.md\n")
    print("Next steps:")
    print(f"  1. Activate environment: conda activate {args.env_name}")
    print("  2. Start tutorial: jupyter notebook notebooks/")
    print(
        "  3. View documentation: https://lorenfranklab.github.io/spyglass/"
    )


def main() -> None:
    """Main entry point for Spyglass installer.

    Parses command-line arguments and runs the installation process.
+ """ + parser = argparse.ArgumentParser( + description="Install Spyglass in one command", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python scripts/install.py # Interactive + python scripts/install.py --minimal # Minimal install + python scripts/install.py --full --docker # Full with local database + python scripts/install.py --remote # Connect to remote database + +Environment Variables: + SPYGLASS_BASE_DIR - Set base directory (skips prompt) + """, + ) + parser.add_argument( + "--minimal", + action="store_true", + help="Install minimal dependencies only", + ) + parser.add_argument( + "--full", action="store_true", help="Install all dependencies" + ) + parser.add_argument( + "--docker", action="store_true", help="Set up local Docker database" + ) + parser.add_argument( + "--remote", + action="store_true", + help="Connect to remote database (interactive)", + ) + parser.add_argument("--db-host", help="Database host (for --remote)") + parser.add_argument( + "--db-port", + type=int, + default=3306, + help="Database port (default: 3306)", + ) + parser.add_argument( + "--db-user", default="root", help="Database user (default: root)" + ) + parser.add_argument( + "--db-password", + help="Database password (or use SPYGLASS_DB_PASSWORD env var)", + ) + parser.add_argument( + "--skip-validation", action="store_true", help="Skip validation checks" + ) + parser.add_argument( + "--env-name", + default="spyglass", + help="Conda environment name (default: spyglass)", + ) + parser.add_argument( + "--base-dir", + help="Base directory for data (overrides SPYGLASS_BASE_DIR)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be done without making changes", + ) + parser.add_argument( + "--force", + action="store_true", + help="Overwrite existing environment without prompting", + ) + + args = parser.parse_args() + + try: + run_installation(args) + except KeyboardInterrupt: + print("\n\nInstallation 
cancelled by user.") + sys.exit(1) + except Exception as e: + print_error(f"Installation failed: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/validate.py b/scripts/validate.py new file mode 100755 index 000000000..48efc5473 --- /dev/null +++ b/scripts/validate.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +"""Validate Spyglass installation. + +This script checks that Spyglass is properly installed and configured. +It can be run standalone or called by the installer. + +Usage: + python scripts/validate.py + +Exit codes: + 0 - All checks passed + 1 - One or more checks failed +""" + +import re +import sys +from pathlib import Path +from typing import Callable, NamedTuple + +# Exit codes +EXIT_SUCCESS = 0 +EXIT_FAILURE = 1 + + +class Check(NamedTuple): + """Represents a validation check to run. + + Attributes + ---------- + name : str + Human-readable name of the check + func : Callable[[], None] + Function to execute for this check + critical : bool + If True, failure causes validation to fail (default: True) + If False, failure only produces warning + """ + + name: str + func: Callable[[], None] + critical: bool = True + + +def check_python_version() -> None: + """Check Python version meets minimum requirement. + + Reads requirement from pyproject.toml to maintain single source of truth. + Falls back to hardcoded (3, 9) if parsing fails. + + Parameters + ---------- + None + + Returns + ------- + None + + Raises + ------ + RuntimeError + If Python version is below minimum requirement + """ + min_version = get_required_python_version() + + if sys.version_info < min_version: + raise RuntimeError( + f"Python {min_version[0]}.{min_version[1]}+ required, " + f"found {sys.version_info.major}.{sys.version_info.minor}" + ) + + print( + f"โœ“ Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + ) + + +def get_required_python_version() -> tuple[int, int]: + """Get required Python version from pyproject.toml. 
+ + This ensures single source of truth for version requirements. + Falls back to (3, 9) if parsing fails. + + Parameters + ---------- + None + + Returns + ------- + tuple + Tuple of (major, minor) version numbers as integers + + Examples + -------- + >>> major, minor = get_required_python_version() + >>> print(f"Requires Python {major}.{minor}+") + + Notes + ----- + INTENTIONAL DUPLICATION: This function is duplicated in both install.py + and validate.py because validate.py must work standalone before Spyglass + is installed. Both scripts are designed to run independently without + importing from each other to avoid path/module complexity. + + If you modify this function, you MUST update it in both files: + - scripts/install.py + - scripts/validate.py (this file) + + Future: Consider extracting to scripts/_shared.py if the installer + becomes a package, but for now standalone scripts are simpler. + """ + try: + import tomllib # Python 3.11+ + except ImportError: + try: + import tomli as tomllib # Python 3.9-3.10 + except ImportError: + return (3, 9) # Safe fallback + + try: + pyproject_path = Path(__file__).parent.parent / "pyproject.toml" + with pyproject_path.open("rb") as f: + data = tomllib.load(f) + + # Parse ">=3.9,<3.13" format + requires_python = data["project"]["requires-python"] + match = re.search(r">=(\d+)\.(\d+)", requires_python) + if match: + return (int(match.group(1)), int(match.group(2))) + except Exception: + pass + + return (3, 9) # Safe fallback + + +def check_conda() -> None: + """Check conda/mamba is available. 
+ + Parameters + ---------- + None + + Returns + ------- + None + + Raises + ------ + RuntimeError + If neither conda nor mamba is found in PATH + """ + import shutil + + conda_cmd = None + if shutil.which("mamba"): + conda_cmd = "mamba" + elif shutil.which("conda"): + conda_cmd = "conda" + else: + raise RuntimeError( + "conda or mamba not found\n" + "Install from: https://github.com/conda-forge/miniforge" + ) + + print(f"โœ“ Package manager: {conda_cmd}") + + +def check_spyglass_import() -> None: + """Verify spyglass can be imported. + + Parameters + ---------- + None + + Returns + ------- + None + + Raises + ------ + RuntimeError + If spyglass package cannot be imported + """ + try: + import spyglass + + version = getattr(spyglass, "__version__", "unknown") + print(f"โœ“ Spyglass version: {version}") + except ImportError as e: + raise RuntimeError(f"Cannot import spyglass: {e}") + + +def check_spyglass_config() -> None: + """Verify SpyglassConfig integration works. + + This is a non-critical check - warns instead of failing. + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + Prints warnings for configuration issues but does not raise exceptions. + """ + try: + from spyglass.settings import SpyglassConfig + + config = SpyglassConfig() + print("โœ“ SpyglassConfig loaded") + print(f" Base directory: {config.base_dir}") + + if not config.base_dir.exists(): + print(" Status: Will be created on first use") + else: + print(" Status: Ready") + except Exception as e: + print(f"โš  SpyglassConfig warning: {e}") + print(" This may not be a critical issue") + + +def check_database() -> None: + """Test database connection if configured. + + This is a non-critical check - warns instead of failing. + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + Prints warnings for database issues but does not raise exceptions. 
+ """ + try: + import datajoint as dj + + dj.conn().ping() + print("โœ“ Database connection successful") + except Exception as e: + print(f"โš  Database not configured: {e}") + print(" Configure manually or run: python scripts/install.py --docker") + + +def main() -> None: + """Run all validation checks. + + Executes suite of validation checks and reports results. Exits with + code 0 on success, 1 on failure. + + Parameters + ---------- + None + + Returns + ------- + None + """ + print("\n" + "=" * 60) + print(" Spyglass Installation Validation") + print("=" * 60 + "\n") + + # Define all validation checks + checks = [ + Check("Python version", check_python_version, critical=True), + Check("Conda/Mamba", check_conda, critical=True), + Check("Spyglass import", check_spyglass_import, critical=True), + Check("SpyglassConfig", check_spyglass_config, critical=False), + Check("Database connection", check_database, critical=False), + ] + + critical_failed = [] + warnings = [] + + # Run critical checks + print("Critical Checks:") + for check in checks: + if not check.critical: + continue + try: + check.func() + except Exception as e: + print(f"โœ— {check.name}: {e}") + critical_failed.append(check.name) + + # Run optional checks + print("\nOptional Checks:") + for check in checks: + if check.critical: + continue + try: + check.func() + except Exception as e: + print(f"โš  {check.name}: {e}") + warnings.append(check.name) + + # Summary + print("\n" + "=" * 60) + if critical_failed: + print("โœ— Validation failed - installation incomplete") + print("=" * 60 + "\n") + print("Failed checks:", ", ".join(critical_failed)) + print("\nThese issues must be fixed before using Spyglass.") + print("See docs/TROUBLESHOOTING.md for help") + sys.exit(EXIT_FAILURE) + elif warnings: + print("โš  Validation passed with warnings") + print("=" * 60 + "\n") + print("Warnings:", ", ".join(warnings)) + print("\nSpyglass is installed but optional features may not work.") + print("See 
docs/TROUBLESHOOTING.md for configuration help") + sys.exit(EXIT_SUCCESS) # Exit 0 since installation is functional + else: + print("โœ… All checks passed!") + print("=" * 60 + "\n") + sys.exit(EXIT_SUCCESS) + + +if __name__ == "__main__": + main() diff --git a/scripts/validate_spyglass.py b/scripts/validate_spyglass.py new file mode 100644 index 000000000..a5a6d55de --- /dev/null +++ b/scripts/validate_spyglass.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""Validate that install.py and settings.py produce compatible configs. + +This script verifies that the configuration generated by install.py is compatible +with what settings.py expects. It identifies missing keys that might cause issues. +""" + +import json +import sys +from pathlib import Path +from tempfile import TemporaryDirectory + +# Add scripts dir to path to import install functions +scripts_dir = Path(__file__).parent +sys.path.insert(0, str(scripts_dir)) + +from install import build_directory_structure + +# Import from spyglass +sys.path.insert(0, str(scripts_dir.parent / "src")) +from spyglass.settings import SpyglassConfig + + +def get_installer_config_structure(base_dir: Path) -> dict: + """Get config structure that installer would create.""" + from install import load_directory_schema + + dir_schema = load_directory_schema() + dirs = build_directory_structure( + base_dir, schema=dir_schema, create=True, verbose=False + ) + + # Replicate exactly what create_database_config does + config = { + # Database connection settings + "database.host": "localhost", + "database.port": 3306, + "database.user": "testuser", + "database.password": "testpass", + "database.use_tls": False, + # DataJoint performance settings + "filepath_checksum_size_limit": 1 * 1024**3, # 1 GB + "enable_python_native_blobs": True, + # DataJoint stores for external file storage + "stores": { + "raw": { + "protocol": "file", + "location": str(dirs["spyglass_raw"]), + "stage": str(dirs["spyglass_raw"]), + }, + "analysis": { + 
"protocol": "file", + "location": str(dirs["spyglass_analysis"]), + "stage": str(dirs["spyglass_analysis"]), + }, + }, + # Spyglass custom configuration + "custom": { + "debug_mode": False, + "test_mode": False, + "kachery_zone": "franklab.default", + "spyglass_dirs": { + "base": str(base_dir), + "raw": str(dirs["spyglass_raw"]), + "analysis": str(dirs["spyglass_analysis"]), + "recording": str(dirs["spyglass_recording"]), + "sorting": str(dirs["spyglass_sorting"]), + "waveforms": str(dirs["spyglass_waveforms"]), + "temp": str(dirs["spyglass_temp"]), + "video": str(dirs["spyglass_video"]), + "export": str(dirs["spyglass_export"]), + }, + "kachery_dirs": { + "cloud": str(dirs["kachery_cloud"]), + "storage": str(dirs["kachery_storage"]), + "temp": str(dirs["kachery_temp"]), + }, + "dlc_dirs": { + "base": str(base_dir / "deeplabcut"), + "project": str(dirs["dlc_project"]), + "video": str(dirs["dlc_video"]), + "output": str(dirs["dlc_output"]), + }, + "moseq_dirs": { + "base": str(base_dir / "moseq"), + "project": str(dirs["moseq_project"]), + "video": str(dirs["moseq_video"]), + }, + }, + } + + return config + + +def get_settings_config_structure(base_dir: Path) -> dict: + """Get config structure that settings.py generates.""" + sg_config = SpyglassConfig() + + config = sg_config._generate_dj_config( + base_dir=str(base_dir), + database_user="testuser", + database_password="testpass", + database_host="localhost", + database_port=3306, + database_use_tls=False, + ) + + return config + + +def get_all_keys(d: dict, prefix: str = "") -> set: + """Recursively get all keys in nested dictionary.""" + keys = set() + for k, v in d.items(): + full_key = f"{prefix}.{k}" if prefix else k + keys.add(full_key) + if isinstance(v, dict): + keys.update(get_all_keys(v, full_key)) + return keys + + +def compare_configs(): + """Compare config structures and report differences.""" + print("=" * 80) + print("Config Structure Comparison: install.py vs settings.py") + print("=" * 80) + 
print() + + with TemporaryDirectory() as tmpdir: + base_dir = Path(tmpdir) / "spyglass_data" + + # Get both config structures + installer_config = get_installer_config_structure(base_dir) + settings_config = get_settings_config_structure(base_dir) + + # Get all keys from both + installer_keys = get_all_keys(installer_config) + settings_keys = get_all_keys(settings_config) + + # Find differences + missing_in_installer = settings_keys - installer_keys + extra_in_installer = installer_keys - settings_keys + + # Report results + print("MISSING KEYS IN INSTALLER CONFIG:") + print("-" * 80) + if missing_in_installer: + for key in sorted(missing_in_installer): + print(f" โŒ {key}") + # Show value from settings + parts = key.split(".") + value = settings_config + try: + for part in parts: + value = value[part] + print(f" Expected value: {value}") + except (KeyError, TypeError): + pass + else: + print(" โœ… None - all settings.py keys present in installer") + + print() + print("EXTRA KEYS IN INSTALLER CONFIG:") + print("-" * 80) + if extra_in_installer: + for key in sorted(extra_in_installer): + print(f" โš ๏ธ {key}") + else: + print(" โœ… None - installer has no extra keys") + + print() + print("=" * 80) + print("SUMMARY:") + print("=" * 80) + + if not missing_in_installer and not extra_in_installer: + print("โœ… Config structures are IDENTICAL") + return 0 + elif missing_in_installer: + print(f"โŒ Installer is MISSING {len(missing_in_installer)} keys") + print( + " These keys should be added to install.py::create_database_config()" + ) + return 1 + else: + print("โš ๏ธ Installer has extra keys (might be OK)") + return 0 + + +if __name__ == "__main__": + sys.exit(compare_configs()) diff --git a/src/spyglass/settings.py b/src/spyglass/settings.py index 1edf89d1e..c62cb2b78 100644 --- a/src/spyglass/settings.py +++ b/src/spyglass/settings.py @@ -21,6 +21,101 @@ class SpyglassConfig: facilitate testing. 
""" + @staticmethod + def _load_directory_schema(): + """Load directory schema from JSON file at repository root. + + Returns + ------- + dict + Directory schema with prefixes (spyglass, kachery, dlc, moseq) + + Notes + ----- + This method reads from config_schema.json at the repository root, + which is the single source of truth for Spyglass directory structure. + Falls back to hard-coded defaults if file is not found (for backwards + compatibility during development). + """ + # Define fallback once to avoid duplication + fallback_schema = { + "spyglass": { + "raw": "raw", + "analysis": "analysis", + "recording": "recording", + "sorting": "spikesorting", + "waveforms": "waveforms", + "temp": "tmp", + "video": "video", + "export": "export", + }, + "kachery": { + "cloud": ".kachery-cloud", + "storage": "kachery_storage", + "temp": "tmp", + }, + "dlc": { + "project": "projects", + "video": "video", + "output": "output", + }, + "moseq": { + "project": "projects", + "video": "video", + }, + } + + schema_path = Path(__file__).parent.parent.parent / "config_schema.json" + + if not schema_path.exists(): + logger.warning( + f"Config schema file not found at {schema_path}. " + "Using fallback default directory structure. " + "This is normal during development but should not happen " + "in production installations." + ) + return fallback_schema + + try: + with open(schema_path) as f: + schema = json.load(f) + + if not isinstance(schema, dict): + raise ValueError(f"Schema should be a dict, got {type(schema)}") + + if "directory_schema" not in schema: + raise ValueError("Schema missing 'directory_schema' key") + + # Check schema version for compatibility + schema_version = schema.get("_schema_version", "1.0.0") + expected_version = "1.0.0" + if schema_version != expected_version: + logger.warning( + f"Config schema version mismatch: expected {expected_version}, " + f"got {schema_version}. This may cause compatibility issues." 
+ ) + + return schema["directory_schema"] + + except (OSError, IOError) as e: + logger.error( + f"Failed to read directory schema from {schema_path}: {e}. " + "Using fallback defaults." + ) + return fallback_schema + except json.JSONDecodeError as e: + logger.error( + f"Invalid JSON in directory schema {schema_path}: {e}. " + "Using fallback defaults." + ) + return fallback_schema + except ValueError as e: + logger.error( + f"Schema validation failed for {schema_path}: {e}. " + "Using fallback defaults." + ) + return fallback_schema + def __init__(self, base_dir: str = None, **kwargs) -> None: """ Initializes a new instance of the class. @@ -59,34 +154,10 @@ def __init__(self, base_dir: str = None, **kwargs) -> None: self._dlc_base = None self.load_failed = False - self.relative_dirs = { - # {PREFIX}_{KEY}_DIR, default dir relative to base_dir - # NOTE: Adding new dir requires edit to HHMI hub - "spyglass": { - "raw": "raw", - "analysis": "analysis", - "recording": "recording", - "sorting": "spikesorting", - "waveforms": "waveforms", - "temp": "tmp", - "video": "video", - "export": "export", - }, - "kachery": { - "cloud": ".kachery-cloud", - "storage": "kachery_storage", - "temp": "tmp", - }, - "dlc": { - "project": "projects", - "video": "video", - "output": "output", - }, - "moseq": { - "project": "projects", - "video": "video", - }, - } + # Load directory schema from JSON file (single source of truth) + # {PREFIX}_{KEY}_DIR, default dir relative to base_dir + # NOTE: Adding new dir requires edit to HHMI hub AND config_schema.json + self.relative_dirs = self._load_directory_schema() self.dj_defaults = { "database.host": kwargs.get("database_host", "lmf-db.cin.ucsf.edu"), "database.user": kwargs.get("database_user"), @@ -160,12 +231,23 @@ def load_config( or os.environ.get("SPYGLASS_BASE_DIR") ) - if resolved_base and not Path(resolved_base).exists(): - resolved_base = Path(resolved_base).expanduser() - if not resolved_base or not Path(resolved_base).exists(): + # 
Log when supplied base_dir causes environment variable overrides to be ignored + if self.supplied_base_dir: + logger.info( + "Using supplied base_dir - ignoring SPYGLASS_* environment variable overrides" + ) + + if resolved_base: + base_path = Path(resolved_base).expanduser().resolve() + if not self._debug_mode: + # Create base directory if it doesn't exist + base_path.mkdir(parents=True, exist_ok=True) + resolved_base = str(base_path) + + if not resolved_base: if not on_startup: # Only warn if not on startup logger.error( - f"Could not find SPYGLASS_BASE_DIR: {resolved_base}" + "Could not find SPYGLASS_BASE_DIR" + "\n\tCheck dj.config['custom']['spyglass_dirs']['base']" + "\n\tand os.environ['SPYGLASS_BASE_DIR']" ) @@ -178,14 +260,14 @@ def load_config( or os.environ.get("DLC_PROJECT_PATH", "").split("projects")[0] or str(Path(resolved_base) / "deeplabcut") ) - Path(self._dlc_base).mkdir(exist_ok=True) + Path(self._dlc_base).mkdir(parents=True, exist_ok=True) self._moseq_base = ( dj_moseq.get("base") or os.environ.get("MOSEQ_BASE_DIR") or str(Path(resolved_base) / "moseq") ) - Path(self._moseq_base).mkdir(exist_ok=True) + Path(self._moseq_base).mkdir(parents=True, exist_ok=True) config_dirs = {"SPYGLASS_BASE_DIR": str(resolved_base)} source_config_lookup = { @@ -257,7 +339,7 @@ def _mkdirs_from_dict_vals(self, dir_dict) -> None: if self._debug_mode: return for dir_str in dir_dict.values(): - Path(dir_str).mkdir(exist_ok=True) + Path(dir_str).mkdir(parents=True, exist_ok=True) def _set_dj_config_stores(self, check_match=True, set_stores=True) -> None: """ @@ -265,8 +347,6 @@ def _set_dj_config_stores(self, check_match=True, set_stores=True) -> None: Parameters ---------- - dir_dict: dict - Dictionary of resolved dirs. check_match: bool Optional. Default True. Check that dj.config['stores'] match resolved dirs. @@ -323,6 +403,8 @@ def _generate_dj_config( Parameters ---------- + base_dir : str, optional + The base directory. 
If not provided, will use existing config. database_user : str, optional The database user. If not provided, resulting config will not specify. @@ -331,7 +413,7 @@ def _generate_dj_config( specify. database_host : str, optional Default lmf-db.cin.ucsf.edu. MySQL host name. - dapabase_port : int, optional + database_port : int, optional Default 3306. Port number for MySQL server. database_use_tls : bool, optional Default True. Use TLS encryption. @@ -373,7 +455,7 @@ def save_dj_config( datajoint builtins will be used to save. output_filename : str or Path, optional Default to datajoint global config. If save_method = 'custom', name - of file to generate. Must end in either be either yaml or json. + of file to generate. Must end in either yaml or json. base_dir : str, optional The base directory. If not provided, will default to the env var set_password : bool, optional @@ -392,7 +474,9 @@ def save_dj_config( if output_filename: save_method = "custom" path = Path(output_filename).expanduser() # Expand ~ - filepath = path if path.is_absolute() else path.absolute() + filepath = ( + path if path.is_absolute() else path.resolve() + ) # Resolve relative paths and symlinks filepath.parent.mkdir(exist_ok=True, parents=True) filepath = ( filepath.with_suffix(".json") # ensure suffix, default json @@ -419,7 +503,12 @@ def save_dj_config( user_warn = ( f"Replace existing file? 
{filepath.resolve()}\n\t" - + "\n\t".join([f"{k}: {v}" for k, v in config.items()]) + + "\n\t".join( + [ + f"{k}: {v if k != 'database.password' else '***'}" + for k, v in dj.config._conf.items() + ] + ) + "\n" ) @@ -501,7 +590,9 @@ def _dj_custom(self) -> dict: "project": self.moseq_project_dir, "video": self.moseq_video_dir, }, - "kachery_zone": "franklab.default", + "kachery_zone": os.environ.get( + "KACHERY_ZONE", "franklab.default" + ), } } @@ -601,7 +692,7 @@ def moseq_video_dir(self) -> str: sg_config.load_config(on_startup=True) if sg_config.load_failed: # Failed to load logger.warning("Failed to load SpyglassConfig. Please set up config file.") - config = {} # Let __intit__ fetch empty config for first time setup + config = {} # Let __init__ fetch empty config for first time setup prepopulate = False test_mode = False debug_mode = False diff --git a/src/spyglass/utils/docker.py b/src/spyglass/utils/docker.py new file mode 100644 index 000000000..de76ca682 --- /dev/null +++ b/src/spyglass/utils/docker.py @@ -0,0 +1,216 @@ +"""Docker utilities for Spyglass database setup. + +This module provides utilities for managing MySQL database containers via Docker. +These utilities are used by: +1. Testing infrastructure (tests/container.py) +2. Post-installation database management +3. NOT for the installer (installer uses inline code to avoid circular dependency) +""" + +import shutil +import subprocess +import time +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class DockerConfig: + """Docker container configuration for MySQL database.""" + + container_name: str = "spyglass-db" + image: str = "datajoint/mysql:8.0" + port: int = 3306 + password: str = "tutorial" + + +def is_docker_available() -> bool: + """Check if Docker is installed and daemon is running. 
+ + Returns + ------- + bool + True if Docker is available, False otherwise + """ + if not shutil.which("docker"): + return False + + try: + subprocess.run( + ["docker", "info"], + capture_output=True, + timeout=5, + check=True, + ) + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): + return False + + +def container_exists(container_name: str) -> bool: + """Check if a Docker container exists. + + Parameters + ---------- + container_name : str + Name of the container to check + + Returns + ------- + bool + True if container exists, False otherwise + """ + result = subprocess.run( + ["docker", "ps", "-a", "--format", "{{.Names}}"], + capture_output=True, + text=True, + ) + return container_name in result.stdout + + +def start_database_container(config: Optional[DockerConfig] = None) -> None: + """Start MySQL database container. + + Parameters + ---------- + config : DockerConfig, optional + Docker configuration. Uses defaults if None. + + Raises + ------ + RuntimeError + If Docker is not available or container fails to start + """ + if config is None: + config = DockerConfig() + + if not is_docker_available(): + raise RuntimeError( + "Docker is not available. 
Install from: " + "https://docs.docker.com/get-docker/" + ) + + # Check if container already exists + if container_exists(config.container_name): + # Start existing container + subprocess.run( + ["docker", "start", config.container_name], + check=True, + ) + else: + # Pull image first (better UX - shows progress) + subprocess.run(["docker", "pull", config.image], check=True) + + # Create and start new container + subprocess.run( + [ + "docker", + "run", + "-d", + "--name", + config.container_name, + "-p", + f"{config.port}:3306", + "-e", + f"MYSQL_ROOT_PASSWORD={config.password}", + config.image, + ], + check=True, + ) + + # Wait for MySQL to be ready + wait_for_mysql(config) + + +def wait_for_mysql( + config: Optional[DockerConfig] = None, timeout: int = 60 +) -> None: + """Wait for MySQL to be ready to accept connections. + + Parameters + ---------- + config : DockerConfig, optional + Docker configuration. Uses defaults if None. + timeout : int, optional + Maximum time to wait in seconds (default: 60) + + Raises + ------ + TimeoutError + If MySQL does not become ready within timeout + """ + if config is None: + config = DockerConfig() + + for attempt in range(timeout // 2): + try: + result = subprocess.run( + [ + "docker", + "exec", + config.container_name, + "mysqladmin", + "ping", + "-h", + "localhost", + "--silent", + ], + capture_output=True, + timeout=5, + ) + + if result.returncode == 0: + return # Success! + + except subprocess.TimeoutExpired: + pass + + if attempt < (timeout // 2) - 1: + time.sleep(2) + + raise TimeoutError( + f"MySQL did not become ready within {timeout}s. " + f"Check logs: docker logs {config.container_name}" + ) + + +def stop_database_container(config: Optional[DockerConfig] = None) -> None: + """Stop MySQL database container. + + Parameters + ---------- + config : DockerConfig, optional + Docker configuration. Uses defaults if None. 
+ """ + if config is None: + config = DockerConfig() + + try: + subprocess.run( + ["docker", "stop", config.container_name], + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError: + # Container may not be running, that's okay + pass + + +def remove_database_container(config: Optional[DockerConfig] = None) -> None: + """Remove MySQL database container. + + Args: + config: Docker configuration (uses defaults if None) + """ + if config is None: + config = DockerConfig() + + try: + subprocess.run( + ["docker", "rm", "-f", config.container_name], + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError: + # Container may not exist, that's okay + pass diff --git a/src/spyglass/utils/mixins/base.py b/src/spyglass/utils/mixins/base.py index e10791dfb..b9c7ed2d2 100644 --- a/src/spyglass/utils/mixins/base.py +++ b/src/spyglass/utils/mixins/base.py @@ -36,19 +36,24 @@ def _graph_deps(self) -> list: return [TableChain, RestrGraph] - @cached_property + @property def _test_mode(self) -> bool: """Return True if in test mode. Avoids circular import. Prevents prompt on delete. + Note: Using @property instead of @cached_property so we always get + current value from dj.config, even if test_mode changes after first access. + Used by ... 
- BaseMixin._spyglass_version - HelpersMixin """ - from spyglass.settings import test_mode + import datajoint as dj - return test_mode + # Check dj.config directly instead of importing module-level variable + # which gets stale if load_config() is called after initial import + return dj.config.get("custom", {}).get("test_mode", False) @cached_property def _spyglass_version(self): diff --git a/tests/conftest.py b/tests/conftest.py index 0c9db53b5..efceeb958 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,6 @@ from numba import NumbaWarning from pandas.errors import PerformanceWarning -from .container import DockerMySQLManager from .data_downloader import DataDownloader # ------------------------------- TESTS CONFIG ------------------------------- @@ -42,6 +41,83 @@ warnings.filterwarnings("ignore", category=NumbaWarning, module="numba") +class _TestDatabaseManager: + """Manages test database connection (service container or local Docker). + + Provides minimal interface compatible with old DockerMySQLManager for tests. + """ + + def __init__(self, container_name="mysql", port=None, null_server=False): + self.container_name = container_name + # Use 3308 as default (GitHub Actions service container port) + self.port = port if port is not None else 3308 + self.null_server = null_server + + def wait(self, timeout=120, wait_interval=3): + """Wait for database to be ready. + + For service containers (null_server=False), does minimal verification. + Config setup is handled by test fixtures (server_credentials + dj_conn). + + Parameters + ---------- + timeout : int + Maximum time to wait in seconds. Default 120. + wait_interval : int + Time between connection attempts in seconds. Default 3. + """ + if self.null_server: + return + + # Service container should be ready if health check passed + # Let test fixtures handle actual connection verification + pass + + def stop(self): + """Stop database. 
No-op as service container is managed by GitHub Actions.""" + if self.null_server: + return + # Service container cleanup is handled by GitHub Actions + pass + + @property + def credentials(self): + """Database credentials for test connection.""" + return { + "database.host": "localhost", + "database.password": "tutorial", + "database.user": "root", + "database.port": int(self.port), + "safemode": "false", + "custom": {"test_mode": True, "debug_mode": False}, + } + + @property + def container(self): + """Docker container object. + + Returns None for service containers (managed by GitHub Actions) or + null_server mode, since we don't have Python Docker API access. + """ + return None + + @property + def connected(self): + """Check if database connection is available. + + Updates dj.config and verifies connection works. This ensures test_mode + is set in dj.config before any spyglass imports happen in mini_insert. + """ + try: + import datajoint as dj + + # Update config to ensure test_mode is set (needed for electrode validation skip) + dj.config.update(self.credentials) + return dj.conn().is_connected + except Exception: + return False + + def pytest_addoption(parser): """Permit constants when calling pytest at command line @@ -125,13 +201,19 @@ def pytest_configure(config): RAW_DIR = BASE_DIR / "raw" os.environ["SPYGLASS_BASE_DIR"] = str(BASE_DIR) - SERVER = DockerMySQLManager( + # Check if docker module is available for local testing + try: + import docker as _docker_check + + docker_available = True + except ImportError: + docker_available = False + + # Use GitHub Actions service container or local Docker for tests + SERVER = _TestDatabaseManager( container_name=config.option.container_name, port=config.option.container_port, - restart=TEARDOWN, - shutdown=TEARDOWN, - null_server=config.option.no_docker, - verbose=VERBOSE, + null_server=config.option.no_docker or not docker_available, ) DOWNLOADS = DataDownloader( diff --git a/tests/container.py 
b/tests/container.py deleted file mode 100644 index fb960dc07..000000000 --- a/tests/container.py +++ /dev/null @@ -1,223 +0,0 @@ -import atexit -import time - -import datajoint as dj -import docker -from datajoint import logger - - -class DockerMySQLManager: - """Manage Docker container for MySQL server - - Parameters - ---------- - image_name : str - Docker image name. Default 'datajoint/mysql'. - mysql_version : str - MySQL version. Default '8.0'. - container_name : str - Docker container name. Default 'spyglass-pytest'. - port : str - Port to map to DJ's default 3306. Default '330[mysql_version]' - (i.e., 3308 if testing 8.0). - null_server : bool - If True, do not start container. Return on all methods. Default False. - Useful for iterating on tests in existing container. - restart : bool - If True, stop and remove existing container on startup. Default True. - shutdown : bool - If True, stop and remove container on exit from python. Default True. - verbose : bool - If True, print container status on startup. Default False. 
- """ - - def __init__( - self, - image_name="datajoint/mysql", - mysql_version="8.0", - container_name="spyglass-pytest", - port=None, - null_server=False, - restart=True, - shutdown=True, - verbose=False, - ) -> None: - self.image_name = image_name - self.mysql_version = mysql_version - self.container_name = container_name - self.port = port or "330" + self.mysql_version[0] - self.client = None if null_server else docker.from_env() - self.null_server = null_server - self.password = "tutorial" - self.user = "root" - self.host = "localhost" - self._ran_container = None - self.logger = logger - self.logger.setLevel("INFO" if verbose else "ERROR") - - if not self.null_server: - if shutdown: - atexit.register(self.stop) # stop container on python exit - if restart: - self.stop() # stop container if it exists - self.start() - - @property - def container(self) -> docker.models.containers.Container: - if self.null_server: - return self.container_name - return self.client.containers.get(self.container_name) - - @property - def container_status(self) -> str: - if self.null_server: - return None - try: - self.container.reload() - return self.container.status - except docker.errors.NotFound: - return None - - @property - def container_health(self) -> str: - if self.null_server: - return None - try: - self.container.reload() - return self.container.health - except docker.errors.NotFound: - return None - - @property - def msg(self) -> str: - return f"Container {self.container_name} " - - def start(self) -> str: - if self.null_server: - return None - - elif self.container_status in ["created", "running", "restarting"]: - self.logger.info( - self.msg + "starting: " + self.container_status + "." 
- ) - - elif self.container_status == "exited": - self.logger.info(self.msg + "restarting.") - self.container.restart() - - else: - self._ran_container = self.client.containers.run( - image=f"{self.image_name}:{self.mysql_version}", - name=self.container_name, - ports={3306: self.port}, - environment=[ - f"MYSQL_ROOT_PASSWORD={self.password}", - "MYSQL_DEFAULT_STORAGE_ENGINE=InnoDB", - ], - detach=True, - tty=True, - ) - self.logger.info(self.msg + "starting new.") - - return self.container.name - - def wait(self, timeout=120, wait=3) -> None: - """Wait for healthy container. - - Parameters - ---------- - timeout : int - Timeout in seconds. Default 120. - wait : int - Time to wait between checks in seconds. Default 5. - """ - if self.null_server: - return None - if not self.container_status or self.container_status == "exited": - self.start() - - print("") - for i in range(timeout // wait): - if self.container.health == "healthy": - break - self.logger.info(f"Container {self.container_name} starting... {i}") - time.sleep(wait) - self.logger.info( - f"Container {self.container_name}, {self.container.health}." 
- ) - - @property - def _add_sql(self) -> str: - ESC = r"\_%" - return ( - "CREATE USER IF NOT EXISTS 'basic'@'%' IDENTIFIED BY " - + f"'{self.password}'; GRANT USAGE ON `%`.* TO 'basic'@'%';" - + "GRANT SELECT ON `%`.* TO 'basic'@'%';" - + f"GRANT ALL PRIVILEGES ON `common{ESC}`.* TO `basic`@`%`;" - + f"GRANT ALL PRIVILEGES ON `spikesorting{ESC}`.* TO `basic`@`%`;" - + f"GRANT ALL PRIVILEGES ON `lfp{ESC}`.* TO `basic`@`%`;" - + f"GRANT ALL PRIVILEGES ON `position{ESC}`.* TO `basic`@`%`;" - + f"GRANT ALL PRIVILEGES ON `ripple{ESC}`.* TO `basic`@`%`;" - + f"GRANT ALL PRIVILEGES ON `linearization{ESC}`.* TO `basic`@`%`;" - ).strip() - - def add_user(self) -> int: - """Add 'basic' user to container.""" - if self.null_server: - return None - - if self._container_running(): - result = self.container.exec_run( - cmd=[ - "mysql", - "-u", - self.user, - f"--password={self.password}", - "-e", - self._add_sql, - ], - stdout=False, - stderr=False, - tty=True, - ) - if result.exit_code == 0: - self.logger.info("Container added user.") - else: - logger.error("Failed to add user.") - return result.exit_code - else: - logger.error(f"Container {self.container_name} does not exist.") - return None - - @property - def credentials(self): - """Datajoint credentials for this container.""" - return { - "database.host": "localhost", - "database.password": self.password, - "database.user": self.user, - "database.port": int(self.port), - "safemode": "false", - "custom": {"test_mode": True, "debug_mode": False}, - } - - @property - def connected(self) -> bool: - self.wait() - dj.config.update(self.credentials) - return dj.conn().is_connected - - def stop(self, remove=True) -> None: - """Stop and remove container.""" - if self.null_server: - return None - if not self.container_status or self.container_status == "exited": - return - - container_name = self.container_name - self.container.stop() # Logger I/O operations close during teardown - print(f"Container {container_name} stopped.") - - 
if remove: - self.container.remove() - print(f"Container {container_name} removed.") diff --git a/tests/setup/__init__.py b/tests/setup/__init__.py new file mode 100644 index 000000000..844e72b9b --- /dev/null +++ b/tests/setup/__init__.py @@ -0,0 +1 @@ +"""Tests for installation and setup scripts.""" diff --git a/tests/setup/test_config_schema.py b/tests/setup/test_config_schema.py new file mode 100644 index 000000000..11b8bfef9 --- /dev/null +++ b/tests/setup/test_config_schema.py @@ -0,0 +1,517 @@ +"""Tests for config schema DRY architecture. + +Verifies that: +1. JSON schema is valid +2. Installer and settings.py use the same schema +3. Installer produces config that settings.py can use +4. Directory structures match exactly +""" + +import json +import sys +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest + +# Import from installer +scripts_dir = Path(__file__).parent.parent.parent / "scripts" +sys.path.insert(0, str(scripts_dir)) + +from install import ( + build_directory_structure, + determine_tls, + load_directory_schema, + load_full_schema, + validate_schema, +) + +# Spyglass imports - lazy loaded in tests to avoid hanging during pytest collection +# DO NOT import SpyglassConfig at module level - it imports datajoint which may +# try to connect to database before fixtures are set up + + +class TestConfigSchema: + """Tests for config_schema.json file.""" + + def test_json_schema_is_valid(self): + """Test that config_schema.json is valid JSON and has required structure.""" + schema_path = Path(__file__).parent.parent.parent / "config_schema.json" + assert ( + schema_path.exists() + ), "config_schema.json not found at repository root" + + with open(schema_path) as f: + schema = json.load(f) + + # Check top-level structure + assert isinstance(schema, dict) + assert "directory_schema" in schema + assert "tls" in schema + + # Check directory_schema has all prefixes + dir_schema = schema["directory_schema"] + assert 
set(dir_schema.keys()) == {"spyglass", "kachery", "dlc", "moseq"} + + def test_validate_schema_passes_for_valid_schema(self): + """Test that validate_schema() accepts valid schema.""" + schema = load_full_schema() + # Should not raise + validate_schema(schema) + + def test_validate_schema_rejects_invalid_schema(self): + """Test that validate_schema() rejects invalid schemas.""" + # Missing directory_schema + with pytest.raises(ValueError, match="missing 'directory_schema'"): + validate_schema({"other_key": {}}) + + # Missing required prefix + with pytest.raises(ValueError, match="Missing prefixes"): + validate_schema( + { + "directory_schema": { + "spyglass": {"raw": "raw"}, + "kachery": {"cloud": ".kachery-cloud"}, + # Missing dlc and moseq + } + } + ) + + +class TestSchemaConsistency: + """Tests for schema consistency between installer and settings.py.""" + + def test_installer_and_settings_use_same_schema(self): + """Test that installer and settings.py load identical schemas.""" + from spyglass.settings import SpyglassConfig + + # Load from installer + installer_schema = load_directory_schema() + + # Load from settings.py + config = SpyglassConfig() + settings_schema = config.relative_dirs + + # Should be identical + assert installer_schema == settings_schema, ( + "Installer and settings.py have different directory schemas. " + "This violates the DRY principle." 
+ ) + + def test_schema_has_all_required_prefixes(self): + """Test that schema has all 4 required directory prefixes.""" + schema = load_directory_schema() + assert set(schema.keys()) == {"spyglass", "kachery", "dlc", "moseq"} + + def test_schema_has_correct_directory_counts(self): + """Test that each prefix has expected number of directories.""" + schema = load_directory_schema() + + assert ( + len(schema["spyglass"]) == 8 + ), "spyglass should have 8 directories" + assert len(schema["kachery"]) == 3, "kachery should have 3 directories" + assert len(schema["dlc"]) == 3, "dlc should have 3 directories" + assert len(schema["moseq"]) == 2, "moseq should have 2 directories" + + def test_spyglass_directories_are_correct(self): + """Test that spyglass directories have correct keys.""" + schema = load_directory_schema() + expected_keys = { + "raw", + "analysis", + "recording", + "sorting", + "waveforms", + "temp", + "video", + "export", + } + assert set(schema["spyglass"].keys()) == expected_keys + + def test_dlc_directories_are_correct(self): + """Test that DLC directories have correct keys.""" + schema = load_directory_schema() + expected_keys = {"project", "video", "output"} + assert set(schema["dlc"].keys()) == expected_keys + + def test_moseq_directories_are_correct(self): + """Test that MoSeq directories have correct keys.""" + schema = load_directory_schema() + expected_keys = {"project", "video"} + assert set(schema["moseq"].keys()) == expected_keys + + +class TestInstallerConfig: + """Tests for installer config generation.""" + + def test_build_directory_structure_creates_all_dirs(self): + """Test that build_directory_structure creates all 16 directories.""" + with TemporaryDirectory() as tmpdir: + base_dir = Path(tmpdir) / "spyglass_data" + + dirs = build_directory_structure( + base_dir, create=True, verbose=False + ) + + # Should return dict with all directories + assert len(dirs) == 16, f"Expected 16 directories, got {len(dirs)}" + + # All directories should 
exist + for name, path in dirs.items(): + assert path.exists(), f"Directory {name} not created at {path}" + + def test_build_directory_structure_dry_run(self): + """Test that create=False doesn't create directories.""" + with TemporaryDirectory() as tmpdir: + base_dir = Path(tmpdir) / "spyglass_data" + + dirs = build_directory_structure( + base_dir, create=False, verbose=False + ) + + # Should return dict but not create dirs + assert len(dirs) == 16 + assert not (base_dir / "raw").exists() + + def test_installer_config_has_all_directory_groups(self): + """Test that installer creates config with all 4 directory groups.""" + with TemporaryDirectory() as tmpdir: + base_dir = Path(tmpdir) / "spyglass_data" + + # Simulate what create_database_config does + dirs = build_directory_structure( + base_dir, create=True, verbose=False + ) + + config = { + "custom": { + "spyglass_dirs": { + "base": str(base_dir), + "raw": str(dirs["spyglass_raw"]), + "analysis": str(dirs["spyglass_analysis"]), + "recording": str(dirs["spyglass_recording"]), + "sorting": str(dirs["spyglass_sorting"]), + "waveforms": str(dirs["spyglass_waveforms"]), + "temp": str(dirs["spyglass_temp"]), + "video": str(dirs["spyglass_video"]), + "export": str(dirs["spyglass_export"]), + }, + "kachery_dirs": { + "cloud": str(dirs["kachery_cloud"]), + "storage": str(dirs["kachery_storage"]), + "temp": str(dirs["kachery_temp"]), + }, + "dlc_dirs": { + "project": str(dirs["dlc_project"]), + "video": str(dirs["dlc_video"]), + "output": str(dirs["dlc_output"]), + }, + "moseq_dirs": { + "project": str(dirs["moseq_project"]), + "video": str(dirs["moseq_video"]), + }, + } + } + + # Verify all groups present + custom = config["custom"] + assert "spyglass_dirs" in custom + assert "kachery_dirs" in custom + assert "dlc_dirs" in custom + assert "moseq_dirs" in custom + + def test_installer_directory_paths_match_schema(self): + """Test that installer constructs paths according to schema.""" + with TemporaryDirectory() as 
# ============================================================================
# NOTE(review): this chunk is the tail of a unified git diff adding new test
# files; the added Python is reconstructed below with conventional formatting.
# Definitions that begin BEFORE this chunk -- the enclosing test class of the
# first test, and the module import block that provides Path,
# TemporaryDirectory, load_directory_schema, build_directory_structure,
# determine_tls and load_full_schema -- are reconstructed best-effort and
# flagged inline.  Confirm names against the full patch before applying.
# ============================================================================

# Directory layout hard-coded in settings.py before the schema refactor.
# Kept as a single module-level constant so the two backwards-compatibility
# tests below cannot drift apart (the original patch duplicated this dict
# verbatim in both tests).
ORIGINAL_STRUCTURE = {
    "spyglass": {
        "raw": "raw",
        "analysis": "analysis",
        "recording": "recording",
        "sorting": "spikesorting",
        "waveforms": "waveforms",
        "temp": "tmp",
        "video": "video",
        "export": "export",
    },
    "kachery": {
        "cloud": ".kachery-cloud",
        "storage": "kachery_storage",
        "temp": "tmp",
    },
    "dlc": {
        "project": "projects",
        "video": "video",
        "output": "output",
    },
    "moseq": {
        "project": "projects",
        "video": "video",
    },
}


class TestDirectorySchema:
    """Tests that installer-built directories match the schema.

    NOTE(review): the class header and the first test's ``def`` line fall
    outside this chunk; both names are reconstructed best-effort -- verify
    against the original patch.
    """

    def test_directory_paths_match_schema(self):
        """Each installer-built path equals ``base_dir / <schema rel path>``."""
        with TemporaryDirectory() as tmpdir:
            base_dir = Path(tmpdir) / "spyglass_data"

            # Ground truth: prefix -> {key: relative_path} from the schema file.
            schema = load_directory_schema()

            # What the installer actually builds (and creates on disk).
            dirs = build_directory_structure(base_dir, create=True, verbose=False)

            # Every schema entry must appear in dirs under "<prefix>_<key>".
            for prefix in schema:
                for key, rel_path in schema[prefix].items():
                    expected_path = base_dir / rel_path
                    actual_path = dirs[f"{prefix}_{key}"]
                    assert expected_path == actual_path, (
                        f"Path mismatch for {prefix}.{key}: "
                        f"expected {expected_path}, got {actual_path}"
                    )

    def test_installer_config_keys_match_settings_expectations(self):
        """Test that installer config keys match what settings.py expects."""
        from spyglass.settings import SpyglassConfig

        with TemporaryDirectory() as tmpdir:
            base_dir = Path(tmpdir) / "spyglass_data"

            # Ground truth: the structure settings.py builds at runtime.
            config_obj = SpyglassConfig()
            expected_structure = config_obj.relative_dirs

            # What the installer creates.
            dirs = build_directory_structure(base_dir, create=True, verbose=False)

            # Each prefix group must expose exactly the expected keys.
            for prefix in expected_structure:
                expected_keys = set(expected_structure[prefix].keys())

                # dirs is flat ("<prefix>_<key>"); recover the per-prefix keys.
                actual_keys = set()
                for dir_name in dirs.keys():
                    if dir_name.startswith(f"{prefix}_"):
                        key = dir_name.split("_", 1)[1]
                        actual_keys.add(key)

                assert expected_keys == actual_keys, (
                    f"Key mismatch for {prefix}: "
                    f"expected {expected_keys}, got {actual_keys}"
                )


class TestBackwardsCompatibility:
    """Tests for backwards compatibility."""

    def test_schema_matches_original_hardcoded_structure(self):
        """Schema file reproduces the original hard-coded structure."""
        current = load_directory_schema()

        # Should be identical to the pre-refactor layout.
        assert current == ORIGINAL_STRUCTURE, (
            "Schema has changed from original hard-coded structure. "
            "This breaks backwards compatibility."
        )

    def test_settings_produces_original_structure(self):
        """settings.py produces the original structure at runtime."""
        from spyglass.settings import SpyglassConfig

        config = SpyglassConfig()
        runtime_structure = config.relative_dirs

        # Should be identical to the pre-refactor layout.
        assert runtime_structure == ORIGINAL_STRUCTURE, (
            "Runtime structure differs from original. "
            "This breaks backwards compatibility."
        )


class TestTLSDetermination:
    """Tests for automatic TLS determination."""

    def test_localhost_disables_tls(self):
        """Test that localhost connections disable TLS."""
        assert determine_tls("localhost") is False

    def test_ipv4_localhost_disables_tls(self):
        """Test that 127.0.0.1 disables TLS."""
        assert determine_tls("127.0.0.1") is False

    def test_ipv6_localhost_disables_tls(self):
        """Test that ::1 disables TLS."""
        assert determine_tls("::1") is False

    def test_remote_hostname_enables_tls(self):
        """Test that remote hostnames enable TLS."""
        assert determine_tls("lmf-db.cin.ucsf.edu") is True

    def test_remote_ip_enables_tls(self):
        """Test that remote IP addresses enable TLS."""
        assert determine_tls("192.168.1.100") is True

    def test_custom_schema_tls_config(self):
        """Test TLS determination with custom schema."""
        custom_schema = {
            "tls": {
                "localhost_addresses": ["localhost", "127.0.0.1", "mylocal"]
            }
        }
        # Custom local address should disable TLS
        assert determine_tls("mylocal", schema=custom_schema) is False
        # Other addresses should enable TLS
        assert determine_tls("remote.host", schema=custom_schema) is True


class TestSchemaVersioning:
    """Tests for schema versioning."""

    def test_schema_has_version(self):
        """Test that schema file includes version."""
        schema = load_full_schema()
        assert "_schema_version" in schema
        assert schema["_schema_version"] == "1.0.0"

    def test_version_history_present(self):
        """Test that version history is documented."""
        schema = load_full_schema()
        assert "_version_history" in schema
        assert "1.0.0" in schema["_version_history"]


class TestConfigCompatibility:
    """Tests for config compatibility between installer and settings.py."""

    def _get_all_keys(self, d: dict, prefix: str = "") -> set:
        """Recursively get all dotted key paths in a nested dictionary."""
        keys = set()
        for k, v in d.items():
            full_key = f"{prefix}.{k}" if prefix else k
            keys.add(full_key)
            if isinstance(v, dict):
                keys.update(self._get_all_keys(v, full_key))
        return keys

    def test_installer_config_has_all_settings_keys(self):
        """Test that installer config includes all keys from settings.py."""
        from spyglass.settings import SpyglassConfig

        with TemporaryDirectory() as tmpdir:
            base_dir = Path(tmpdir) / "spyglass_data"

            # Build the directory layout the installer would create.
            dir_schema = load_directory_schema()
            dirs = build_directory_structure(
                base_dir, schema=dir_schema, create=True, verbose=False
            )

            # Mirror of the config emitted by install.py::create_database_config().
            installer_config = {
                "database.host": "localhost",
                "database.port": 3306,
                "database.user": "testuser",
                "database.password": "testpass",
                "database.use_tls": False,
                "filepath_checksum_size_limit": 1 * 1024**3,
                "enable_python_native_blobs": True,
                "stores": {
                    "raw": {
                        "protocol": "file",
                        "location": str(dirs["spyglass_raw"]),
                        "stage": str(dirs["spyglass_raw"]),
                    },
                    "analysis": {
                        "protocol": "file",
                        "location": str(dirs["spyglass_analysis"]),
                        "stage": str(dirs["spyglass_analysis"]),
                    },
                },
                "custom": {
                    "debug_mode": False,
                    "test_mode": False,
                    "kachery_zone": "franklab.default",
                    "spyglass_dirs": {
                        "base": str(base_dir),
                        "raw": str(dirs["spyglass_raw"]),
                        "analysis": str(dirs["spyglass_analysis"]),
                        "recording": str(dirs["spyglass_recording"]),
                        "sorting": str(dirs["spyglass_sorting"]),
                        "waveforms": str(dirs["spyglass_waveforms"]),
                        "temp": str(dirs["spyglass_temp"]),
                        "video": str(dirs["spyglass_video"]),
                        "export": str(dirs["spyglass_export"]),
                    },
                    "kachery_dirs": {
                        "cloud": str(dirs["kachery_cloud"]),
                        "storage": str(dirs["kachery_storage"]),
                        "temp": str(dirs["kachery_temp"]),
                    },
                    "dlc_dirs": {
                        "base": str(base_dir / "deeplabcut"),
                        "project": str(dirs["dlc_project"]),
                        "video": str(dirs["dlc_video"]),
                        "output": str(dirs["dlc_output"]),
                    },
                    "moseq_dirs": {
                        "base": str(base_dir / "moseq"),
                        "project": str(dirs["moseq_project"]),
                        "video": str(dirs["moseq_video"]),
                    },
                },
            }

            # The config settings.py would generate for the same inputs.
            sg_config = SpyglassConfig()
            settings_config = sg_config._generate_dj_config(
                base_dir=str(base_dir),
                database_user="testuser",
                database_password="testpass",
                database_host="localhost",
                database_port=3306,
                database_use_tls=False,
            )

            # Compare the full dotted key sets of both configs.
            installer_keys = self._get_all_keys(installer_config)
            settings_keys = self._get_all_keys(settings_config)

            # Installer must cover every key settings.py produces.
            missing_keys = settings_keys - installer_keys
            assert not missing_keys, (
                f"Installer config is missing keys from settings.py: "
                f"{sorted(missing_keys)}. Update install.py::create_database_config()"
            )


# ============================================================================
# tests/setup/test_install.py (new file added by this patch)
# ============================================================================
"""Tests for installation script."""

import subprocess
import sys
from pathlib import Path
from unittest.mock import Mock, patch  # mock_open was imported but never used

import pytest

# Make scripts/ importable so the installer can be imported directly.
scripts_dir = Path(__file__).parent.parent.parent / "scripts"
sys.path.insert(0, str(scripts_dir))

from install import (
    check_prerequisites,
    get_base_directory,
    get_conda_command,
    get_required_python_version,
    is_docker_available_inline,
)


class TestGetRequiredPythonVersion:
    """Tests for get_required_python_version()."""

    def test_returns_tuple(self):
        """Test that function returns a tuple."""
        version = get_required_python_version()
        assert isinstance(version, tuple)
        assert len(version) == 2

    def test_version_is_reasonable(self):
        """Test that returned version is reasonable."""
        major, minor = get_required_python_version()
        assert major == 3
        assert 9 <= minor <= 13  # Current supported range


class TestGetCondaCommand:
    """Tests for get_conda_command()."""

    def test_prefers_mamba(self):
        """Test that mamba is preferred over conda."""
        with patch("shutil.which") as mock_which:
            mock_which.side_effect = lambda cmd: cmd == "mamba"
            assert get_conda_command() == "mamba"

    def test_falls_back_to_conda(self):
        """Test fallback to conda when mamba unavailable."""
        with patch("shutil.which") as mock_which:
            mock_which.side_effect = lambda cmd: cmd == "conda"
            assert get_conda_command() == "conda"

    def test_raises_when_neither_available(self):
        """Test that RuntimeError raised when neither available."""
        with patch("shutil.which", return_value=None):
            with pytest.raises(RuntimeError, match="conda or mamba not found"):
                get_conda_command()


class TestGetBaseDirectory:
    """Tests for get_base_directory()."""

    def test_cli_arg_priority(self, tmp_path):
        """Test that CLI argument has highest priority."""
        cli_path = tmp_path / "cli_path"
        result = get_base_directory(str(cli_path))
        assert result == cli_path.resolve()
        assert result.exists()  # Verify it was created

    def test_env_var_priority(self, monkeypatch, tmp_path):
        """Test that environment variable has second priority."""
        env_path = tmp_path / "env_path"
        monkeypatch.setenv("SPYGLASS_BASE_DIR", str(env_path))
        result = get_base_directory(None)
        assert result == env_path.resolve()
        assert result.exists()  # Verify it was created

    def test_cli_overrides_env_var(self, monkeypatch, tmp_path):
        """Test that CLI argument overrides environment variable."""
        env_path = tmp_path / "env_path"
        cli_path = tmp_path / "cli_path"
        monkeypatch.setenv("SPYGLASS_BASE_DIR", str(env_path))
        result = get_base_directory(str(cli_path))
        assert result == cli_path.resolve()
        assert result.exists()  # Verify CLI path was created
        assert not env_path.exists()  # Verify ENV path was NOT created

    def test_expands_user_home(self, tmp_path):
        """Test that the configured path is created, absolute, and resolved.

        NOTE(review): the original docstring claimed to test ``~``
        expansion, but the body never passed a ``~`` path (exercising real
        expansion would create directories in the user's actual home).
        The test now verifies resolution/creation using pytest's
        ``tmp_path`` -- previously an unused fixture shadowed by a
        redundant ``tempfile.TemporaryDirectory`` block and a duplicate
        function-level ``Path`` import.
        """
        test_path = tmp_path / "test"
        result = get_base_directory(str(test_path))
        assert test_path.resolve() == result
        assert result.is_absolute()
        assert result.exists()


class TestIsDockerAvailableInline:
    """Tests for is_docker_available_inline()."""

    def test_returns_false_when_docker_not_in_path(self):
        """Test returns False when docker not in PATH."""
        with patch("shutil.which", return_value=None):
            assert is_docker_available_inline() is False

    def test_returns_false_when_daemon_not_running(self):
        """Test returns False when docker daemon not running."""
        with patch("shutil.which", return_value="/usr/bin/docker"):
            with patch("subprocess.run") as mock_run:
                mock_run.side_effect = subprocess.CalledProcessError(
                    1, "docker"
                )
                assert is_docker_available_inline() is False

    def test_returns_true_when_docker_available(self):
        """Test returns True when docker is available."""
        with patch("shutil.which", return_value="/usr/bin/docker"):
            with patch("subprocess.run") as mock_run:
                mock_run.return_value = Mock(returncode=0)
                assert is_docker_available_inline() is True


class TestCheckPrerequisites:
    """Tests for check_prerequisites()."""

    def test_does_not_raise_on_valid_system(self):
        """Test that function doesn't raise on valid system."""
        # This test assumes we're running on a valid development system.
        # If it fails, the system isn't suitable for development.
        try:
            check_prerequisites()
        except RuntimeError as e:
            # Only acceptable failure is conda/mamba not found in test env
            if "conda or mamba not found" not in str(e):
                raise


@pytest.mark.integration
class TestInstallationIntegration:
    """Integration tests for full installation workflow.

    These tests are marked as integration and can be run separately.
    They require conda/mamba and take longer to run.
    """

    def test_validate_script_exists(self):
        """Test that validate.py script exists."""
        validate_script = scripts_dir / "validate.py"
        assert validate_script.exists()
        assert validate_script.is_file()

    def test_install_script_exists(self):
        """Test that install.py script exists."""
        install_script = scripts_dir / "install.py"
        assert install_script.exists()
        assert install_script.is_file()

    def test_scripts_are_executable(self):
        """Test that scripts have execute permissions."""
        validate_script = scripts_dir / "validate.py"
        install_script = scripts_dir / "install.py"

        # Check if readable and executable (on Unix-like systems)
        if sys.platform != "win32":
            assert validate_script.stat().st_mode & 0o111  # Has execute bit
            assert install_script.stat().st_mode & 0o111


class TestDockerUtilities:
    """Tests for docker utility module."""

    def test_docker_module_exists(self):
        """Test that docker utilities module exists."""
        docker_module = (
            Path(__file__).parent.parent.parent
            / "src"
            / "spyglass"
            / "utils"
            / "docker.py"
        )
        assert docker_module.exists()

    def test_docker_module_imports(self):
        """Test that docker utilities can be imported."""
        try:
            from spyglass.utils import docker

            assert hasattr(docker, "DockerConfig")
            assert hasattr(docker, "is_docker_available")
            assert hasattr(docker, "start_database_container")
        except ImportError:
            pytest.skip("Spyglass not installed")


# ============================================================================
# tests/utils/conftest.py (modified hunk): the patch moves the
# ``# noqa: F401`` comment from the closing parenthesis onto the imported
# name, so the flake8 suppression attaches to the import itself.
# Post-patch fixture body (decorator above this ``def`` is outside the
# hunk's context lines -- confirm against the full file):
# ============================================================================
def chain(Nwbfile):
    """Return example TableChain object from chains."""
    from spyglass.linearization.merge import (
        LinearizedPositionOutput,  # noqa: F401
    )
    from spyglass.utils.dj_graph import TableChain

    yield TableChain(Nwbfile, LinearizedPositionOutput)