diff --git a/pocs/maml_model_agnostic_meta_learning.py b/pocs/maml_model_agnostic_meta_learning.py
index 8b3530b..f9f6c64 100644
--- a/pocs/maml_model_agnostic_meta_learning.py
+++ b/pocs/maml_model_agnostic_meta_learning.py
@@ -1,3 +1,73 @@
+"""Meta-learning model implementing the MAML algorithm for few-shot learning.
+
+    This neural network is designed to quickly adapt to new tasks through
+    the following process:
+
+    1. Meta-Initialization:
+       - The network starts with parameters θ that are explicitly trained to be
+         easily adaptable to new tasks
+
+    2. Task Adaptation:
+       - For each task, the model takes a few gradient steps from θ to θ'
+       - These steps use a small amount of task-specific data (support set)
+
+    3. Meta-Update:
+       - The model evaluates performance on held-out task data (query set)
+       - Updates θ to minimize the loss obtained after task adaptation
+       - This creates a better starting point for future task adaptation
+
+    Key Components:
+    ---------------
+    1. Meta-Learning Architecture:
+       - Uses a neural network with skip connections for better gradient flow
+       - Implements bi-level optimization (inner and outer loops)
+       - Employs gradient clipping and adaptive learning rates for stability
+
+    2. Task Generation:
+       - Creates synthetic regression tasks with controlled complexity
+       - Each task represents a different non-linear function
+       - Includes multiple non-linearities (linear, sinusoidal, and hyperbolic
+         components)
+       - Adds controlled noise for robustness
+
+    3. Meta-Training Process:
+       - Inner Loop: Quick adaptation to specific tasks (few gradient steps)
+       - Outer Loop: Updates meta-parameters to optimize post-adaptation
+         performance
+       - Uses higher-order gradients to optimize the learning process itself
+
+    4. Visualization and Monitoring:
+       - Tracks adaptation progress
+       - Analyzes feature importance
+       - Monitors error distributions
+       - Visualizes prediction quality
+
+    Technical Details:
+    ------------------
+    - Architecture: Multi-layer perceptron with skip connections
+    - Optimization: SGD with momentum for meta-updates
+    - Learning Rate: Adaptive with ReduceLROnPlateau scheduling
+    - Regularization: Gradient clipping, early stopping
+
+    Usage:
+    ------
+    The main training loop:
+    1. Generates synthetic tasks
+    2. Performs meta-training
+    3. Demonstrates adaptation to new tasks
+    4. Visualizes the adaptation process
+
+    Example:
+    >>> meta_model = MetaModelGenerator(input_size=10, hidden_sizes=[64, 64], output_size=1)
+    >>> tasks = create_synthetic_tasks(num_tasks=200)
+    >>> task_dataloader = create_task_dataloader(tasks)
+    >>> # Train model
+    >>> loss, grad_norm = meta_model.meta_train_step(task_batch, device)
+
+    References:
+    -----------
+    1. Finn et al. (2017) - Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks
+    2. Antoniou et al. (2019) - How to train your MAML
+"""
+
 from typing import Dict, List, Optional, Tuple, Type
 import matplotlib.pyplot as plt
 import numpy as np
@@ -14,6 +84,44 @@ class MetaModelGenerator(nn.Module):
+    """Meta-learning model implementing the MAML algorithm for few-shot learning.
+
+    This neural network is designed to quickly adapt to new tasks through
+    gradient-based meta-learning. It uses an architecture with skip connections
+    for better gradient flow and implements both inner-loop (task adaptation)
+    and outer-loop (meta) optimization.
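+
+    A minimal sketch of the bi-level gradient flow this class relies on
+    (illustrative tensors only, not the class API; see meta_train_step for
+    the actual loop):
+
+    >>> import torch
+    >>> theta = torch.randn(5, requires_grad=True)     # meta-parameters θ
+    >>> support_loss = ((theta - 1.0) ** 2).sum()      # stand-in task loss
+    >>> (grad,) = torch.autograd.grad(support_loss, theta, create_graph=True)
+    >>> theta_prime = theta - 0.05 * grad              # inner-loop step to θ'
+    >>> query_loss = ((theta_prime - 1.0) ** 2).sum()  # held-out (query) loss
+    >>> query_loss.backward()                          # meta-gradient reaches θ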
+
+    Architecture Features:
+    ----------------------
+    - Skip connections between hidden layers for better gradient flow
+    - Smaller initial weights for stable training
+    - Momentum-based meta-optimization
+    - Adaptive learning rate scheduling
+
+    Training Process:
+    -----------------
+    1. Inner Loop (Task Adaptation):
+       - Takes a few gradient steps on task-specific data
+       - Uses fast weights for quick adaptation
+       - Implements higher-order gradients for meta-learning
+
+    2. Outer Loop (Meta-Update):
+       - Updates model parameters to improve post-adaptation performance
+       - Uses gradient clipping for stability
+       - Monitors gradient norms and learning progress
+
+    Args:
+        input_size (int): Dimension of input features
+        hidden_sizes (List[int]): List of hidden layer sizes
+        output_size (int): Dimension of output predictions
+        inner_lr (float, optional): Learning rate for task adaptation. Defaults to 0.05.
+        meta_lr (float, optional): Learning rate for meta-updates. Defaults to 0.003.
+
+    Attributes:
+        inner_lr (float): Learning rate for inner-loop optimization
+        meta_optimizer (optim.SGD): Optimizer for meta-updates
+        scheduler (optim.lr_scheduler): Learning rate scheduler for meta-optimization
+    """
+
     def __init__(
         self,
         input_size: int,
diff --git a/src/bayes_updating.py b/src/bayes_updating.py
index 348b2ea..56f0391 100644
--- a/src/bayes_updating.py
+++ b/src/bayes_updating.py
@@ -1,3 +1,76 @@
+"""
+Bayesian Belief Network for Dynamic Belief Updating Using Language Models
+=========================================================================
+
+This module implements a novel approach to modeling and updating belief systems
+using language model embeddings and Bayesian inference. It demonstrates how
+beliefs about topics can evolve based on new evidence while maintaining
+uncertainty estimates.
+
+Core Concepts:
+--------------
+1. Belief Representation:
+   - Beliefs are represented as high-dimensional vectors in embedding space
+   - Each dimension captures semantic aspects of the belief
+   - Vectors are generated using LLM embeddings for consistent semantic meaning
+
+2. Bayesian Framework:
+   - Prior: Current belief state and confidence
+   - Likelihood: Similarity between new evidence and the current belief
+   - Posterior: Updated belief incorporating new evidence
+   - Confidence: Uncertainty measure updated via Bayes' rule
+
+3. Belief Evolution:
+   - Beliefs change gradually through weighted averaging
+   - Confidence levels affect the impact of new evidence
+   - Historical states are maintained for analysis
+   - Time-based decay models forgetting and uncertainty growth
+
+Key Components:
+---------------
+- BeliefState: Data structure holding current beliefs and history
+- BayesianBeliefUpdater: Core logic for belief updates
+- BeliefVisualizer: Visualization of belief evolution
+
+Example Experiment:
+-------------------
+The main() function demonstrates the system using AI ethics as a test domain:
+1. Starts with a neutral belief about AI ethics
+2. Processes a sequence of statements representing different viewpoints
+3. Shows how beliefs evolve from:
+   - An initial safety-focused perspective
+   - Through various challenging viewpoints
+   - To a more nuanced understanding incorporating multiple aspects
+4. Demonstrates confidence dynamics, where confidence:
+   - Increases with confirming evidence
+   - Decreases with contradictory evidence
+   - Decays over time without updates
+
+Usage:
+------
+
+```python
+llm = OllamaInterface()
+updater = BayesianBeliefUpdater(llm)
+await updater.initialize_belief_state("AI ethics")
+new_state = await updater.update_belief("AI ethics", "New evidence...")
+analysis = await updater.analyze_belief_shift("AI ethics")
+```
+
+Mathematical Foundation:
+------------------------
+The system implements Bayes' theorem:
+
+P(belief|evidence) ∝ P(evidence|belief) * P(belief)
+
+Where:
+- P(belief) is the prior confidence
+- P(evidence|belief) is calculated via cosine similarity
+- P(belief|evidence) becomes the posterior confidence
+
+The belief vector itself is updated using weighted averaging:
+
+new_belief = (1 - w) * old_belief + w * evidence
+
+where w is derived from the posterior confidence.
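+
+Illustrative sketch of a single update step (made-up numbers, an assumed
+binary-hypothesis normalization, and an assumed weighting w = 1 - posterior;
+the real derivation lives in BayesianBeliefUpdater.update_belief):
+
+```python
+import numpy as np
+
+belief = np.array([0.6, 0.8]) / np.linalg.norm([0.6, 0.8])    # unit belief vector
+evidence = np.array([0.9, 0.1]) / np.linalg.norm([0.9, 0.1])  # unit evidence vector
+
+prior = 0.6                            # current confidence
+likelihood = float(belief @ evidence)  # cosine similarity of unit vectors
+posterior = (likelihood * prior) / (likelihood * prior + (1 - likelihood) * (1 - prior))
+
+w = 1 - posterior                      # assumed relation: low confidence -> larger update
+belief = (1 - w) * belief + w * evidence
+belief /= np.linalg.norm(belief)       # re-normalize after averaging
+```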
+"""
+
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Tuple
 import numpy as np
@@ -15,10 +88,35 @@ class BeliefState:
     """
     A data structure that represents a belief about a topic using vector embeddings.
-    - belief_vector: High-dimensional vector representing semantic meaning
-    - confidence: Scalar value [0-1] indicating certainty in current belief
-    - prior_states: Historical record of previous beliefs and confidences (limited to max_history)
-    - themes: List of identified themes (currently unused but prepared for future)
+
+    The belief state combines two key aspects:
+    1. Semantic Understanding: High-dimensional vector capturing meaning
+    2. Epistemic Uncertainty: Confidence level in the current belief
+
+    Key Components:
+    ---------------
+    belief_vector : np.ndarray
+        High-dimensional embedding vector representing the semantic content of the belief.
+        - Each dimension captures different aspects of meaning
+        - Normalized to unit length for consistent comparison
+        - Generated from LLM embeddings
+
+    confidence : float
+        Scalar value in [0, 1] indicating certainty in the current belief
+        - 0.0 = complete uncertainty
+        - 1.0 = absolute certainty (never actually reached)
+        - Decays over time without reinforcement
+
+    prior_states : List[Tuple[np.ndarray, float]]
+        Historical record of previous beliefs and confidences
+        - Enables analysis of belief evolution
+        - Limited by max_history to prevent unbounded growth
+        - Used for visualization and trend analysis
+
+    themes : List[str]
+        Identified themes in the belief content
+        - Currently unused but prepared for future theme tracking
+        - Will enable analysis of belief clusters and patterns
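+
+    Example:
+    --------
+    Minimal construction (a 3-dimensional vector for brevity; real belief
+    vectors are full-size LLM embeddings, and the remaining fields are
+    assumed to fall back to their defaults):
+
+    ```python
+    import numpy as np
+
+    state = BeliefState(
+        belief_vector=np.array([0.6, 0.8, 0.0]),
+        confidence=0.5,
+    )
+    ```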
     """
 
     belief_vector: np.ndarray
@@ -35,12 +133,53 @@ def __post_init__(self):
 
 class BayesianBeliefUpdater:
     """
-    Core class that implements Bayesian belief updating using language model embeddings.
-    Key concepts:
-    1. Each topic maintains its own belief state
-    2. Updates are performed using Bayesian inference
-    3. Beliefs are represented as high-dimensional embeddings
-    4. Confidence is updated based on similarity between current beliefs and new evidence
+    Implements dynamic belief updating using Bayesian inference and LLM embeddings.
+
+    Core Algorithm:
+    ---------------
+    1. Belief Representation:
+       - Uses LLM embeddings to capture semantic meaning
+       - Maintains normalized vectors for consistent comparison
+       - Tracks confidence separately from belief content
+
+    2. Update Mechanism:
+       a) Prior Capture:
+          - Stores the current state before the update
+          - Maintains a limited history
+
+       b) Evidence Processing:
+          - Converts new evidence to an embedding
+          - Ensures a consistent semantic space
+
+       c) Likelihood Calculation:
+          - Uses cosine similarity
+          - Higher similarity = stronger support for the current belief
+
+       d) Confidence Update:
+          - Applies Bayes' rule
+          - Includes time-based decay
+          - More sensitive to contradictory evidence
+
+       e) Belief Vector Update:
+          - Weighted average based on confidence
+          - Ensures smooth transitions
+          - Maintains vector normalization
+
+    Design Principles:
+    ------------------
+    1. Conservative Updates:
+       - Beliefs change gradually
+       - Major shifts require consistent evidence
+
+    2. Uncertainty Handling:
+       - Confidence decays over time
+       - Contradictory evidence reduces confidence faster
+       - Maximum confidence is capped
+
+    3. Memory Effects:
+       - Maintains a history of belief states
+       - Enables analysis of belief evolution
+       - Supports visualization of changes
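+
+    Example:
+    --------
+    Feeding a sequence of statements and watching confidence evolve (the
+    methods are coroutines, so run them inside an event loop; the statements
+    are illustrative):
+
+    ```python
+    updater = BayesianBeliefUpdater(OllamaInterface())
+    await updater.initialize_belief_state("AI ethics")
+    for statement in ["AI must be regulated.", "Regulation stifles progress."]:
+        state = await updater.update_belief("AI ethics", statement)
+        print(state.confidence)
+    ```
     """
 
     def __init__(self, llm: LanguageModel):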