- 
                Notifications
    
You must be signed in to change notification settings  - Fork 1.2k
 
Open
Labels
stage/needs-triage: Automatically applied to new issues and PRs, indicating they haven't been looked at.
Description
Description:
When I try to locally invoke a particular lambda I receive the error:
[ERROR] Runtime.ImportModuleError: Unable to import module 'index': No module named 'regex._regex'
Traceback (most recent call last):
09 Oct 2025 17:53:53,221 [ERROR] (rapid) Init failed error=Runtime exited with error: exit status 1 InvokeID=
09 Oct 2025 17:53:53,224 [ERROR] (rapid) Invoke failed error=Runtime exited with error: exit status 1 InvokeID=caeb871a-4519-48b9-809a-351ebb7a71d9
09 Oct 2025 17:53:53,225 [ERROR] (rapid) Invoke DONE failed: Sandbox.Failure
{"errorMessage": "Unable to import module 'index': No module named 'regex._regex'", "errorType": "Runtime.ImportModuleError", "requestId": "", "stackTrace": []}
When I look in the asset folder referenced in the debug output, I can see that the regex library is present:
cdk.out/asset.d20346a9df67baa95ee488ff2aa746cd84c0bab186bf47cdf2c4c3565a0aece2
Steps to reproduce:
I'm not certain of the exact steps to reproduce the issue, but the general setup is:
requirements.txt
textstat>=0.7.3
CDK Lambda definition:
        stepreads_processor_lambda = _lambda.Function(
            self, "StepReadsProcessorLambda",
            function_name=f"{props.app_config.name}-stepreads-processor-lambda-{props.env_config.name}",
            runtime=_lambda.Runtime.PYTHON_3_12,
            handler="index.handler",
            code=_lambda.Code.from_asset(
                "src-backend/stepreads-processor-lambda",
                bundling=cdk.BundlingOptions(
                    image=_lambda.Runtime.PYTHON_3_12.bundling_image,
                    command=[
                        "bash", "-c",
                        "pip install -r requirements.txt -t /asset-output && cp -au . /asset-output"
                    ]
                )
            ),
            timeout=cdk.Duration.seconds(300),  # 5 minutes for complete pipeline with retries
            memory_size=1024,  # Increased for text generation workload
            environment={
                "ACCOUNT_ID": props.env_config.account,
                "DYNAMODB_TABLE_NAME": props.storage_stack.dynamodb_table.table_name,
                "DYNAMODB_TABLE_LEXILE_GSI_NAME": props.storage_stack.dynamodb_table_lexile_gsi_name,
                "INPUT_BUCKET_NAME": props.storage_stack.input_bucket.bucket_name,
                "OUTPUT_BUCKET_NAME": props.storage_stack.output_bucket.bucket_name,
                "VECTOR_BUCKET_NAME": props.storage_stack.vector_bucket.vector_bucket_name,
                "GRADED_TEXT_VECTOR_INDEX_NAME": props.storage_stack.graded_text_vector_index.index_name,
                "BEDROCK_EMBEDDING_MODEL_ID": props.app_config.bedrock.embedding_model_id,
                "BEDROCK_TEXT_GENERATION_MODEL_ID": props.app_config.bedrock.text_generation_model_id,
                "BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN": props.app_config.bedrock.text_generation_inference_profile_arn,
                "STEPREADS_MAX_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),  # Configurable retry count
                "STEPREADS_FKG_VALIDATION_TOLERANCE": str(props.env_config.stepreads.default_fkg_validation_tolerance),  # Allow ±1.0 grade level difference (more lenient)
                "STEPREADS_SNS_TOPIC_ARN": props.storage_stack.stepreads_notifications_topic.topic_arn,
                "DEFAULT_PCT_FOR_TARGET_LEXILE_STEP": str(props.env_config.stepreads.default_pct_for_target_lexile_step),
                "DEFAULT_TARGET_GRADE_MOVE": str(props.env_config.stepreads.default_target_grade_move),
                "DEFAULT_SIMILARITY_THRESHOLD": str(props.env_config.stepreads.default_similarity_threshold),
                "DEFAULT_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),
            },
            log_group=props.storage_stack.stepreads_processor_lambda_log_group
        )
The initial lambda code looks like this:
# Module-level setup for the StepReads processor Lambda: configures NLTK,
# creates the AWS clients, and reads configuration from environment variables.
# Everything here runs once, at module import time, before `handler` is called.
import os
# Configure NLTK environment BEFORE any other imports
# (NLTK_DATA must be in the environment before `nltk` is imported below).
os.environ['NLTK_DATA'] = '/tmp/nltk_data'
import json
import traceback
import boto3
import textstat
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
# Configure NLTK data path for Lambda environment
# NOTE: this replaces NLTK's search path list with a single entry rather than
# appending to it. Presumably /tmp is used because it is writable in Lambda
# -- TODO confirm.
import nltk
nltk.data.path = ['/tmp/nltk_data']
# Initialize AWS clients
# Created once at import time, at module scope.
s3 = boto3.client('s3')
s3vectors = boto3.client('s3vectors')
dynamodb = boto3.resource('dynamodb')
bedrock = boto3.client('bedrock-runtime')
sns = boto3.client('sns')
# Environment variables
# Values read with os.environ.get() and no default are None when the variable
# is not set; nothing is validated at this point.
DYNAMODB_TABLE_NAME = os.environ.get('DYNAMODB_TABLE_NAME')
DYNAMODB_TABLE_LEXILE_GSI_NAME = os.environ.get('DYNAMODB_TABLE_LEXILE_GSI_NAME')
OUTPUT_BUCKET_NAME = os.environ.get('OUTPUT_BUCKET_NAME')
VECTOR_BUCKET_NAME = os.environ.get('VECTOR_BUCKET_NAME')
GRADED_TEXT_VECTOR_INDEX_NAME = os.environ.get('GRADED_TEXT_VECTOR_INDEX_NAME')
BEDROCK_EMBEDDING_MODEL_ID = os.environ.get('BEDROCK_EMBEDDING_MODEL_ID', 'amazon.titan-embed-text-v2:0')
BEDROCK_TEXT_GENERATION_MODEL_ID = os.environ.get('BEDROCK_TEXT_GENERATION_MODEL_ID')
BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN = os.environ.get('BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN')
STEPREADS_SNS_TOPIC_ARN = os.environ.get('STEPREADS_SNS_TOPIC_ARN')
# Configurable default lexile percentage for target lexile step via environment variable (default: 0.2)
# Determines the range of lexile values to consider for similar texts
# NOTE: unlike the retry count and tolerance below, this value is kept as a
# string here; no numeric conversion is applied at this point.
DEFAULT_PCT_FOR_TARGET_LEXILE_STEP = os.environ.get('DEFAULT_PCT_FOR_TARGET_LEXILE_STEP', '0.2')
# Configurable retry count via environment variable
# (parsed with int(); a non-integer value raises ValueError at import time)
MAX_RETRY_COUNT = int(os.environ.get('STEPREADS_MAX_RETRY_COUNT', '5'))
# Configurable grade tolerance via environment variable (default: 1.0 for better success rate)
# (parsed with float(); a non-numeric value raises ValueError at import time)
FKG_VALIDATION_TOLERANCE = float(os.environ.get('STEPREADS_FKG_VALIDATION_TOLERANCE', '1.0'))
# Constants - New organized structure in output bucket
STATUS_FILE_PREFIX = "stepreads/status"
EMBEDDING_DIMENSIONS = 1024
MAX_SIMILAR_TEXTS = 5
def handler(event, context):
    """
    Main Lambda handler for SQS-triggered StepReads processing
    """
    print("🚀 STEPREADS PROCESSOR LAMBDA STARTED")
    print("=" * 60)
    print(f"Event: {json.dumps(event)}")
    print(f"📊 Lambda Info: Memory={context.memory_limit_in_mb}MB, Timeout={context.get_remaining_time_in_millis()//1000}s")
    print(f"📦 Records to process: {len(event.get('Records', []))}")
    print("=" * 60)
Observed result:
Expected result:
Execution of code in my handler
Additional environment details (Ex: Windows, Mac, Amazon Linux etc)
{
  "version": "1.144.0",
  "system": {
    "python": "3.11.10",
    "os": "macOS-15.6.1-arm64-arm-64bit"
  },
  "additional_dependencies": {
    "docker_engine": "28.4.0",
    "aws_cdk": "2.1029.1 (build b45b1ab)",
    "terraform": "1.13.1"
  },
  "available_beta_feature_env_vars": [
    "SAM_CLI_BETA_FEATURES",
    "SAM_CLI_BETA_BUILD_PERFORMANCE",
    "SAM_CLI_BETA_TERRAFORM_SUPPORT",
    "SAM_CLI_BETA_PACKAGE_PERFORMANCE",
    "SAM_CLI_BETA_RUST_CARGO_LAMBDA"
  ]
}
Metadata
Assignees
Labels
stage/needs-triage: Automatically applied to new issues and PRs, indicating they haven't been looked at.