regular update #641

Open: wants to merge 4 commits into base `main`.

381 changes: 44 additions & 337 deletions README.md

Large diffs are not rendered by default.

576 changes: 576 additions & 0 deletions README.md.backup

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions generate_fashion_dataset.py
@@ -0,0 +1,119 @@
#!/usr/bin/env python
"""
Simplified script to generate a fashion dataset for training the DeepSeek-R1-Fashion model.
"""

import os
import json
import argparse

import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM


def parse_args():
    parser = argparse.ArgumentParser(description="Generate fashion dataset for DeepSeek-R1")
    parser.add_argument("--output-path", type=str, default="data/fashion-dataset",
                        help="Path to save the generated dataset")
    parser.add_argument("--num-samples", type=int, default=1000,
                        help="Number of samples to generate")
    parser.add_argument("--model", type=str, default="deepseek-ai/DeepSeek-R1",
                        help="Model to use for generation")
    return parser.parse_args()


def main():
    args = parse_args()

    # Fashion-related queries
    fashion_queries = [
        "What's a good outfit for a summer wedding?",
        "How do I style a basic white t-shirt?",
        "What are the key fashion trends for Fall 2025?",
        "Can you recommend sustainable fashion brands?",
        "How should I dress for a job interview in tech?",
        "What accessories go well with a little black dress?",
        "How do I build a minimalist wardrobe?",
        "What colors are complementary to olive skin tone?",
        "How do I style oversized clothing without looking sloppy?",
        "What's the difference between business casual and smart casual?",
        # Additional queries for variety
        "How can I dress professionally while pregnant?",
        "What are good outfit ideas for a first date?",
        "How do I choose the right jeans for my body type?",
        "What should I wear to a music festival?",
        "How do I transition my wardrobe from winter to spring?",
        "What are must-have pieces for a capsule wardrobe?",
        "How can I dress to look taller?",
        "What's appropriate to wear to a funeral?",
        "How do I care for silk clothing?",
        "What are some 90s fashion trends making a comeback?"
    ]

    # System prompt for fashion advice
    system_prompt = """You are a helpful AI assistant specializing in fashion advice.
When responding to fashion-related queries, follow these guidelines:
1. Consider the occasion, body type, personal style, and practical concerns
2. Provide specific recommendations with reasoning
3. Include options at different price points when appropriate
4. Suggest styling combinations and accessories
5. Mention current trends while respecting timeless principles

Your advice should be detailed, personalized, and practical."""

    print("Loading tokenizer and model...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(args.model)
        model = AutoModelForCausalLM.from_pretrained(args.model)
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Using a fallback model instead...")
        tokenizer = AutoTokenizer.from_pretrained("gpt2")
        model = AutoModelForCausalLM.from_pretrained("gpt2")

    # Create the output directory if it doesn't exist
    os.makedirs(os.path.dirname(args.output_path), exist_ok=True)

    # Generate responses
    print(f"Generating {args.num_samples} fashion conversation samples...")
    all_data = []
    for _ in tqdm(range(args.num_samples)):
        # Select a random query
        query = np.random.choice(fashion_queries)

        # Format the prompt
        prompt = f"{system_prompt}\n\nUser: {query}\nAssistant:"

        # Generate a response
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1024,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract the assistant's response from the decoded text
        try:
            assistant_response = response.split("Assistant:")[1].strip()
        except IndexError:
            assistant_response = response.replace(prompt, "").strip()

        # Store the query/response pair
        data = {
            "text": query,
            "response": assistant_response
        }
        all_data.append(data)

    # Save the dataset as JSON Lines (one record per line)
    with open(args.output_path, 'w') as f:
        for item in all_data:
            f.write(json.dumps(item) + '\n')

    print(f"Dataset generation complete. Saved to {args.output_path}")
    print(f"Generated {len(all_data)} samples")


if __name__ == "__main__":
    main()
109 changes: 109 additions & 0 deletions recipes/DeepSeek-R1-Fashion/README.md
@@ -0,0 +1,109 @@
# DeepSeek-R1-Fashion

This recipe provides configuration files and instructions for training a fashion-specialized version of DeepSeek-R1. The model is fine-tuned to provide high-quality fashion advice, outfit recommendations, and style guidance.

## Training Process

The training process consists of two main steps:

1. **Supervised Fine-Tuning (SFT)**: Fine-tune the base DeepSeek-R1 model on a fashion dataset
2. **Group Relative Policy Optimization (GRPO)**: Further refine the model with reinforcement learning

## Data Preparation

Before training, you need to prepare a fashion dataset. You can use the provided script to generate synthetic fashion conversations:

```bash
python recipes/DeepSeek-R1-Fashion/generate_fashion_dataset.py --output-path data/fashion-dataset --num-samples 10000
```
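
The script writes one JSON object per line with `text` and `response` fields. A minimal sanity check with the `datasets` library (assuming the default `data/fashion-dataset` output path used above) could look like this:

```python
from datasets import load_dataset

# Load the JSON Lines file produced by generate_fashion_dataset.py.
ds = load_dataset("json", data_files="data/fashion-dataset")["train"]

print(ds.column_names)   # expected: ['text', 'response']
print(ds[0]["text"])     # one of the fashion queries
```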

For the GRPO phase, you'll need query data:

```bash
# Create a directory for fashion queries
mkdir -p data/fashion-queries-dataset

# Example of creating a simple query dataset
python -c "
from datasets import Dataset

queries = [
    'What should I wear to a summer wedding?',
    'How do I style a denim jacket?',
    'What are the current fashion trends?',
    # Add more fashion queries here
]

ds = Dataset.from_dict({'query': queries})
ds.to_json('data/fashion-queries-dataset/fashion_queries.jsonl')
"
```

## Training Commands

### 1. Supervised Fine-Tuning (SFT)

```bash
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/zero3.yaml \
src/open_r1/sft.py \
--config recipes/DeepSeek-R1-Fashion/sft/config_fashion.yaml
```

### 2. GRPO Training

```bash
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/zero2.yaml \
--num_processes=7 src/open_r1/grpo.py \
--config recipes/DeepSeek-R1-Fashion/grpo/config_fashion.yaml
```

## Evaluation

After training, evaluate your fashion model using:

```bash
MODEL=your-username/DeepSeek-R1-Fashion
MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
OUTPUT_DIR=data/evals/$MODEL

# Fashion style evaluation
TASK=fashion_style
lighteval vllm $MODEL_ARGS "custom|$TASK|0|0" \
--custom-tasks src/open_r1/evaluate.py \
--use-chat-template \
--output-dir $OUTPUT_DIR
```

## Configuration Details

### SFT Configuration

The SFT configuration (`config_fashion.yaml`) uses the following key settings:

- Base model: DeepSeek-R1
- Learning rate: 5e-5
- Training epochs: 1
- Max sequence length: 16384
- Batch size: 16

### GRPO Configuration

The GRPO configuration includes:

- Base model: Your SFT-trained fashion model
- Learning rate: 1e-6
- Reward functions:
  - accuracy: Checks factual correctness
  - format: Ensures proper output formatting
  - tag_count: Maintains proper usage of think/answer tags
  - fashion_relevance: Custom reward for fashion-specific quality (see the sketch below)
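
The first three rewards mirror the standard open-r1 reward functions; `fashion_relevance` is specific to this recipe. As a rough illustration, a reward of this kind can be written as a callable that receives the generated completions and returns one score per completion, following the usual GRPO reward-function convention. The keyword heuristic below is a hedged sketch, not the actual implementation:

```python
# Hypothetical fashion_relevance reward: scores completions by keyword coverage.
# Illustrative only; a real reward could use a classifier or an LLM judge instead.
FASHION_TERMS = ("outfit", "fabric", "silhouette", "accessor", "style", "wardrobe")

def fashion_relevance_reward(completions, **kwargs):
    """Return one score in [0, 1] per completion, higher for more fashion coverage."""
    scores = []
    for completion in completions:
        # Completions are typically chat-formatted: a list of {"role", "content"} dicts.
        text = completion[0]["content"] if isinstance(completion, list) else str(completion)
        hits = sum(term in text.lower() for term in FASHION_TERMS)
        scores.append(min(hits / len(FASHION_TERMS), 1.0))
    return scores
```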

## Customization

You can customize the configurations by:

1. Adjusting training parameters in the config files
2. Modifying the system prompt to better match your fashion use case
3. Using different reward weights in the GRPO phase
4. Adding custom reward functions for fashion-specific evaluation
16 changes: 16 additions & 0 deletions recipes/DeepSeek-R1-Fashion/accelerate_config.yaml
@@ -0,0 +1,16 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: NO
downcast_bf16: 'no'
gpu_ids: "0"
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false