# Source: test_enhanced.py from the BAJimH/2048-RLbot repository (master branch, GitHub)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
测试强化版DQN模型在2048游戏中的性能
"""

import torch
import numpy as np
import gymnasium as gym
import gym_2048
import argparse
import time
from interface import DQNInterface
from utils import load_model_path_by_args
import matplotlib.pyplot as plt


def play_game(model, env, num_episodes=10, render=False, delay=0.1):
    """Play several greedy episodes with a trained model and collect results.

    At every step the action with the largest Q-value produced by
    ``model.net`` is taken (no exploration). An episode ends as soon as the
    environment reports either termination or truncation.

    Args:
        model: Object exposing ``net`` (callable mapping a batched state
            tensor to Q-values) and ``device`` (where the input tensor is
            moved before the forward pass).
        env: Environment following the Gymnasium API: ``reset()`` returns
            ``(observation, info)`` and ``step(action)`` returns
            ``(observation, reward, terminated, truncated, info)``.
        num_episodes: Number of episodes to play.
        render: When true, call ``env.render()`` and print progress after
            every step.
        delay: Seconds to sleep after each render; only used when ``render``
            is true.

    Returns:
        A tuple ``(rewards, max_tiles)`` of two lists with one entry per
        episode: the summed reward and ``np.max`` of the final observation.
    """
    rewards = []
    max_tiles = []

    for episode in range(num_episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0

        if render:
            print(f"开始第 {episode+1}/{num_episodes} 局游戏")
            env.render()
            time.sleep(delay)

        while not done:
            # Greedy action selection; no gradients are needed for inference.
            with torch.no_grad():
                state_tensor = torch.tensor(np.array(state)).unsqueeze(0).to(model.device)
                q_values = model.net(state_tensor)
                action = torch.max(q_values, dim=1)[1].item()

            next_state, reward, terminated, truncated, _ = env.step(action)
            # Truncation (e.g. a step limit) must end the episode as well,
            # otherwise the loop would keep stepping a finished environment.
            done = terminated or truncated

            state = next_state
            total_reward += reward

            if render:
                env.render()
                print(f"动作: {action}, 奖励: {reward}, 总奖励: {total_reward}")
                time.sleep(delay)

        rewards.append(total_reward)
        # NOTE(review): this is the largest entry of the final observation;
        # it equals the largest tile only if observations hold raw tile
        # values rather than e.g. log2 or one-hot encodings - confirm.
        max_tile = np.max(state)
        max_tiles.append(max_tile)

        if render:
            print(f"游戏结束! 总奖励: {total_reward}, 最大块: {max_tile}")
            print("-" * 50)

    return rewards, max_tiles


def evaluate_models(model_paths, num_episodes=10, render=False):
    """Evaluate several checkpoints and report per-model statistics.

    For each entry a model is loaded with
    ``DQNInterface.load_from_checkpoint``, switched to eval mode, and played
    through ``play_game`` on a freshly created environment named by
    ``model.hparams.env_name``. Summary statistics are printed per model.

    Args:
        model_paths: Mapping from a display name to a checkpoint path.
        num_episodes: Number of episodes played per model.
        render: When true, the environment is created with
            ``render_mode="human"`` and the games are rendered.

    Returns:
        A dict keyed by model name. Each value is a dict with the raw
        ``"rewards"`` and ``"max_tiles"`` lists plus ``"avg_reward"``,
        ``"max_reward"``, ``"avg_max_tile"`` and ``"max_max_tile"``.
    """
    results = {}

    for model_name, path in model_paths.items():
        print(f"评估模型: {model_name}")

        model = DQNInterface.load_from_checkpoint(path)
        model.eval()

        env = gym.make(model.hparams.env_name, render_mode="human" if render else None)
        try:
            rewards, max_tiles = play_game(model, env, num_episodes, render)
        finally:
            # Release the environment (and any render window) even when an
            # episode raises, so repeated evaluations do not leak resources.
            env.close()

        results[model_name] = {
            "rewards": rewards,
            "max_tiles": max_tiles,
            "avg_reward": np.mean(rewards),
            "max_reward": np.max(rewards),
            "avg_max_tile": np.mean(max_tiles),
            "max_max_tile": np.max(max_tiles)
        }

        print(f"平均奖励: {results[model_name]['avg_reward']:.2f}")
        print(f"最大奖励: {results[model_name]['max_reward']:.2f}")
        print(f"平均最大块: {results[model_name]['avg_max_tile']:.2f}")
        print(f"游戏中最大块: {results[model_name]['max_max_tile']}")
        print("-" * 50)

    return results


def plot_results(results):
    """Draw a 2x2 comparison figure from evaluation results.

    The left column holds per-model histograms (episode rewards on top,
    max-tile values below); the right column holds bar charts of the
    matching per-model averages. The figure is written to
    ``model_comparison.png`` and then shown.

    Args:
        results: Mapping from model name to a dict providing the keys
            ``"rewards"``, ``"max_tiles"``, ``"avg_reward"`` and
            ``"avg_max_tile"``.
    """
    names = list(results)

    # One entry per figure row:
    # (samples key, average key, hist title, hist x-label, bar title, bar y-label)
    rows = (
        ("rewards", "avg_reward", "奖励分布", "奖励", "平均奖励", "平均奖励"),
        ("max_tiles", "avg_max_tile", "最大块分布", "最大块值", "平均最大块", "平均最大块值"),
    )

    plt.figure(figsize=(15, 10))

    for row_index, row in enumerate(rows):
        samples_key, mean_key, hist_title, hist_xlabel, bar_title, bar_ylabel = row

        # Left panel: overlaid histograms, one per model.
        plt.subplot(2, 2, 2 * row_index + 1)
        for name in names:
            plt.hist(results[name][samples_key], alpha=0.7, label=name)
        plt.title(hist_title)
        plt.xlabel(hist_xlabel)
        plt.ylabel("频率")
        plt.legend()

        # Right panel: one bar per model with the average value.
        plt.subplot(2, 2, 2 * row_index + 2)
        averages = [results[name][mean_key] for name in names]
        plt.bar(names, averages)
        plt.title(bar_title)
        plt.xlabel("模型")
        plt.ylabel(bar_ylabel)

    plt.tight_layout()
    plt.savefig("model_comparison.png")
    plt.show()


def _build_parser():
    """Create the command-line parser for the evaluation script."""
    parser = argparse.ArgumentParser(description="测试DQN模型在2048游戏中的性能")

    parser.add_argument(
        "--model_path",
        type=str,
        required=True,
        help="模型检查点的路径",
    )

    parser.add_argument(
        "--compare_model_path",
        type=str,
        default=None,
        help="用于比较的另一个模型检查点的路径",
    )

    parser.add_argument(
        "--num_episodes",
        type=int,
        default=10,
        help="评估的游戏局数",
    )

    parser.add_argument(
        "--render",
        action="store_true",
        help="是否渲染游戏过程",
    )

    parser.add_argument(
        "--no_plot",
        action="store_true",
        help="不绘制比较图表",
    )

    return parser


def main(argv=None):
    """Evaluate one or two checkpoints selected on the command line.

    Parsing happens here rather than at module level so the function does
    not depend on a global that exists only when the file runs as a script.

    Args:
        argv: Optional list of argument strings. When ``None`` argparse
            falls back to ``sys.argv[1:]``, so a plain ``main()`` call behaves
            like the command-line invocation.
    """
    args = _build_parser().parse_args(argv)

    # The primary checkpoint is always evaluated.
    model_paths = {
        "DQN_Standard": args.model_path,
    }

    # A second checkpoint is added only when one was supplied.
    if args.compare_model_path:
        model_paths["DQN_Enhanced"] = args.compare_model_path

    results = evaluate_models(model_paths, args.num_episodes, args.render)

    # A comparison plot needs at least two models to be meaningful.
    if len(model_paths) > 1 and not args.no_plot:
        plot_results(results)


if __name__ == "__main__":
    main()