Skip to content

训练麻将的时候报错,看起来像是在牌已经发完之后仍然继续执行 step #322

@acracker

Description

@acracker

看起来像是 dealer.deck 中已经没有牌了,但程序仍然会继续执行 step。我看了一下代码,也不知道应该如何在牌发完时让游戏结束并退出。

err:

C:\Users\pl\MiniConda3\python.exe D:/code/mahjong/t2.py

Logs saved in experiments/leduc_holdem_cfr_result/
Traceback (most recent call last):
  File "D:/code/mahjong/t2.py", line 131, in <module>
    train(args)
  File "D:/code/mahjong/t2.py", line 65, in train
    agent.train()
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 41, in train
    self.traverse_tree(probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  [Previous line repeated 86 more times]
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 72, in traverse_tree
    self.env.step(action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\envs\env.py", line 84, in step
    next_state, player_id = self.game.step(action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\game.py", line 68, in step
    self.round.proceed_round(self.players, action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\round.py", line 78, in proceed_round
    self.dealer.deal_cards(players[self.current_player], 1)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\dealer.py", line 26, in deal_cards
    player.hand.append(self.deck.pop())
IndexError: pop from empty list

code:

# -*- coding: utf-8 -*-
import os
import logging
import argparse

import rlcard
from rlcard.agents import (
    CFRAgent,
    RandomAgent,
)
from rlcard.utils import (
    set_seed,
    tournament,
    Logger,
    plot_curve,
)


def train(args):
    """Train a CFR agent on ``args.env`` and evaluate it against random agents.

    The agent is trained and evaluated on the same game. Evaluation seats the
    CFR agent at position 0 and fills every remaining seat with a
    ``RandomAgent``, so the reward logged is the CFR agent's average payoff.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``env``, ``seed``, ``log_dir``, ``num_episodes``,
            ``evaluate_every`` and ``num_eval_games``.
    """
    # Training environment. Step-back is enabled because the CFR agent is
    # given this env to traverse.
    env = rlcard.make(
        args.env,
        config={
            'seed': 0,
            'allow_step_back': True,
        }
    )
    # Evaluate on the SAME game the agent is trained on. Previously this was
    # hard-coded to 'leduc-holdem', so an agent trained on any other env was
    # scored on a game whose states and actions it had never seen.
    eval_env = rlcard.make(
        args.env,
        config={
            'seed': 0,
        }
    )

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize CFR Agent
    agent = CFRAgent(
        env,
        os.path.join(
            args.log_dir,
            'cfr_model',
            args.env,
        ),
    )
    agent.load()  # If we have saved model, we first load the model

    # Evaluate CFR against random opponents: one random agent per remaining
    # seat, sized from the evaluation env's own action space.
    opponents = [
        RandomAgent(num_actions=eval_env.num_actions)
        for _ in range(eval_env.num_players - 1)
    ]
    eval_env.set_agents([agent] + opponents)

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')
            # Evaluate the performance. Play with Random agents.
            if episode % args.evaluate_every == 0:
                agent.save()  # Save model
                logger.log_performance(
                    episode,
                    # Index 0 is the payoff of the first seat (the CFR agent).
                    tournament(
                        eval_env,
                        args.num_eval_games
                    )[0]
                )

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')


if __name__ == '__main__':
    # Command-line entry point: build the argument parser, turn on debug
    # logging, and hand the parsed options to train().
    parser = argparse.ArgumentParser("CFR example in RLCard")

    # Game to train on; restricted to the names listed here.
    parser.add_argument(
        '--env',
        type=str,
        default='mahjong',
        choices=[
            'blackjack',
            'leduc-holdem',
            'limit-holdem',
            'doudizhu',
            'mahjong',
            'no-limit-holdem',
            'uno',
            'gin-rummy',
            'bridge',
        ],
    )

    # Integer-valued options, registered in a fixed order as (flag, default).
    int_options = (
        ('--seed', 42),
        ('--num_episodes', 5000),
        ('--num_eval_games', 2000),
        ('--evaluate_every', 100),
    )
    for flag, default_value in int_options:
        parser.add_argument(flag, type=int, default=default_value)

    # Directory used for logs and the saved model.
    parser.add_argument(
        '--log_dir',
        type=str,
        default='experiments/leduc_holdem_cfr_result/',
    )

    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG)
    train(args)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions