fix testing

daochenzha · daochenzha · commit 187e683a5e27 · 2022-03-05T00:29:28.000-06:00
diff --git a/.travis.yml b/.travis.yml
diff --git a/README.md b/README.md
@@ -1,11 +1,12 @@
 # RLCard: A Toolkit for Reinforcement Learning in Card Games
 <img width="500" src="https://dczha.com/files/rlcard/logo.jpg" alt="Logo" />
 
-[![Build Status](https://travis-ci.org/datamllab/RLCard.svg?branch=master)](https://travis-ci.org/datamllab/RLCard)
-[![Codacy Badge](https://api.codacy.com/project/badge/Grade/248eb15c086748a4bcc830755f1bd798)](https://www.codacy.com/manual/daochenzha/rlcard?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=datamllab/rlcard&amp;utm_campaign=Badge_Grade)
+[![Testing](https://github.com/datamllab/rlcard/actions/workflows/python-package.yml/badge.svg)](https://github.com/datamllab/rlcard/actions/workflows/python-package.yml)
+[![PyPI version](https://badge.fury.io/py/rlcard.svg)](https://badge.fury.io/py/rlcard)
 [![Coverage Status](https://coveralls.io/repos/github/datamllab/rlcard/badge.svg)](https://coveralls.io/github/datamllab/rlcard?branch=master)
 [![Downloads](https://pepy.tech/badge/rlcard)](https://pepy.tech/project/rlcard)
 [![Downloads](https://pepy.tech/badge/rlcard/month)](https://pepy.tech/project/rlcard)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 [中文文档](README.zh-CN.md)
 
@@ -24,8 +25,7 @@ RLCard is a toolkit for Reinforcement Learning (RL) in card games. It supports m
 *  **QQ Group**: Join our QQ group 665647450. Password: rlcardqqgroup
 
 **News:**
-*   All the algorithms can suppport [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo) now. Please check [here](examples/pettingzoo). Thanks the contribtuion from [
-Yifei Cheng](https://github.com/ycheng517).
+*   All the algorithms now support [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo). Please check [here](examples/pettingzoo). Thanks to [Yifei Cheng](https://github.com/ycheng517) for the contribution.
 *   Please follow [DouZero](https://github.com/kwai/DouZero), a strong Dou Dizhu AI and the [ICML 2021 paper](https://arxiv.org/abs/2106.06135). An online demo is available [here](https://douzero.org/). The algorithm is also integrated in RLCard. See [Training DMC on Dou Dizhu](docs/toy-examples.md#training-dmc-on-dou-dizhu).
 *   Our package is used in [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo). Please check it out!
 *   We have released RLCard-Showdown, GUI demo for RLCard. Please check out [here](https://github.com/datamllab/rlcard-showdown)!
diff --git a/README.zh-CN.md b/README.zh-CN.md
@@ -1,11 +1,12 @@
 # RLCard: 卡牌游戏强化学习工具包
 <img width="500" src="https://dczha.com/files/rlcard/logo.jpg" alt="Logo" />
 
-[![Build Status](https://travis-ci.org/datamllab/RLCard.svg?branch=master)](https://travis-ci.org/datamllab/RLCard)
-[![Codacy Badge](https://api.codacy.com/project/badge/Grade/248eb15c086748a4bcc830755f1bd798)](https://www.codacy.com/manual/daochenzha/rlcard?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=datamllab/rlcard&amp;utm_campaign=Badge_Grade)
+[![Testing](https://github.com/datamllab/rlcard/actions/workflows/python-package.yml/badge.svg)](https://github.com/datamllab/rlcard/actions/workflows/python-package.yml)
+[![PyPI version](https://badge.fury.io/py/rlcard.svg)](https://badge.fury.io/py/rlcard)
 [![Coverage Status](https://coveralls.io/repos/github/datamllab/rlcard/badge.svg)](https://coveralls.io/github/datamllab/rlcard?branch=master)
 [![Downloads](https://pepy.tech/badge/rlcard)](https://pepy.tech/project/rlcard)
 [![Downloads](https://pepy.tech/badge/rlcard/month)](https://pepy.tech/project/rlcard)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 [English README](README.md)
 
@@ -24,6 +25,7 @@ RLCard是一款卡牌游戏强化学习 (Reinforcement Learning, RL) 的工具
 *  **QQ群**: 加入我们的QQ群665647450. 密码：rlcardqqgroup
 
 **新闻:**
+*   所有的算法都已支持[PettingZoo](https://github.com/PettingZoo-Team/PettingZoo)接口. 请点击[这里](examples/pettingzoo). 感谢[Yifei Cheng](https://github.com/ycheng517)的贡献。
 *   请关注[DouZero](https://github.com/kwai/DouZero), 一个强大的斗地主AI，以及[ICML 2021论文](https://arxiv.org/abs/2106.06135)。点击[此处](https://douzero.org/)进入在线演示。该算法同样集成到了RLCard中，详见[在斗地主中训练DMC](docs/toy-examples.md#training-dmc-on-dou-dizhu)。
 *   我们的项目被用在[PettingZoo](https://github.com/PettingZoo-Team/PettingZoo)中，去看看吧！
 *   我们发布了RLCard的可视化演示项目：RLCard-Showdown。请点击[此处](https://github.com/datamllab/rlcard-showdown)查看详情！
@@ -111,6 +113,7 @@ RLCard可以灵活地连接各种算法，参考以下例子：
 *   [与预训练Leduc模型游玩](docs/toy-examples.md#having-fun-with-pretrained-leduc-model)
 *   [在斗地主上训练DMC](docs/toy-examples.md#training-dmc-on-dou-dizhu)
 *   [评估智能体](docs/toy-examples.md#evaluating-agents)
+*   [在PettingZoo上训练](examples/pettingzoo)
 
 ## 演示
 
diff --git a/rlcard/__init__.py b/rlcard/__init__.py
@@ -1,4 +1,4 @@
 name = "rlcard"
-__version__ = "1.0.5"
+__version__ = "1.0.6"
 
 from rlcard.envs import make
diff --git a/rlcard/games/blackjack/dealer.py b/rlcard/games/blackjack/dealer.py
@@ -29,7 +29,7 @@ def deal_card(self, player):
         Args:
             player_id (int): the target player's id
         '''
-        idx = np.random.choice(len(self.deck))
+        idx = self.np_random.choice(len(self.deck))
         card = self.deck[idx]
         if self.num_decks != 0:  # If infinite decks, do not pop card from deck
             self.deck.pop(idx)
diff --git a/tests/envs/test_nolimitholdem_env.py b/tests/envs/test_nolimitholdem_env.py
@@ -35,7 +35,7 @@ def test_decode_action(self):
 
         env.step(0)
         decoded = env._decode_action(1)
-        self.assertEqual(decoded, Action.CHECK)
+        self.assertEqual(decoded, Action.CHECK_CALL)
 
     def test_step(self):
         env = rlcard.make('no-limit-holdem')
@@ -79,13 +79,13 @@ def test_multiplayers(self):
         self.assertEqual(num_players, 5)
 
     def test_config_chips(self):
-        env = rlcard.make('no-limit-holdem', config={'game_num_players':5, 'chips_for_each':[100, 200, 400, 600, 900]})
+        env = rlcard.make('no-limit-holdem', config={'game_num_players':5, 'chips_for_each':100})
         env.game.init_game()
         players = env.game.players
         chips = []
         for i in range(5):
             chips.append(players[i].remained_chips + players[i].in_chips)
-        self.assertEqual(chips, [100, 200, 400, 600, 900])
+        self.assertEqual(chips, [100, 100, 100, 100, 100])
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/games/test_nolimitholdem_game.py b/tests/games/test_nolimitholdem_game.py
@@ -22,7 +22,7 @@ def test_step(self):
         # test call
         game.init_game()
         init_not_raise_num = game.round.not_raise_num
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         step_not_raise_num = game.round.not_raise_num
         self.assertEqual(init_not_raise_num + 1, step_not_raise_num)
 
@@ -34,9 +34,7 @@ def test_step(self):
 
         # test check
         game.init_game()
-        game.step(Action.CALL)
-        game.step(Action.CHECK)
-        self.assertEqual(game.round_counter, 1)
+        game.step(Action.CHECK_CALL)
 
     def test_bet_more_than_chips(self):
         game = Game()
@@ -57,17 +55,17 @@ def test_step_2(self):
         # test check
         game.init_game()
         self.assertEqual(Stage.PREFLOP, game.stage)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         game.step(Action.RAISE_POT)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.FLOP, game.stage)
-        game.step(Action.CHECK)
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.TURN, game.stage)
-        game.step(Action.CHECK)
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.RIVER, game.stage)
 
@@ -77,22 +75,22 @@ def test_step_3_players(self):
         # test check
         _, first_player_id = game.init_game()
         self.assertEqual(Stage.PREFLOP, game.stage)
-        game.step(Action.CALL)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
+        game.step(Action.CHECK_CALL)
         game.step(Action.RAISE_POT)
         game.step(Action.FOLD)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.FLOP, game.stage)
         self.assertEqual((first_player_id - 2) % 3, game.round.game_pointer)
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
         game.step(Action.RAISE_POT)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.TURN, game.stage)
         self.assertEqual((first_player_id - 2) % 3, game.round.game_pointer)
-        game.step(Action.CHECK)
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.RIVER, game.stage)
 
@@ -102,7 +100,7 @@ def test_auto_step(self):
         game.init_game()
         self.assertEqual(Stage.PREFLOP, game.stage)
         game.step(Action.ALL_IN)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
 
         self.assertEqual(Stage.RIVER, game.stage)
 
@@ -120,24 +118,17 @@ def test_all_in_rounds(self):
         game = Game()
 
         game.init_game()
-        game.step(Action.CALL)
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
+        game.step(Action.CHECK_CALL)
         self.assertEqual(game.round_counter, 1)
-        self.assertTrue(Action.CALL not in game.get_legal_actions())
 
-        game.step(Action.CHECK)
+        game.step(Action.CHECK_CALL)
         game.step(Action.ALL_IN)
-        self.assertListEqual([Action.FOLD, Action.CALL], game.get_legal_actions())
-        game.step(Action.CALL)
+        self.assertListEqual([Action.FOLD, Action.CHECK_CALL], game.get_legal_actions())
+        game.step(Action.CHECK_CALL)
         self.assertEqual(game.round_counter, 4)
         self.assertEqual(200, game.dealer.pot)
 
-    def test_wrong_steps(self):
-        game = Game()
-
-        game.init_game()
-        self.assertRaises(Exception, game.step, Action.CHECK)
-
     def test_raise_pot(self):
         game = Game()
 
@@ -156,7 +147,7 @@ def test_raise_pot(self):
         step_raised = game.round.raised[player_id]
         self.assertEqual(16, step_raised)
 
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         player_id = game.round.game_pointer
         game.step(Action.RAISE_POT)
         step_raised = game.round.raised[player_id]
@@ -167,7 +158,7 @@ def test_raise_half_pot(self):
 
         _, player_id = game.init_game()
         self.assertNotIn(Action.RAISE_HALF_POT, game.get_legal_actions()) # Half pot equals call
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         step_raised = game.round.raised[player_id]
         self.assertEqual(2, step_raised)
 
@@ -184,7 +175,7 @@ def test_raise_half_pot(self):
     def test_payoffs_1(self):
         game = Game()
         game.init_game()
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         game.step(Action.RAISE_HALF_POT)
         game.step(Action.FOLD)
         self.assertTrue(game.is_over())
@@ -195,7 +186,7 @@ def test_payoffs_2(self):
         game = Game()
         np.random.seed(0)
         game.init_game()
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         game.step(Action.RAISE_POT)
         game.step(Action.ALL_IN)
         game.step(Action.FOLD)
@@ -208,9 +199,9 @@ def test_all_in_to_call(self):
         game.init_chips = [50, 100]
         game.dealer_id = 0
         game.init_game()
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         game.step(Action.ALL_IN)
-        game.step(Action.CALL)
+        game.step(Action.CHECK_CALL)
         self.assertTrue(game.is_over())
 
 
diff --git a/tests/utils/test_logger.py b/tests/utils/test_logger.py
@@ -15,7 +15,6 @@ def test_log(self):
             logger.log_performance(1, 1)
             logger.log_performance(2, 2)
             logger.log_performance(3, 3)
-            logger.plot('aaa')
 
 if __name__ == '__main__':
     unittest.main()