opendilab
diff --git a/ding/entry/tests/test_serial_entry.py b/ding/entry/tests/test_serial_entry.py
Lines changed: 19 additions & 0 deletions
diff --git a/ding/entry/tests/test_serial_entry_algo.py b/ding/entry/tests/test_serial_entry_algo.py
Lines changed: 23 additions & 0 deletions
diff --git a/ding/league/base_league.py b/ding/league/base_league.py
Lines changed: 11 additions & 11 deletions
diff --git a/ding/league/player.py b/ding/league/player.py
Lines changed: 1 addition & 1 deletion
diff --git a/ding/league/shared_payoff.py b/ding/league/shared_payoff.py
Lines changed: 27 additions & 6 deletions
diff --git a/ding/league/tests/test_one_vs_one_league.py b/ding/league/tests/test_one_vs_one_league.py
Lines changed: 1 addition & 2 deletions
diff --git a/ding/worker/collector/__init__.py b/ding/worker/collector/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/ding/worker/collector/base_serial_collector.py b/ding/worker/collector/base_serial_collector.py
Lines changed: 1 addition & 1 deletion
diff --git a/ding/worker/collector/comm/flask_fs_collector.py b/ding/worker/collector/comm/flask_fs_collector.py
Lines changed: 2 additions & 1 deletion
@@ -28,6 +28,9 @@
 from dizoo.multiagent_particle.config import cooperative_navigation_coma_config, cooperative_navigation_coma_create_config  # noqa
 from dizoo.multiagent_particle.config import cooperative_navigation_collaq_config, cooperative_navigation_collaq_create_config  # noqa
 from dizoo.multiagent_particle.config import cooperative_navigation_atoc_config, cooperative_navigation_atoc_create_config  # noqa
+from dizoo.league_demo.league_demo_ppo_config import league_demo_ppo_config
+from dizoo.league_demo.selfplay_demo_ppo_main import main as selfplay_main
+from dizoo.league_demo.league_demo_ppo_main import main as league_main
 
 
 @pytest.mark.unittest
@@ -254,6 +257,22 @@ def test_sqn():
         os.popen('rm -rf log ckpt*')
 
 
+@pytest.mark.unittest
+def test_selfplay():
+    try:
+        selfplay_main(deepcopy(league_demo_ppo_config), seed=0, max_iterations=1)  # smoke run on a private copy of the shared config, seed=0, max_iterations=1
+    except Exception:
+        assert False, "pipeline fail"  # any exception raised by the entry point is reported as a pipeline failure
+
+
+@pytest.mark.unittest
+def test_league():
+    # Smoke test: run the league demo entry on a private copy of the shared
+    # config with seed=0 and max_iterations=1; any exception fails the test.
+    try:
+        league_main(deepcopy(league_demo_ppo_config), seed=0, max_iterations=1)
+    except Exception:
+        assert False, "pipeline fail"
+
+
 @pytest.mark.unittest
 def test_acer():
     config = [deepcopy(cartpole_acer_config), deepcopy(cartpole_acer_create_config)]
 
@@ -28,6 +28,9 @@
 from dizoo.multiagent_particle.config import cooperative_navigation_coma_config, cooperative_navigation_coma_create_config  # noqa
 from dizoo.multiagent_particle.config import cooperative_navigation_collaq_config, cooperative_navigation_collaq_create_config  # noqa
 from dizoo.multiagent_particle.config import cooperative_navigation_atoc_config, cooperative_navigation_atoc_create_config  # noqa
+from dizoo.league_demo.league_demo_ppo_config import league_demo_ppo_config
+from dizoo.league_demo.selfplay_demo_ppo_main import main as selfplay_main
+from dizoo.league_demo.league_demo_ppo_main import main as league_main
 
 with open("./algo_record.log", "w+") as f:
     f.write("ALGO TEST STARTS\n")
@@ -274,3 +277,23 @@ def test_acer():
         assert False, "pipeline fail"
     with open("./algo_record.log", "a+") as f:
         f.write("22. acer\n")
+
+
+@pytest.mark.algotest
+def test_selfplay():
+    # Imported locally so this test does not rely on a module-level import.
+    from copy import deepcopy
+    try:
+        # Run on a private copy: the module-level config object is also used
+        # by test_league, so it must not be handed to the entry point directly.
+        selfplay_main(deepcopy(league_demo_ppo_config), seed=0)
+    except Exception:
+        assert False, "pipeline fail"
+    # Record the finished algorithm in the shared algo test log.
+    with open("./algo_record.log", "a+") as f:
+        f.write("23. selfplay\n")
+
+
+@pytest.mark.algotest
+def test_league():
+    # Imported locally so this test does not rely on a module-level import.
+    from copy import deepcopy
+    try:
+        # Run on a private copy: the module-level config object is also used
+        # by test_selfplay, so it must not be handed to the entry point directly.
+        league_main(deepcopy(league_demo_ppo_config), seed=0)
+    except Exception:
+        assert False, "pipeline fail"
+    # Record the finished algorithm in the shared algo test log.
+    with open("./algo_record.log", "a+") as f:
+        f.write("24. league\n")
@@ -1,5 +1,6 @@
 import uuid
 import copy
+import os
 from abc import abstractmethod
 from easydict import EasyDict
 import os.path as osp
@@ -35,6 +36,9 @@ def __init__(self, cfg: EasyDict) -> None:
         """
         self.cfg = cfg
         self.path_policy = cfg.path_policy
+        if not osp.exists(self.path_policy):
+            os.mkdir(self.path_policy)
+
         self.league_uid = str(uuid.uuid1())
         self.active_players = []
         self.historical_players = []
@@ -52,13 +56,11 @@ def _init_players(self) -> None:
         for cate in self.cfg.player_category:  # Player's category (Depends on the env)
             for k, n in self.cfg.active_players.items():  # Active player's type
                 for i in range(n):  # This type's active player number
-                    name = '{}_{}_{}_{}'.format(k, cate, i, self.league_uid)
-                    ckpt_path = '{}_ckpt.pth'.format(name)
+                    name = '{}_{}_{}'.format(k, cate, i)
+                    ckpt_path = osp.join(self.path_policy, '{}_ckpt.pth'.format(name))
                     player = create_player(self.cfg, k, self.cfg[k], cate, self.payoff, ckpt_path, name, 0)
                     if self.cfg.use_pretrain:
-                        self.save_checkpoint(
-                            self.cfg.pretrain_checkpoint_path[cate], osp.join(self.path_policy, player.checkpoint_path)
-                        )
+                        self.save_checkpoint(self.cfg.pretrain_checkpoint_path[cate], ckpt_path)
                     self.active_players.append(player)
                     self.payoff.add_player(player)
 
@@ -68,7 +70,7 @@ def _init_players(self) -> None:
                 main_player_name = [k for k in self.cfg.keys() if 'main_player' in k]
                 assert len(main_player_name) == 1, main_player_name
                 main_player_name = main_player_name[0]
-                name = '{}_{}_0_pretrain'.format(main_player_name, cate)
+                name = '{}_{}_0_pretrain_historical'.format(main_player_name, cate)
                 parent_name = '{}_{}_0'.format(main_player_name, cate)
                 hp = HistoricalPlayer(
                     self.cfg.get(main_player_name),
@@ -122,7 +124,7 @@ def _get_job_info(self, player: ActivePlayer, eval_flag: bool = False) -> dict:
         """
         raise NotImplementedError
 
-    def judge_snapshot(self, player_id: str) -> bool:
+    def judge_snapshot(self, player_id: str, force: bool = False) -> bool:
         """
         Overview:
             Judge whether a player is trained enough for snapshot. If yes, call player's ``snapshot``, create a
@@ -136,12 +138,10 @@ def judge_snapshot(self, player_id: str) -> bool:
         with self._active_players_lock:
             idx = self.active_players_ids.index(player_id)
             player = self.active_players[idx]
-            if player.is_trained_enough():
+            if force or player.is_trained_enough():
                 # Snapshot
                 hp = player.snapshot()
-                self.save_checkpoint(
-                    osp.join(self.path_policy, player.checkpoint_path), osp.join(self.path_policy, hp.checkpoint_path)
-                )
+                self.save_checkpoint(player.checkpoint_path, hp.checkpoint_path)
                 self.historical_players.append(hp)
                 self.payoff.add_player(hp)
                 # Mutate
 
@@ -185,7 +185,7 @@ def snapshot(self) -> HistoricalPlayer:
             self.category,
             self.payoff,
             path,
-            self.player_id + '_{}'.format(int(self._total_agent_step)),
+            self.player_id + '_{}_historical'.format(int(self._total_agent_step)),
             self._total_agent_step,
             parent_id=self.player_id
         )
 
@@ -2,6 +2,7 @@
 from collections import defaultdict
 from typing import Tuple, Optional
 from easydict import EasyDict
+from tabulate import tabulate
 import numpy as np
 
 from ding.utils import LockContext, LockContextType
@@ -76,6 +77,23 @@ def __init__(self, cfg: EasyDict):
         # Thread lock.
         self._lock = LockContext(type_=LockContextType.THREAD_LOCK)
 
+    def __repr__(self) -> str:
+        """
+        Overview:
+            Render the battle records in ``self._data`` as a grid table with one row per record.
+            Each key has the form '{}-{}'.format(name1, name2). When the first name contains
+            'historical', the row is shown from the other player's point of view: the two names
+            are swapped and so are the wins and losses. Rows are sorted by the home player name.
+        Returns:
+            - s (:obj:`str`): The table produced by ``tabulate`` with ``tablefmt='grid'``.
+        """
+        headers = ["Home Player", "Away Player", "Wins", "Draws", "Losses", "Naive Win Rate"]
+        rows = []
+        for key, record in self._data.items():
+            names = key.split('-')
+            home, away = names[0], names[1]
+            wins, draws, losses = record['wins'], record['draws'], record['losses']
+            if 'historical' in home:
+                # Reverse representation: present the record from the second player's side.
+                home, away = away, home
+                wins, losses = losses, wins
+            # A draw counts as half a win; 1e-8 keeps the ratio defined when no game was played.
+            naive_win_rate = (wins + draws / 2) / (wins + losses + draws + 1e-8)
+            rows.append([home, away, wins, draws, losses, naive_win_rate])
+        rows.sort(key=lambda row: row[0])
+        return tabulate(rows, headers=headers, tablefmt='grid')
+
     def __getitem__(self, players: tuple) -> np.ndarray:
         """
         Overview:
@@ -172,18 +190,21 @@ def _win_loss_reverse(result_: str, reverse_: bool) -> str:
 
         with self._lock:
             home_id, away_id = job_info['player_id']
+            job_info_result = job_info['result']
+            # for compatibility of one-layer list
+            if not isinstance(job_info_result[0], list):
+                job_info_result = [job_info_result]
             try:
-                assert home_id in self._players_ids
-                assert away_id in self._players_ids
+                assert home_id in self._players_ids, "home_id error"
+                assert away_id in self._players_ids, "away_id error"
                 # Assert all results are in ['wins', 'losses', 'draws']
-                assert all([i in BattleRecordDict.data_keys[:3] for j in job_info['result'] for i in j])
+                assert all([i in BattleRecordDict.data_keys[:3] for j in job_info_result for i in j]), "results error"
             except Exception as e:
-                print("[ERROR] invalid job_info: {}".format(job_info))
-                print(e)
+                print("[ERROR] invalid job_info: {}\n\tError reason is: {}".format(job_info, e))
                 return False
             key, reverse = self.get_key(home_id, away_id)
             # Update with decay
-            for j in job_info['result']:
+            for j in job_info_result:
                 for i in j:
                     # All categories should decay
                     self._data[key] *= self._decay
 
@@ -66,8 +66,7 @@ def test_naive(self):
         active_player_ckpt = league.active_players[0].checkpoint_path
         tmp = torch.tensor([1, 2, 3])
         path_policy = one_vs_one_league_default_config.league.path_policy
-        os.makedirs(path_policy)
-        torch.save(tmp, os.path.join(path_policy, active_player_ckpt))
+        torch.save(tmp, active_player_ckpt)
 
         # judge_snapshot & update_active_player
         assert not league.judge_snapshot(active_player_id)
 
@@ -3,7 +3,9 @@
     to_tensor_transitions
 from .sample_serial_collector import SampleCollector
 from .episode_serial_collector import EpisodeCollector
+from .episode_one_vs_one_serial_collector import Episode1v1Collector
 from .base_serial_evaluator import BaseSerialEvaluator
+from .one_vs_one_serial_evaluator import OnevOneEvaluator
 # parallel
 from .base_parallel_collector import BaseCollector, create_parallel_collector, get_parallel_collector_cls
 from .zergling_collector import ZerglingCollector
 
@@ -177,7 +177,7 @@ def __init__(self, maxlen: int, *args, **kwargs) -> None:
         Overview:
             Initialization trajBuffer.
         Arguments:
-            - maxlen (:obj:`int`): the max len of trajBuffer
+            - maxlen (:obj:`int`): The maximum length of trajectory buffer.
         """
         self._maxlen = maxlen
         super().__init__(*args, **kwargs)
 
@@ -161,7 +161,8 @@ def get_policy_update_info(self, path: str) -> dict:
         """
         if self._collector_close_flag:
             return
-        path = os.path.join(self._path_policy, path)
+        if self._path_policy not in path:
+            path = os.path.join(self._path_policy, path)
         return read_file(path, use_lock=True)
 
     # override
Original file line number	Diff line number	Diff line change
`@@ -185,7 +185,7 @@ def snapshot(self) -> HistoricalPlayer:`
`185`	`185`	`self.category,`
`186`	`186`	`self.payoff,`
`187`	`187`	`path,`
`188`		`- self.player_id + '_{}'.format(int(self._total_agent_step)),`
	`188`	`+ self.player_id + '_{}_historical'.format(int(self._total_agent_step)),`
`189`	`189`	`self._total_agent_step,`
`190`	`190`	`parent_id=self.player_id`
`191`	`191`	`)`