Skip to content

实现全决策随机化降重 #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
68293da
将 bot 中的 获取立直牌和设置已立直标识符设置独立出来,供其他类调用
diverpet Jun 27, 2024
92a7239
mj_helper 新增判断吃消耗和碰类型的方法
diverpet Jun 27, 2024
ab565f3
重写 Automation 类部分方法逻辑,使得基本全决策可以随机化降重
diverpet Jun 27, 2024
d32ae09
game_state 可以查询上一条记录
diverpet Jun 27, 2024
7219e98
在确认操作后更改已立直标示物
diverpet Jun 27, 2024
feb1191
修正了 mjai_type 在随机化前后不一致导致的错误,添加 debug 日志打印
diverpet Jun 27, 2024
190d5f1
添加被遗忘的三麻拔北
diverpet Jun 27, 2024
8a5a276
添加 碰杠的 consumed 牌
diverpet Jun 28, 2024
f5461bf
修复在不存在备选项时会出错的问题,直接返回原 reaction
diverpet Jun 28, 2024
b82319c
实现立直时查询克隆 bot
diverpet Jun 28, 2024
b95d03c
修复返回值为 Nonetype 的问题
diverpet Jun 28, 2024
e26fff6
使用回滚法代替克隆法实现获取立直牌。删除多余的变量和方法
diverpet Jun 29, 2024
1ccc4d4
解决立直问题,在执行立直前不再尝试获取立直牌。实现立直决策的随机化
diverpet Jun 29, 2024
00994d6
修正会返回前局立直牌的错误
diverpet Jun 29, 2024
f737cd5
修正已立直标示变量清理逻辑
diverpet Jun 29, 2024
a5d6b6c
构建吃牌时默认使用红宝牌
diverpet Jun 30, 2024
0274738
当随机化选项结果为最高概率结果时,直接返回原 action
diverpet Jun 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 53 additions & 48 deletions bot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,69 +2,73 @@
implement wrappers for supporting different bot types
"""
import json
import time
from abc import ABC, abstractmethod

from common.log_helper import LOGGER
from common.mj_helper import meta_to_options, MjaiType
from common.utils import GameMode, BotNotSupportingMode


def reaction_convert_meta(reaction:dict, is_3p:bool=False):
def reaction_convert_meta(reaction: dict, is_3p: bool = False):
""" add meta_options to reaction """
if 'meta' in reaction:
meta = reaction['meta']
reaction['meta_options'] = meta_to_options(meta, is_3p)


class Bot(ABC):
""" Bot Interface class
bot follows mjai protocol
ref: https://mjai.app/docs/highlevel-api
Note: Reach msg has additional 'reach_dahai' key attached,
which is a 'dahai' msg, representing the subsequent dahai action after reach
"""

def __init__(self, name:str="Bot") -> None:
def __init__(self, name: str = "Bot") -> None:
self.name = name
self._initialized:bool = False
self.seat:int = None

self._initialized: bool = False
self.seat: int = None
self.mode = None
self.ignore_next_turn_self_reach: bool = False
self.reach_dahai:dict = None

@property
def supported_modes(self) -> list[GameMode]:
""" return suported game modes"""
return [GameMode.MJ4P]

@property
def info_str(self) -> str:
""" return description info"""
return self.name

def init_bot(self, seat:int, mode:GameMode=GameMode.MJ4P):
def init_bot(self, seat: int, mode: GameMode = GameMode.MJ4P):
""" Initialize the bot before the game starts. Bot must be initialized before a new game
params:
seat(int): Player seat index
mode(GameMode): Game mode, defaults to normal 4p mahjong"""
if mode not in self.supported_modes:
raise BotNotSupportingMode(mode)
self.seat = seat
self.mode = mode
self._init_bot_impl(mode)
self._initialized = True

@property
def initialized(self) -> bool:
""" return True if bot is initialized"""
return self._initialized

@abstractmethod
def _init_bot_impl(self, mode:GameMode=GameMode.MJ4P):
def _init_bot_impl(self, mode: GameMode = GameMode.MJ4P):
""" Initialize the bot before the game starts."""

@abstractmethod
def react(self, input_msg:dict) -> dict | None:
def react(self, input_msg: dict) -> dict | None:
""" input mjai msg and get bot output if any, or None if not"""

def react_batch(self, input_list:list[dict]) -> dict | None:
def react_batch(self, input_list: list[dict]) -> dict | None:
""" input list of mjai msg and get the last output, if any"""

# default implementation is to iterate and feed to bot
if len(input_list) == 0:
return None
Expand All @@ -73,32 +77,45 @@ def react_batch(self, input_list:list[dict]) -> dict | None:
self.react(msg)
last_reaction = self.react(input_list[-1])
return last_reaction

def log_game_result(self, mode_id: int, rank: int, score: int):
""" log game results"""
return

def get_reach_dahai(self) -> dict:
    """
    Regenerate the reach_dahai message and return it.

    Every call runs generate_reach_dahai() first, so the value returned is
    whatever that fresh call stored in self.reach_dahai.
    Only call this method when reach is actually possible.
    """
    self.generate_reach_dahai()
    latest_reach_dahai = self.reach_dahai
    return latest_reach_dahai


def generate_reach_dahai(self):
    """
    Feed a self-reach message to react() and remember the reply.

    The reply is stored in self.reach_dahai (it may be None, since react()
    is allowed to return None). Afterwards ignore_next_turn_self_reach is
    set so that the bot can skip the matching self-reach message if it is
    fed again on the next turn.
    """
    self.reach_dahai = self.react({'type': MjaiType.REACH, 'actor': self.seat})
    LOGGER.debug(f"Generated and saved reach_dahai: {self.reach_dahai}")
    # The reach message has now been consumed by the bot; flag it so a
    # repeated self-reach message is not processed a second time.
    self.ignore_next_turn_self_reach = True


class BotMjai(Bot):
""" base class for libriichi.mjai Bots"""
def __init__(self, name:str) -> None:

def __init__(self, name: str) -> None:
super().__init__(name)

self.mjai_bot = None
self.ignore_next_turn_self_reach:bool = False



@property
def info_str(self) -> str:
return f"{self.name}: [{','.join([m.value for m in self.supported_modes])}]"


def _get_engine(self, mode:GameMode):

def _get_engine(self, mode: GameMode):
# return MortalEngine object
raise NotImplementedError("Subclass must implement this method")


def _init_bot_impl(self, mode:GameMode=GameMode.MJ4P):

def _init_bot_impl(self, mode: GameMode = GameMode.MJ4P):
engine = self._get_engine(mode)
if not engine:
raise BotNotSupportingMode(mode)
Expand All @@ -112,35 +129,23 @@ def _init_bot_impl(self, mode:GameMode=GameMode.MJ4P):
import libriichi3p
self.mjai_bot = libriichi3p.mjai.Bot(engine, self.seat)
else:
raise BotNotSupportingMode(mode)
def react(self, input_msg:dict) -> dict:
raise BotNotSupportingMode(mode)

def react(self, input_msg: dict) -> dict:
msg_type = input_msg['type']
if self.mjai_bot is None:
return None
if self.ignore_next_turn_self_reach: # ignore repetitive self reach. only for the very next msg
if input_msg['type'] == MjaiType.REACH and input_msg['actor'] == self.seat:
LOGGER.debug("Ignoring repetitive self reach msg, reach msg already sent to AI last turn")
return None
if self.ignore_next_turn_self_reach == True:
if msg_type == MjaiType.REACH and input_msg['actor'] == self.seat:
LOGGER.debug("Ignoring Reach msg, already fed reach msg to the bot.")
return None
self.ignore_next_turn_self_reach = False



str_input = json.dumps(input_msg)

react_str = self.mjai_bot.react(str_input)
if react_str is None:
return None
reaction = json.loads(react_str)
# Special treatment for self reach output msg
# mjai only outputs dahai msg after the reach msg
if reaction['type'] == MjaiType.REACH and reaction['actor'] == self.seat: # Self reach
# get the subsequent dahai message,
# appeding it to the reach reaction msg as 'reach_dahai' key
LOGGER.debug("Send reach msg to get reach_dahai. Cannot go back to unreach!")
# TODO make a clone of mjai_bot so reach can be tested to get dahai without affecting the game

reach_msg = {'type': MjaiType.REACH, 'actor': self.seat}
reach_dahai_str = self.mjai_bot.react(json.dumps(reach_msg))
reach_dahai = json.loads(reach_dahai_str)
reaction['reach_dahai'] = reach_dahai
self.ignore_next_turn_self_reach = True # ignore very next reach msg
return reaction
2 changes: 1 addition & 1 deletion bot/local/bot_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, model_files:dict[GameMode, str]) -> None:
""" params:
model_files(dict): model files for different modes {mode, file_path}
"""
super().__init__("Local Mortal Bot")
super().__init__("Local Mortal Bot")
self._supported_modes: list[GameMode] = []
self.model_files = model_files
self._engines:dict[GameMode, any] = {}
Expand Down
8 changes: 0 additions & 8 deletions bot/mjapi/bot_mjapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,6 @@ def _process_reaction(self, reaction, recurse):
else:
return None

# process self reach
if recurse and reaction['type'] == MjaiType.REACH and reaction['actor'] == self.seat:
LOGGER.debug("Send reach msg to get reach_dahai.")
reach_msg = {'type': MjaiType.REACH, 'actor': self.seat}
reach_dahai = self.react(reach_msg, recurse=False)
reaction['reach_dahai'] = self._process_reaction(reach_dahai, False)
self.ignore_next_turn_self_reach = True

return reaction

def react(self, input_msg:dict, recurse=True) -> dict | None:
Expand Down
6 changes: 2 additions & 4 deletions bot_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,10 +547,8 @@ def get_tile_str(mjai_tile:str): # unicode + language specific name
elif re_type == MjaiType.ANKAN:
tile_str = get_tile_str(reaction['consumed'][1])
action_str = f"{ActionUnicode.KAN}{lan_str.KAN}{tile_str}({lan_str.ANKAN})"
elif re_type == MjaiType.REACH: # attach reach dahai options
reach_dahai_reaction = reaction['reach_dahai']
dahai_action_str, _dahai_options = mjai_reaction_2_guide(reach_dahai_reaction, 0, lan_str)
action_str = f"{ActionUnicode.REACH}{lan_str.RIICHI}," + dahai_action_str
elif re_type == MjaiType.REACH:
action_str = f"{ActionUnicode.REACH}{lan_str.RIICHI}"
elif re_type == MjaiType.HORA:
if reaction['actor'] == reaction['target']:
action_str = f"{ActionUnicode.AGARI}{lan_str.AGARI}({lan_str.TSUMO})"
Expand Down
131 changes: 131 additions & 0 deletions common/mj_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,137 @@ def decode_mjai_tehai(tehai34, akas, tsumohai) -> tuple[list[str], str]:
return (tile_list, tsumohai)


def normalize_pai(pai):
    """Return the tile name with every 'r' (red dora marker) removed.

    For example '5mr' becomes '5m'; names without an 'r' are returned
    unchanged.
    """
    return ''.join(pai.split('r'))


def determine_kan_type(last_action, hand):
    """
    Determine the type of kan from the last action and the player's hand.

    Description:
        This function assumes it is called only when a kan is possible and
        distinguishes which type of kan it is: Ankan, Kakan, or Daiminkan.
        All tiles are normalized first (red dora tiles are converted to
        their regular counterparts) so that they count as the same tile.

    Parameters:
        last_action (dict): The last action taken. The keys read here are
                            'type' (compared against 'tsumo' and 'dahai')
                            and 'pai' (the tile involved).
                            Example: {'type': 'tsumo', 'actor': 0, 'pai': '5mr'}
        hand (list of str): Tiles in the player's hand, excluding the tile
                            drawn most recently; that tile is added here
                            when the last action is a 'tsumo'.

    Returns:
        MjaiType | None:
            - ANKAN when the last action is 'tsumo' and the hand (including
              the drawn tile) holds four copies of the tile.
            - KAKAN when four copies are counted and the last action is not
              'tsumo'.
            - DAIMINKAN when fewer/more than four are counted and the last
              action is 'dahai'.
            - None in every other case.
    """
    normalized_hand = [normalize_pai(p) for p in hand]
    pai = normalize_pai(last_action['pai'])
    is_self_draw = last_action['type'] == 'tsumo'

    if is_self_draw:
        normalized_hand.append(pai)  # Include the drawn tile in the hand

    # Count only the tile in question: this avoids building a count for
    # every distinct tile and cannot raise KeyError when the tile is absent.
    if normalized_hand.count(pai) == 4:
        return MjaiType.ANKAN if is_self_draw else MjaiType.KAKAN
    if last_action['type'] == 'dahai':
        return MjaiType.DAIMINKAN
    return None


def generate_chi_consume_sequence(tile, chi_type):
    """Return the two other tiles of the run implied by a called tile.

    The rank is read from the first character of ``tile`` and the suit from
    the second, so a red marker on the called tile is ignored and the result
    never contains red tiles. ``chi_type`` states where the called tile sits
    in the run: 'chi_low', 'chi_mid' or 'chi_high'. No range check is done
    on the resulting ranks.

    Raises:
        ValueError: if ``chi_type`` is not one of the three known values.
    """
    rank, suit = int(tile[0]), tile[1]

    # Rank offsets of the two consumed tiles relative to the called tile.
    offsets_by_type = {
        'chi_low': (1, 2),
        'chi_mid': (-1, 1),
        'chi_high': (-2, -1),
    }
    if chi_type not in offsets_by_type:
        raise ValueError("Invalid chi type")
    return [f"{rank + step}{suit}" for step in offsets_by_type[chi_type]]


def determine_chi_tiles(chi_type, called_tile, hand):
    """
    Determine the hand tiles consumed by a chi, given its type and the called tile.

    Parameters:
        chi_type (str): 'chi_low', 'chi_mid', or 'chi_high', representing the
                        position of the called tile in the sequence.
        called_tile (str): The tile that was called, formatted as '5m'.
        hand (list of str): Current list of tiles in hand.

    Returns:
        list of str: The two hand tiles that complete the chow. When both the
                     red and the regular version of a needed tile are in
                     hand, the red dora tile is chosen. An empty list is
                     returned if either needed tile is missing from the hand.
    """
    sequence_tiles = generate_chi_consume_sequence(called_tile, chi_type)

    needed_tiles = []
    for tile in sequence_tiles:
        # sequence_tiles never carry the red marker, so the red variant is
        # simply the same name with an 'r' suffix.
        red_tile = f"{tile}r"
        if red_tile in hand:
            needed_tiles.append(red_tile)
        elif tile in hand:
            needed_tiles.append(tile)
        else:
            # One of the two tiles is not available: the chi cannot be built.
            return []
    return needed_tiles

def determine_pon_tiles(called_tile, hand):
    """
    Determine the two hand tiles consumed by a pon of ``called_tile``.

    Parameters:
        called_tile (str): The tile being called, e.g. '3m', 'E', '5p' or '5pr'.
        hand (list of str): Current list of tiles in hand.

    Returns:
        list of str:
            - For any tile that is not a five, two copies of ``called_tile``
              (the hand is not consulted).
            - For a five, the tiles of the same rank and suit actually found
              in ``hand`` (red or regular), at most two of them. With exactly
              two matches they are returned in hand order; with more than
              two, regular tiles are preferred so the red dora stays in hand.
              Fewer than two tiles are returned if the hand does not hold
              enough matches.
    """
    # Tile names are strings, so the rank has to be compared with '5';
    # comparing with the integer 5 is always unequal.
    if called_tile[0] != '5':
        return [called_tile] * 2

    base_tile = called_tile[:2]  # rank + suit, without the red marker
    matching = [tile for tile in hand if tile[:2] == base_tile]
    if len(matching) > 2:
        # Stable sort: regular fives first, red fives last.
        matching.sort(key=lambda tile: tile.endswith('r'))
    return matching[:2]


def determine_kan_tiles(called_tile):
    """
    Build the list of four tiles that make up a kan of ``called_tile``.

    Parameters:
        called_tile (str): The tile being kanned, e.g. '1m', 'E', '5s' or '5sr'.

    Returns:
        list of str: Four copies of ``called_tile`` for any tile that is not
                     a five. For a five (red or regular), three regular fives
                     followed by the red five of that suit, e.g.
                     ['5s', '5s', '5s', '5sr'].
                     NOTE(review): this assumes the set contains exactly one
                     red five per suit — confirm for the rule sets in use.
    """
    # Tile names are strings, so the rank has to be compared with '5';
    # comparing with the integer 5 is always unequal.
    if called_tile[0] != '5':
        return [called_tile] * 4

    base_tile = called_tile[:2]  # rank + suit, without the red marker
    return [base_tile] * 3 + [f"{base_tile}r"]


@dataclass
class GameInfo:
""" data class containing game info"""
Expand Down
Loading