94 changes: 64 additions & 30 deletions moonfish/engines/alpha_beta.py
@@ -1,17 +1,30 @@
from copy import copy
from enum import IntEnum
from multiprocessing.managers import DictProxy
from typing import Dict, Optional, Tuple
from typing import Dict, Optional, Tuple, Union

import chess.polyglot
import chess.syzygy
from chess import Board, Move
from moonfish.config import Config
from moonfish.engines.random import choice
from moonfish.move_ordering import organize_moves, organize_moves_quiescence
from moonfish.psqt import board_evaluation, count_pieces

CACHE_KEY = Dict[
Tuple[str, int, bool, float, float], Tuple[float | int, Optional[Move]]
]

class Bound(IntEnum):
"""Transposition table bound types."""

EXACT = 0 # Score is exact (PV node, score was within alpha-beta window)
LOWER_BOUND = 1 # Score is at least this value (failed high / beta cutoff)
UPPER_BOUND = 2 # Score is at most this value (failed low)


# Depth value for terminal positions (checkmate/stalemate) - always usable
DEPTH_MAX = 10000

# Cache: zobrist_hash -> (score, best_move, bound_type, depth)
CACHE_TYPE = Dict[int, Tuple[Union[float, int], Optional[Move], Bound, int]]


class AlphaBeta:
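A note on the new cache key (illustration only, not part of the diff): the table is now keyed by chess.polyglot.zobrist_hash(board) rather than a (fen, depth, null_move, alpha, beta) tuple, so transpositions reached through different move orders share a single entry. A minimal sketch, assuming python-chess is installed:

import chess
import chess.polyglot

# Two different move orders that reach the same position.
a = chess.Board()
for san in ["e4", "e5", "Nf3", "Nc6"]:
    a.push_san(san)

b = chess.Board()
for san in ["Nf3", "Nc6", "e4", "e5"]:
    b.push_san(san)

# Both lines hash to the same 64-bit key, so one cached entry serves both.
assert chess.polyglot.zobrist_hash(a) == chess.polyglot.zobrist_hash(b)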
@@ -138,7 +151,7 @@ def negamax(
board: Board,
depth: int,
null_move: bool,
cache: DictProxy | CACHE_KEY,
cache: DictProxy | CACHE_TYPE,
alpha: float = float("-inf"),
beta: float = float("inf"),
) -> Tuple[float | int, Optional[Move]]:
@@ -171,17 +184,36 @@ def negamax(
Returns:
- best_score, best_move: returns best move that it found and its value.
"""
cache_key = (board.fen(), depth, null_move, alpha, beta)
# check if board was already evaluated
original_alpha = alpha
cache_key = chess.polyglot.zobrist_hash(board)

# Check transposition table
if cache_key in cache:
return cache[cache_key]
cached_score, cached_move, cached_bound, cached_depth = cache[cache_key]

# Only use score if cached search was at least as deep as we need
# Use cached result if:
# - EXACT: score is exact
# - LOWER_BOUND and score >= beta: true score is at least cached, causes cutoff
# - UPPER_BOUND and score <= alpha: true score is at most cached, no improvement
if cached_depth >= depth and (
cached_bound == Bound.EXACT
or (cached_bound == Bound.LOWER_BOUND and cached_score >= beta)
or (cached_bound == Bound.UPPER_BOUND and cached_score <= alpha)
):
return cached_score, cached_move

if board.is_checkmate():
cache[cache_key] = (-self.config.checkmate_score, None)
cache[cache_key] = (
-self.config.checkmate_score,
None,
Bound.EXACT,
DEPTH_MAX,
)
return (-self.config.checkmate_score, None)

if board.is_stalemate():
cache[cache_key] = (0, None)
cache[cache_key] = (0, None, Bound.EXACT, DEPTH_MAX)
return (0, None)

# recursion base case
@@ -193,12 +225,13 @@
alpha=alpha,
beta=beta,
)
cache[cache_key] = (board_score, None)
cache[cache_key] = (board_score, None, Bound.EXACT, depth)
return board_score, None

# null move prunning
# null move pruning
if (
self.config.null_move
and null_move
and depth >= (self.config.null_move_r + 1)
and not board.is_check()
):
@@ -215,12 +248,11 @@
)[0]
board.pop()
if board_score >= beta:
cache[cache_key] = (beta, None)
# Null move confirmed beta cutoff - this is a lower bound
cache[cache_key] = (beta, None, Bound.LOWER_BOUND, depth)
return beta, None

best_move = None

# initializing best_score
best_score = float("-inf")
moves = organize_moves(board)

@@ -244,36 +276,38 @@
# take move back
board.pop()

# beta-cutoff
if board_score >= beta:
cache[cache_key] = (board_score, move)
return board_score, move

# update best move
if board_score > best_score:
best_score = board_score
best_move = move

# setting alpha variable to do pruning
alpha = max(alpha, board_score)
# beta-cutoff: opponent won't allow this position
if best_score >= beta:
# LOWER_BOUND: true score is at least best_score
cache[cache_key] = (best_score, best_move, Bound.LOWER_BOUND, depth)
return best_score, best_move

# alpha beta pruning when we already found a solution that is at least as
# good as the current one those branches won't be able to influence the
# final decision so we don't need to waste time analyzing them
if alpha >= beta:
break
# update alpha
alpha = max(alpha, best_score)

# if no best move, make a random one
if not best_move:
best_move = self.random_move(board)

# save result before returning
cache[cache_key] = (best_score, best_move)
# Determine bound type based on whether we improved alpha
if best_score <= original_alpha:
# Failed low: we didn't find anything better than what we already had
bound = Bound.UPPER_BOUND
else:
# Score is exact: we found a score within the window
bound = Bound.EXACT

cache[cache_key] = (best_score, best_move, bound, depth)
return best_score, best_move

def search_move(self, board: Board) -> Move:
# create shared cache
cache: CACHE_KEY = {}
cache: CACHE_TYPE = {}

best_move = self.negamax(
board, copy(self.config.negamax_depth), self.config.null_move, cache
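To restate the probing rule introduced above as a standalone sketch (not code from this PR; probe is a hypothetical helper, the entry layout mirrors CACHE_TYPE, and Bound is the enum added in this file): an entry is reused only when it was searched at least as deep as the current node and its bound type is compatible with the current alpha-beta window.

from typing import Optional, Tuple

from chess import Move
from moonfish.engines.alpha_beta import Bound

def probe(entry, depth: int, alpha: float, beta: float) -> Optional[Tuple[float, Optional[Move]]]:
    score, move, bound, entry_depth = entry
    if entry_depth < depth:
        return None  # entry came from a shallower search; not reliable here
    if bound == Bound.EXACT:
        return score, move  # exact score: always reusable
    if bound == Bound.LOWER_BOUND and score >= beta:
        return score, move  # true score is at least the stored score, so it still fails high
    if bound == Bound.UPPER_BOUND and score <= alpha:
        return score, move  # true score is at most the stored score, so it cannot raise alpha
    return None  # bound does not resolve this window; fall through to a full search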
65 changes: 34 additions & 31 deletions moonfish/engines/l1p_alpha_beta.py
@@ -14,35 +14,38 @@ class Layer1ParallelAlphaBeta(AlphaBeta):
def search_move(self, board: Board) -> Move:
# start multiprocessing
nprocs = cpu_count()
pool = Pool(processes=nprocs)
manager = Manager()
shared_cache = manager.dict()

# creating list of moves at layer 1
moves = list(board.legal_moves)
arguments = []
for move in moves:
board.push(move)
arguments.append(
(
copy(board),
copy(self.config.negamax_depth) - 1,
self.config.null_move,
shared_cache,

with Pool(processes=nprocs) as pool, Manager() as manager:
shared_cache = manager.dict()

# creating list of moves at layer 1
moves = list(board.legal_moves)
arguments = []
for move in moves:
board.push(move)
arguments.append(
(
copy(board),
copy(self.config.negamax_depth) - 1,
self.config.null_move,
shared_cache,
)
)
)
board.pop()

# executing all the moves at layer 1 in parallel
# starmap blocks until all process are done
processes = pool.starmap(self.negamax, arguments)
results = []

# inserting move information in the results
for i in range(len(processes)):
results.append((*processes[i], moves[i]))

# sorting results and getting best move
results.sort(key=lambda a: a[0])
best_move = results[0][2]
return best_move
board.pop()

# executing all the moves at layer 1 in parallel
# starmap blocks until all processes are done
processes = pool.starmap(self.negamax, arguments)
results = []

# inserting move information in the results
# negamax returns (score, best_move) - we negate score since
# it's from opponent's perspective
for i in range(len(processes)):
score = -processes[i][0] # Negate: opponent's -> our perspective
results.append((score, processes[i][1], moves[i]))

# sorting results by score (descending) and getting best move
results.sort(key=lambda a: a[0], reverse=True)
best_move = results[0][2]
return best_move
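The perspective handling above in miniature (a sketch with made-up scores, not code from the PR): each parallel negamax call scores the board after one of our root moves from the opponent's point of view, so the parent negates before comparing, which is what the reverse-sorted selection implements.

# Hypothetical scores returned by negamax for each post-move board; positive
# means good for the side to move there, i.e. the opponent.
child_scores = {"e2e4": 0.3, "g1f3": -0.1, "a2a3": 0.05}

# Negate to convert to the root side's perspective, then take the maximum:
# the best move is the one that leaves the opponent worst off.
best_move = max(child_scores, key=lambda m: -child_scores[m])
assert best_move == "g1f3"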
118 changes: 59 additions & 59 deletions moonfish/engines/l2p_alpha_beta.py
@@ -79,65 +79,65 @@ def search_move(self, board: Board) -> Move:
START_LAYER = 2
# start multiprocessing
nprocs = cpu_count()
pool = Pool(processes=nprocs)
manager = Manager()
shared_cache = manager.dict()

# pointer that help us in finding the best next move
board_to_move_that_generates_it = manager.dict()
with Pool(processes=nprocs) as pool, Manager() as manager:
shared_cache = manager.dict()

# pointer that helps us find the best next move
board_to_move_that_generates_it = manager.dict()

# starting board list
board_list = [(board, board, 0)]

# generating all possible boards for up to 2 moves ahead
for _ in range(START_LAYER):
arguments = [
(board, board_to_move_that_generates_it, layer)
for board, _, layer in board_list
]
processes = pool.starmap(self.generate_board_and_moves, arguments)
board_list = [board for board in sum(processes, [])]

negamax_arguments = [
(
board,
copy(self.config.negamax_depth) - START_LAYER,
self.config.null_move,
shared_cache,
)
for board, _, _ in board_list
]

# starting board list
board_list = [(board, board, 0)]
parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)

# generating all possible boards for up to 2 moves ahead
for _ in range(START_LAYER):
arguments = [
(board, board_to_move_that_generates_it, layer)
for board, _, layer in board_list
]
processes = pool.starmap(self.generate_board_and_moves, arguments)
board_list = [board for board in sum(processes, [])]

negamax_arguments = [
(
board,
copy(self.config.negamax_depth) - START_LAYER,
self.config.null_move,
shared_cache,
)
for board, _, _ in board_list
]

parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)

# grouping output based on the board that generates it
groups = defaultdict(list)

# adding information about the board and layer
# that generates the results and separating them
# into groups based on the root board
for i in range(len(parallel_layer_result)):
groups[board_list[i][1].fen()].append(
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
)

best_boards = []

for group in groups.values():
# layer and checkmate corrections
# they are needed to adjust for
# boards from different layers
group = list(map(LAYER_SIGNAL_CORRECTION, group))
group = list(map(self.checkmate_correction, group))
# get best move from group
group.sort(key=lambda a: a[0])
best_boards.append(group[0])

# get best board
best_boards.sort(key=lambda a: a[0], reverse=True)
best_board = best_boards[0][2].fen()

# get move that results in best board
best_move = board_to_move_that_generates_it[best_board]

return best_move
# grouping output based on the board that generates it
groups = defaultdict(list)

# adding information about the board and layer
# that generates the results and separating them
# into groups based on the root board
for i in range(len(parallel_layer_result)):
groups[board_list[i][1].fen()].append(
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
)

best_boards = []

for group in groups.values():
# layer and checkmate corrections
# they are needed to adjust for
# boards from different layers
group = list(map(LAYER_SIGNAL_CORRECTION, group))
group = list(map(self.checkmate_correction, group))
# get best move from group
group.sort(key=lambda a: a[0])
best_boards.append(group[0])

# get best board
best_boards.sort(key=lambda a: a[0], reverse=True)
best_board = best_boards[0][2].fen()

# get move that results in best board
best_move = board_to_move_that_generates_it[best_board]

return best_move
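Both parallel engines now create Pool and Manager inside a with-block. A minimal standalone sketch of that pattern (not code from the PR; square and the payload are illustrative), showing that the worker pool and the manager process are cleaned up automatically even if the search raises:

from multiprocessing import Manager, Pool, cpu_count

def square(x: int) -> int:
    return x * x

if __name__ == "__main__":
    with Pool(processes=cpu_count()) as pool, Manager() as manager:
        shared = manager.dict()  # proxy dict visible to every worker
        shared["squares"] = pool.starmap(square, [(i,) for i in range(4)])
        print(shared["squares"])  # [0, 1, 4, 9]
    # Leaving the with-block terminates the pool and shuts the manager down.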