From e5ab4c04e0eef3240b0af6143b8200aa61a81bd6 Mon Sep 17 00:00:00 2001
From: luccabb
Date: Mon, 19 Jan 2026 19:46:28 -0800
Subject: [PATCH 1/2] [2/9] Fix transposition table with proper bounds

Implements correct transposition table behavior with bound types:

**Transposition Table Changes:**
- Add `Bound` enum: EXACT, LOWER_BOUND, UPPER_BOUND
- Use Zobrist hash as cache key (fast integer vs slow FEN string)
- Store bound type and depth with each cache entry
- Only use cached scores when depth is sufficient
- Properly handle bound types in lookups:
  - EXACT: use score directly
  - LOWER_BOUND: use if score >= beta (fail high)
  - UPPER_BOUND: use if score <= alpha (fail low)

**Null Move Pruning Fix:**
- Added missing `null_move` parameter check (was always trying null move)

**Parallel Engine Updates:**
- Update lazy_smp, l1p, l2p to use new zobrist hash cache key
- Add context managers for Pool/Manager (proper resource cleanup)
- Fix score negation in l1p (opponent perspective -> our perspective)

Co-Authored-By: Claude Opus 4.5
---
 moonfish/engines/alpha_beta.py     |  94 +++++++++++++++--------
 moonfish/engines/l1p_alpha_beta.py |  65 ++++++++--------
 moonfish/engines/l2p_alpha_beta.py | 118 ++++++++++++++---------------
 moonfish/engines/lazy_smp.py       |  49 ++++++------
 4 files changed, 179 insertions(+), 147 deletions(-)

diff --git a/moonfish/engines/alpha_beta.py b/moonfish/engines/alpha_beta.py
index 6d25360..f39a515 100644
--- a/moonfish/engines/alpha_beta.py
+++ b/moonfish/engines/alpha_beta.py
@@ -1,7 +1,9 @@
 from copy import copy
+from enum import IntEnum
 from multiprocessing.managers import DictProxy
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional, Tuple, Union

+import chess.polyglot
 import chess.syzygy
 from chess import Board, Move
 from moonfish.config import Config
@@ -9,9 +11,20 @@ from moonfish.move_ordering import organize_moves, organize_moves_quiescence
 from moonfish.psqt import board_evaluation, count_pieces

-CACHE_KEY = Dict[
-    Tuple[str, int, bool, float, float], Tuple[float | int, Optional[Move]]
-]
+
+class Bound(IntEnum):
+    """Transposition table bound types."""
+
+    EXACT = 0  # Score is exact (PV node, score was within alpha-beta window)
+    LOWER_BOUND = 1  # Score is at least this value (failed high / beta cutoff)
+    UPPER_BOUND = 2  # Score is at most this value (failed low)
+
+
+# Depth value for terminal positions (checkmate/stalemate) - always usable
+DEPTH_MAX = 10000
+
+# Cache: zobrist_hash -> (score, best_move, bound_type, depth)
+CACHE_TYPE = Dict[int, Tuple[Union[float, int], Optional[Move], Bound, int]]


 class AlphaBeta:
@@ -138,7 +151,7 @@ def negamax(
         board: Board,
         depth: int,
         null_move: bool,
-        cache: DictProxy | CACHE_KEY,
+        cache: DictProxy | CACHE_TYPE,
         alpha: float = float("-inf"),
         beta: float = float("inf"),
     ) -> Tuple[float | int, Optional[Move]]:
@@ -171,17 +184,36 @@ def negamax(
         Returns:
             - best_score, best_move: returns best move that it found and its value.
""" - cache_key = (board.fen(), depth, null_move, alpha, beta) - # check if board was already evaluated + original_alpha = alpha + cache_key = chess.polyglot.zobrist_hash(board) + + # Check transposition table if cache_key in cache: - return cache[cache_key] + cached_score, cached_move, cached_bound, cached_depth = cache[cache_key] + + # Only use score if cached search was at least as deep as we need + # Use cached result if: + # - EXACT: score is exact + # - LOWER_BOUND and score >= beta: true score is at least cached, causes cutoff + # - UPPER_BOUND and score <= alpha: true score is at most cached, no improvement + if cached_depth >= depth and ( + cached_bound == Bound.EXACT + or (cached_bound == Bound.LOWER_BOUND and cached_score >= beta) + or (cached_bound == Bound.UPPER_BOUND and cached_score <= alpha) + ): + return cached_score, cached_move if board.is_checkmate(): - cache[cache_key] = (-self.config.checkmate_score, None) + cache[cache_key] = ( + -self.config.checkmate_score, + None, + Bound.EXACT, + DEPTH_MAX, + ) return (-self.config.checkmate_score, None) if board.is_stalemate(): - cache[cache_key] = (0, None) + cache[cache_key] = (0, None, Bound.EXACT, DEPTH_MAX) return (0, None) # recursion base case @@ -193,12 +225,13 @@ def negamax( alpha=alpha, beta=beta, ) - cache[cache_key] = (board_score, None) + cache[cache_key] = (board_score, None, Bound.EXACT, depth) return board_score, None - # null move prunning + # null move pruning if ( self.config.null_move + and null_move and depth >= (self.config.null_move_r + 1) and not board.is_check() ): @@ -215,12 +248,11 @@ def negamax( )[0] board.pop() if board_score >= beta: - cache[cache_key] = (beta, None) + # Null move confirmed beta cutoff - this is a lower bound + cache[cache_key] = (beta, None, Bound.LOWER_BOUND, depth) return beta, None best_move = None - - # initializing best_score best_score = float("-inf") moves = organize_moves(board) @@ -244,36 +276,38 @@ def negamax( # take move back board.pop() - # beta-cutoff - if board_score >= beta: - cache[cache_key] = (board_score, move) - return board_score, move - # update best move if board_score > best_score: best_score = board_score best_move = move - # setting alpha variable to do pruning - alpha = max(alpha, board_score) + # beta-cutoff: opponent won't allow this position + if best_score >= beta: + # LOWER_BOUND: true score is at least best_score + cache[cache_key] = (best_score, best_move, Bound.LOWER_BOUND, depth) + return best_score, best_move - # alpha beta pruning when we already found a solution that is at least as - # good as the current one those branches won't be able to influence the - # final decision so we don't need to waste time analyzing them - if alpha >= beta: - break + # update alpha + alpha = max(alpha, best_score) # if no best move, make a random one if not best_move: best_move = self.random_move(board) - # save result before returning - cache[cache_key] = (best_score, best_move) + # Determine bound type based on whether we improved alpha + if best_score <= original_alpha: + # Failed low: we didn't find anything better than what we already had + bound = Bound.UPPER_BOUND + else: + # Score is exact: we found a score within the window + bound = Bound.EXACT + + cache[cache_key] = (best_score, best_move, bound, depth) return best_score, best_move def search_move(self, board: Board) -> Move: # create shared cache - cache: CACHE_KEY = {} + cache: CACHE_TYPE = {} best_move = self.negamax( board, copy(self.config.negamax_depth), self.config.null_move, cache diff --git 
diff --git a/moonfish/engines/l1p_alpha_beta.py b/moonfish/engines/l1p_alpha_beta.py
index 2890255..10821fc 100644
--- a/moonfish/engines/l1p_alpha_beta.py
+++ b/moonfish/engines/l1p_alpha_beta.py
@@ -14,35 +14,38 @@ class Layer1ParallelAlphaBeta(AlphaBeta):
     def search_move(self, board: Board) -> Move:
         # start multiprocessing
         nprocs = cpu_count()
-        pool = Pool(processes=nprocs)
-        manager = Manager()
-        shared_cache = manager.dict()
-
-        # creating list of moves at layer 1
-        moves = list(board.legal_moves)
-        arguments = []
-        for move in moves:
-            board.push(move)
-            arguments.append(
-                (
-                    copy(board),
-                    copy(self.config.negamax_depth) - 1,
-                    self.config.null_move,
-                    shared_cache,
+
+        with Pool(processes=nprocs) as pool, Manager() as manager:
+            shared_cache = manager.dict()
+
+            # creating list of moves at layer 1
+            moves = list(board.legal_moves)
+            arguments = []
+            for move in moves:
+                board.push(move)
+                arguments.append(
+                    (
+                        copy(board),
+                        copy(self.config.negamax_depth) - 1,
+                        self.config.null_move,
+                        shared_cache,
+                    )
                 )
-            )
-            board.pop()
-
-        # executing all the moves at layer 1 in parallel
-        # starmap blocks until all process are done
-        processes = pool.starmap(self.negamax, arguments)
-        results = []
-
-        # inserting move information in the results
-        for i in range(len(processes)):
-            results.append((*processes[i], moves[i]))
-
-        # sorting results and getting best move
-        results.sort(key=lambda a: a[0])
-        best_move = results[0][2]
-        return best_move
+                board.pop()
+
+            # executing all the moves at layer 1 in parallel
+            # starmap blocks until all processes are done
+            processes = pool.starmap(self.negamax, arguments)
+            results = []
+
+            # inserting move information in the results
+            # negamax returns (score, best_move) - we negate score since
+            # it's from opponent's perspective
+            for i in range(len(processes)):
+                score = -processes[i][0]  # Negate: opponent's -> our perspective
+                results.append((score, processes[i][1], moves[i]))
+
+            # sorting results by score (descending) and getting best move
+            results.sort(key=lambda a: a[0], reverse=True)
+            best_move = results[0][2]
+            return best_move
diff --git a/moonfish/engines/l2p_alpha_beta.py b/moonfish/engines/l2p_alpha_beta.py
index 61f9444..307ccd9 100644
--- a/moonfish/engines/l2p_alpha_beta.py
+++ b/moonfish/engines/l2p_alpha_beta.py
@@ -79,65 +79,65 @@ def search_move(self, board: Board) -> Move:
         START_LAYER = 2
         # start multiprocessing
         nprocs = cpu_count()
-        pool = Pool(processes=nprocs)
-        manager = Manager()
-        shared_cache = manager.dict()
-        # pointer that help us in finding the best next move
-        board_to_move_that_generates_it = manager.dict()
+        with Pool(processes=nprocs) as pool, Manager() as manager:
+            shared_cache = manager.dict()
+
+            # pointer that help us in finding the best next move
+            board_to_move_that_generates_it = manager.dict()
+
+            # starting board list
+            board_list = [(board, board, 0)]
+
+            # generating all possible boards for up to 2 moves ahead
+            for _ in range(START_LAYER):
+                arguments = [
+                    (board, board_to_move_that_generates_it, layer)
+                    for board, _, layer in board_list
+                ]
+                processes = pool.starmap(self.generate_board_and_moves, arguments)
+                board_list = [board for board in sum(processes, [])]
+
+            negamax_arguments = [
+                (
+                    board,
+                    copy(self.config.negamax_depth) - START_LAYER,
+                    self.config.null_move,
+                    shared_cache,
+                )
+                for board, _, _ in board_list
+            ]

-        # starting board list
-        board_list = [(board, board, 0)]
+            parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)

-        # generating all possible boards for up to 2 moves ahead
-        for _ in range(START_LAYER):
-            arguments = [
-                (board, board_to_move_that_generates_it, layer)
-                for board, _, layer in board_list
-            ]
-            processes = pool.starmap(self.generate_board_and_moves, arguments)
-            board_list = [board for board in sum(processes, [])]
-
-        negamax_arguments = [
-            (
-                board,
-                copy(self.config.negamax_depth) - START_LAYER,
-                self.config.null_move,
-                shared_cache,
-            )
-            for board, _, _ in board_list
-        ]
-
-        parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
-
-        # grouping output based on the board that generates it
-        groups = defaultdict(list)
-
-        # adding information about the board and layer
-        # that generates the results and separating them
-        # into groups based on the root board
-        for i in range(len(parallel_layer_result)):
-            groups[board_list[i][1].fen()].append(
-                (*parallel_layer_result[i], board_list[i][0], board_list[i][2])
-            )
-
-        best_boards = []
-
-        for group in groups.values():
-            # layer and checkmate corrections
-            # they are needed to adjust for
-            # boards from different layers
-            group = list(map(LAYER_SIGNAL_CORRECTION, group))
-            group = list(map(self.checkmate_correction, group))
-            # get best move from group
-            group.sort(key=lambda a: a[0])
-            best_boards.append(group[0])
-
-        # get best board
-        best_boards.sort(key=lambda a: a[0], reverse=True)
-        best_board = best_boards[0][2].fen()
-
-        # get move that results in best board
-        best_move = board_to_move_that_generates_it[best_board]
-
-        return best_move
+            # grouping output based on the board that generates it
+            groups = defaultdict(list)
+
+            # adding information about the board and layer
+            # that generates the results and separating them
+            # into groups based on the root board
+            for i in range(len(parallel_layer_result)):
+                groups[board_list[i][1].fen()].append(
+                    (*parallel_layer_result[i], board_list[i][0], board_list[i][2])
+                )
+
+            best_boards = []
+
+            for group in groups.values():
+                # layer and checkmate corrections
+                # they are needed to adjust for
+                # boards from different layers
+                group = list(map(LAYER_SIGNAL_CORRECTION, group))
+                group = list(map(self.checkmate_correction, group))
+                # get best move from group
+                group.sort(key=lambda a: a[0])
+                best_boards.append(group[0])
+
+            # get best board
+            best_boards.sort(key=lambda a: a[0], reverse=True)
+            best_board = best_boards[0][2].fen()
+
+            # get move that results in best board
+            best_move = board_to_move_that_generates_it[best_board]
+
+            return best_move
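The l1p and l2p hunks above follow the same mechanical pattern: open `Pool` and `Manager` as context managers so the workers and the shared proxy dict are always cleaned up, fan the work out with `starmap`, and (for l1p) negate the child scores back to the root player's perspective. Below is a minimal, self-contained sketch of that pattern, not engine code; `score_after` and `pick_move` are hypothetical stand-ins for the real negamax calls.

```python
# Minimal sketch of the Pool/Manager + score-negation pattern; not engine code.
# score_after is a hypothetical stand-in for AlphaBeta.negamax.
from multiprocessing import Manager, Pool, cpu_count

import chess


def score_after(fen: str, shared_cache) -> float:
    board = chess.Board(fen)
    score = float(len(list(board.legal_moves)))  # toy score, from the child's side
    shared_cache[fen] = score  # every worker writes into the same proxy dict
    return score


def pick_move(board: chess.Board) -> chess.Move:
    moves = list(board.legal_moves)
    # Context managers guarantee the pool and the manager are shut down on exit.
    with Pool(processes=cpu_count()) as pool, Manager() as manager:
        shared_cache = manager.dict()
        args = []
        for move in moves:
            board.push(move)
            args.append((board.fen(), shared_cache))
            board.pop()
        child_scores = pool.starmap(score_after, args)  # blocks until all finish
    # Child positions are scored from the opponent's point of view:
    # negate, then sort descending and take the best move for the root player.
    results = sorted(zip([-s for s in child_scores], moves),
                     key=lambda pair: pair[0], reverse=True)
    return results[0][1]


if __name__ == "__main__":
    print(pick_move(chess.Board()))
```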
diff --git a/moonfish/engines/lazy_smp.py b/moonfish/engines/lazy_smp.py
index 8451771..ab38aa0 100644
--- a/moonfish/engines/lazy_smp.py
+++ b/moonfish/engines/lazy_smp.py
@@ -1,6 +1,7 @@
 from copy import copy
 from multiprocessing import cpu_count, Manager, Pool

+import chess.polyglot
 from chess import Board, Move

 from moonfish.engines.alpha_beta import AlphaBeta
@@ -10,31 +11,25 @@ class LazySMP(AlphaBeta):
     def search_move(self, board: Board) -> Move:
         # start multiprocessing
         nprocs = cpu_count()
-        pool = Pool(processes=nprocs)
-        manager = Manager()
-        shared_cache = manager.dict()
-        # executing all the moves at layer 1 in parallel
-        # starmap blocks until all process are done
-        pool.starmap(
-            self.negamax,
-            [
-                (
-                    board,
-                    copy(self.config.negamax_depth),
-                    self.config.null_move,
-                    shared_cache,
-                )
-                for _ in range(nprocs)
-            ],
-        )
-
-        # return best move for our original board
-        return shared_cache[
-            (
-                board.fen(),
-                self.config.negamax_depth,
-                self.config.null_move,
-                float("-inf"),
-                float("inf"),
+        with Pool(processes=nprocs) as pool, Manager() as manager:
+            shared_cache = manager.dict()
+            # executing negamax in parallel N times
+            # all processes share the cache for faster convergence
+            # starmap blocks until all processes are done
+            pool.starmap(
+                self.negamax,
+                [
+                    (
+                        board,
+                        copy(self.config.negamax_depth),
+                        self.config.null_move,
+                        shared_cache,
+                    )
+                    for _ in range(nprocs)
+                ],
             )
-        ][1]
+
+            # return best move for our original board
+            # cache key is now just the zobrist hash
+            cache_key = chess.polyglot.zobrist_hash(board)
+            return shared_cache[cache_key][1]

From 1c7b8d9a398e01a09d52bf760803ce2330f7e3f3 Mon Sep 17 00:00:00 2001
From: luccabb
Date: Tue, 20 Jan 2026 23:01:36 -0800
Subject: [PATCH 2/2] Trigger CI
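After this change, the LazySMP read-back step depends only on the Zobrist key and the new four-field cache entry. A small illustration of that lookup with python-chess follows; the cache contents here are fabricated for the example.

```python
# Illustration of the zobrist-keyed lookup LazySMP now performs.
# The cache entry below is fabricated; in the engine it is written by the
# parallel negamax workers as (score, best_move, bound, depth).
import chess
import chess.polyglot

board = chess.Board()
key = chess.polyglot.zobrist_hash(board)

# pretend a worker already stored an entry for the root position
shared_cache = {key: (0.3, chess.Move.from_uci("e2e4"), 0, 4)}

score, best_move, bound, depth = shared_cache[chess.polyglot.zobrist_hash(board)]
print(best_move)  # e2e4
```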