/**

Copyright (c) Scott Gasch

Module Name:

    split.c

Abstract:

    This code is the glue needed to run a parallel search.  Let me
    encourage other chess programmers to read this code carefully.
    Implementing a parallel search is pretty easy and I procrastinated
    doing it for WAY too long because I was afraid to sink multiple
    months into coding it up.  It's not as hard as you think.  If you
    have a good handle on multi-threaded programming then you can do
    something that works pretty well in a week.

    Here's my system (which is not original -- I believe this is the
    same idea that Bruce Moreland used for Ferret, and I've been told
    it's similar to what Bob Hyatt does in Crafty).  You want to split
    the search on all-nodes.  There is some overhead involved in
    splitting the tree and, after you've paid that price, you do not
    want one of the moves failing high... because then you have to
    abort the split and return a fail high.  Once my search has one of
    these nodes, it calls in here to split the tree.  What's involved
    with splitting the tree is:

        1. populating a "split node" with data about the position we
           are splitting in.

        2. grabbing zero or more idle "worker threads" to come help
           with the split.

        3. all threads searching under the split grab a move from the
           move list and search that move's subtree in parallel.

        4. when a subtree is evaluated, all threads update the split
           node with new information (i.e. raise alpha or, in the
           undesirable case that a thread failed high, toggle a flag
           in the split node to inform the rest of the searchers).

        5. before grabbing the next move, each thread updates its
           local alpha to pick up window-narrowing information
           encountered by other threads.

        6. go to step 3; stop when there are no more moves or a thread
           finds a fail high.

    (A simplified sketch of this per-thread loop appears below, just
    after the HELPER_THREAD struct definition.)

    ------------------------------------------------------------------

    This is a quote from a win32 blog about the x86 memory model which
    I found helpful to read while I was thinking about multithreaded
    code in this module:

    "The most familiar model is X86.  It's a relatively strong model.
    Stores are never reordered with respect to other stores.  But, in
    the absence of data dependence, loads can be reordered with
    respect to other loads and stores.  Many X86 developers don't
    realize that this reordering is possible, though it can lead to
    some nasty failures under stress on big MP machines.  In terms of
    the above, the memory model for X86 can be described as:

        1. All stores are actually store.release (upward memory
           fence).  This means that normal loads can be moved above
           the store.release but nothing can be moved below it.

        2. All loads are normal loads.

        3. Any use of the LOCK prefix (e.g. LOCK CMPXCHG or LOCK INC)
           creates a full memory fence."

    (An illustrative footnote on point 3 appears at the very end of
    this file.)

Author:

    Scott Gasch (scott.gasch@gmail.com) 25 Jun 2004

Revision History:

    $Id: split.c 345 2007-12-02 22:56:42Z scott $

**/

#ifdef MP
#include "chess.h"

extern ULONG g_uIterateDepth;
ULONG g_uNumHelperThreads = 0;
#define MAX_SPLITS (20)
#define IDLE ((ULONG)-1)
void HelpSearch(SEARCHER_THREAD_CONTEXT *ctx, ULONG u);

//
// A struct that holds information about helper threads.
//
typedef struct _HELPER_THREAD
{
    ULONG uHandle;
    volatile ULONG uAssignment;
    SEARCHER_THREAD_CONTEXT ctx;
#ifdef PERF_COUNTERS
    UINT64 u64IdleCycles;
    UINT64 u64BusyCycles;
#endif
} HELPER_THREAD;
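
//
// The following is a simplified, illustrative sketch (not compiled) of
// the per-thread loop described in steps 3..6 of the header comment
// above.  The real logic -- with locking, PVS re-searches, extensions,
// move flags and counters -- lives in HelpSearch, _GetNextParallelMove
// and _UpdateSplitInfo below.
//
//     for (;;)
//     {
//         // Steps 5 and 3: refresh the local alpha/bestscore from the
//         // split node and grab the next unsearched move (if any).
//         mv = _GetNextParallelMove(&iAlpha, &iBestScore, &uMoveNum, u);
//         if (mv.uMove == 0) break;       // out of moves, or fail high
//
//         if (MakeMove(ctx, mv))
//         {
//             iScore = -Search(ctx, -iAlpha - 1, -iAlpha, uDepth);
//             UnmakeMove(ctx, mv);
//
//             // Step 4: publish anything we learned; this may raise
//             // the split's alpha or set its fTerminate flag.
//             if (iScore > iBestScore) _UpdateSplitInfo(ctx, mv, iScore, u);
//         }
//     }
//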

//
// An array of these structs that is allocated dynamically so that the
// number of cpus is not hardcoded; the engine can scale based on where
// it is running.
//
static HELPER_THREAD *g_HelperThreads = NULL;

//
// An array of structs that hold information about where we have split
// the search tree.
//
static SPLIT_INFO g_SplitInfo[MAX_SPLITS];

//
// In addition to this main split database lock, each split entry has
// its own private lock.  This way contention between searcher threads
// operating on different split nodes is eliminated.
//
volatile static ULONG g_uSplitLock = 0;
#define SPLITS_LOCKED (g_uSplitLock != 0)
#define LOCK_SPLITS \
    AcquireSpinLock(&g_uSplitLock); \
    ASSERT(SPLITS_LOCKED);
#define UNLOCK_SPLITS \
    ASSERT(SPLITS_LOCKED); \
    ReleaseSpinLock(&g_uSplitLock);
static ULONG g_uNumSplitsAvailable;
volatile ULONG g_uNumHelpersAvailable;


ULONG
HelperThreadIdleLoop(IN ULONG uMyId)
/**

Routine description:

    The entry point of a helper thread.  It will spin in the idle loop
    here until another thread splits the search tree, sees that it is
    idle, and notifies it to come help by changing the assignment
    field in its struct.

Parameters:

    ULONG uMyId : this thread's index in g_HelperThreads

Return value:

    ULONG

**/
{
    SEARCHER_THREAD_CONTEXT *ctx = &(g_HelperThreads[uMyId].ctx);
    ULONG u, v;
    MOVE mv;
    ULONG uIdleLoops = 0;
#ifdef PERF_COUNTERS
    UINT64 u64Then;
    UINT64 u64Now;
#endif
#ifdef DEBUG
    POSITION board;
#endif

    InitializeSearcherContext(NULL, ctx);
    ctx->uThreadNumber = uMyId + 1;
    do
    {
#ifdef PERF_COUNTERS
        if (uIdleLoops == 0) u64Then = SystemReadTimeStampCounter();
#endif
        //
        // Did someone tell us to come help?
        //
        if ((u = g_HelperThreads[uMyId].uAssignment) != IDLE)
        {
            //
            // By now the split info is populated.
            //
            uIdleLoops = 0;
            ReInitializeSearcherContext(&(g_SplitInfo[u].sRootPosition), ctx);
            ctx->pSplitInfo[0] = &(g_SplitInfo[u]);
            ctx->uPositional = g_SplitInfo[u].uSplitPositional;

            //
            // Note: the main thread could have already exhausted the
            // split and decremented it from 3->2.  When we leave it
            // will fall to 1 and allow the main thread to continue.
            //
            ASSERT(g_SplitInfo[u].uNumThreadsHelping >= 2);

            //
            // Get from the root of the search to the split position.
            // We do this instead of just initializing the searcher
            // context at the split node so that historic things like
            // draw detection still work in the helper threads.
            //
            v = 0;
            do
            {
                ASSERT(v < MAX_PLY_PER_SEARCH);
                mv.uMove = g_SplitInfo[u].mvPathToHere[v].uMove;
                if (ILLEGALMOVE == mv.uMove) break;
#ifdef DEBUG
                if (mv.uMove)
                {
                    ASSERT(SanityCheckMove(&ctx->sPosition, mv));
                }
#endif
                if (FALSE == MakeMove(ctx, mv))
                {
                    UtilPanic(CANNOT_INITIALIZE_SPLIT,
                              &ctx->sPosition,
                              (void *)mv.uMove,
                              &g_SplitInfo[u],
                              (void *)v,
                              __FILE__, __LINE__);
                }
                v++;
            }
            while(1);

#ifdef DEBUG
            ASSERT(g_SplitInfo[u].uSplitPly == ctx->uPly);
            ASSERT(v > 0);
            ASSERT(IS_SAME_MOVE(g_SplitInfo[u].mvPathToHere[v-1],
                                g_SplitInfo[u].mvLast));
            ASSERT(PositionsAreEquivalent(&(ctx->sPosition),
                                          &(g_SplitInfo[u].sSplitPosition)));
            VerifyPositionConsistency(&(ctx->sPosition), FALSE);
            memcpy(&board, &(ctx->sPosition), sizeof(POSITION));
#endif
            //
            // Populate the move stack of this helper context to make
            // it look like it generated the moves at the split.  The
            // reason I do this is so that ComputeMoveExtension (which
            // looks at the move stack) will work at a split node even
            // if the context it gets is a helper thread's.
            //
            ctx->sMoveStack.uBegin[ctx->uPly] = 0;
            ctx->sMoveStack.uBegin[ctx->uPly + 1] =
                ctx->sMoveStack.uEnd[ctx->uPly] = g_SplitInfo[u].uNumMoves;
            for (v = 0; v < g_SplitInfo[u].uNumMoves; v++)
            {
                ctx->sMoveStack.mvf[v] = g_SplitInfo[u].mvf[v];
                ASSERT(SanityCheckMove(&ctx->sPosition,
                                       g_SplitInfo[u].mvf[v].mv));
            }

            //
            // Go help with the search
            //
            HelpSearch(ctx, u);
            ASSERT(PositionsAreEquivalent(&board, &(ctx->sPosition)));

            //
            // Done with this assignment, wrap up and go back to idle state
            //
            LOCK_SPLITS;
            g_HelperThreads[uMyId].uAssignment = IDLE;
            g_uNumHelpersAvailable++;
            UNLOCK_SPLITS;
#ifdef PERF_COUNTERS
            u64Now = SystemReadTimeStampCounter();
            g_HelperThreads[uMyId].u64BusyCycles += (u64Now - u64Then);
#endif
        }
#ifdef PERF_COUNTERS
        //
        // There was nothing for us to do; if that happens often
        // enough remember the idle cycles.
        //
        else
        {
            uIdleLoops++;
            if (uIdleLoops > 1000)
            {
                u64Now = SystemReadTimeStampCounter();
                LOCK_SPLITS;
                g_HelperThreads[uMyId].u64IdleCycles += (u64Now - u64Then);
                UNLOCK_SPLITS;
                uIdleLoops = 0;
#ifdef DEBUG
                SystemDeferExecution(1);
#endif
            }
        }
#endif
    }
    while(FALSE == g_fExitProgram);

    Trace("HELPER THREAD: thread terminating.\n");
    return(0);                                    // ExitThread
}


FLAG
InitializeParallelSearch(void)
/**

Routine description:

    This routine must be called before any thread can split the search
    tree because it sets up the parallel search system.

Parameters:

    void

Return value:

    FLAG

**/
{
    ULONG u;

    if (g_Options.uNumProcessors < 2) return(FALSE);

    //
    // Initialize split entries
    //
    g_uSplitLock = 0;
    for (u = 0; u < MAX_SPLITS; u++)
    {
        memset(&(g_SplitInfo[u]), 0, sizeof(g_SplitInfo[u]));
    }
    g_uNumSplitsAvailable = MAX_SPLITS;
    ASSERT(NUM_SPLIT_PTRS_IN_CONTEXT <= MAX_SPLITS);

    //
    // Create and initialize helper threads
    //
    g_uNumHelperThreads = g_Options.uNumProcessors - 1;
    ASSERT(g_uNumHelperThreads >= 1);
    g_HelperThreads = SystemAllocateMemory(sizeof(HELPER_THREAD) *
                                           g_uNumHelperThreads);
    ASSERT(g_HelperThreads != NULL);
    for (u = 0; u < g_uNumHelperThreads; u++)
    {
        memset(&(g_HelperThreads[u]), 0, sizeof(g_HelperThreads[u]));
        g_HelperThreads[u].uAssignment = IDLE;
        if (FALSE == SystemCreateThread(HelperThreadIdleLoop,
                                        u,
                                        &(g_HelperThreads[u].uHandle)))
        {
            UtilPanic(UNEXPECTED_SYSTEM_CALL_FAILURE,
                      NULL, "creating a thread", 0, NULL,
                      __FILE__, __LINE__);
        }
    }
    g_uNumHelpersAvailable = g_uNumHelperThreads;
    return(TRUE);
}


#ifdef PERF_COUNTERS
void
ClearHelperThreadIdleness(void)
/**

Routine description:

    Called at the start of a search (if PERF_COUNTERS is defined);
    this function's job is to reset the idleness counters for all
    helper threads.

Parameters:

    void

Return value:

    void

**/
{
    ULONG u;

    LOCK_SPLITS;
    for (u = 0; u < g_uNumHelperThreads; u++)
    {
        g_HelperThreads[u].u64BusyCycles =
            g_HelperThreads[u].u64IdleCycles = 0ULL;
    }
    UNLOCK_SPLITS;
}


void
DumpHelperIdlenessReport(void)
/**

Routine description:

    Called at the end of a search, this function's job is to print out
    a report of how busy/idle each helper thread was.

Parameters:

    void

Return value:

    void

**/
{
    ULONG u;
    double n, d;

    for (u = 0; u < g_uNumHelperThreads; u++)
    {
        n = (double)g_HelperThreads[u].u64BusyCycles;
        d = (double)g_HelperThreads[u].u64IdleCycles;
        d += n;
        Trace("Helper thread %u: %5.2f percent busy.\n",
              u, (n / d) * 100.0);
    }
}
#endif
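
//
// For context, StartParallelSearch (below) is driven from search.c.
// The following hypothetical call-site sketch is NOT the actual code
// in search.c; it only illustrates the contract described in the
// comments below: the caller peeks at the helper count without taking
// a lock and must treat INVALID_SCORE as "no split happened, keep
// searching this node serially."  SOME_MIN_SPLIT_DEPTH is a made-up
// tuning knob, not an engine symbol.
//
//     if ((g_uNumHelpersAvailable > 0) &&      // unlocked, racy peek
//         (uDepth >= SOME_MIN_SPLIT_DEPTH))    // hypothetical threshold
//     {
//         iScore = StartParallelSearch(ctx, &iAlpha, iBeta, &iBestScore,
//                                      &mvBest, uMoveNum, iPositionExtend,
//                                      uDepth);
//         if (iScore != INVALID_SCORE)
//         {
//             // The split searched the rest of the moves at this node;
//             // alpha, bestscore and bestmove were updated in place, so
//             // handle iScore as the result for this node.
//         }
//     }
//     // ...otherwise fall through and keep searching this node alone.
//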
FLAG
CleanupParallelSearch(void)
/**

Routine description:

    Cleanup the parallel search system before program shutdown.

Parameters:

    void

Return value:

    FLAG

**/
{
    if (g_HelperThreads != NULL)
    {
        SystemFreeMemory(g_HelperThreads);
    }
    g_uNumHelperThreads = 0;
    return(TRUE);
}


SCORE
StartParallelSearch(IN SEARCHER_THREAD_CONTEXT *ctx,
                    IN OUT SCORE *piAlpha,
                    IN SCORE iBeta,
                    IN OUT SCORE *piBestScore,
                    IN OUT MOVE *pmvBest,
                    IN ULONG uMoveNum,
                    IN INT iPositionExtend,
                    IN ULONG uDepth)
{
/**

Routine description:

    This routine is called from the main Search (not RootSearch or
    QSearch) when:

        1. it thinks the current search tree node looks like it could
           be searched in parallel, -and-

        2. it's likely that there are idle helper thread(s) to help.

    Its job is to find a free split node, populate it, find idle
    helper threads, assign them to help, and search this node in
    parallel.  It _must_ be called after generating moves at this
    node.

Parameters:

    SEARCHER_THREAD_CONTEXT *ctx : context of thread requesting split
    SCORE *piAlpha : in/out alpha of split node
    SCORE iBeta : in only (beta doesn't change) beta of split node
    SCORE *piBestScore : in/out best score seen so far at split node
    MOVE *pmvBest : in/out best move seen so far at split node
    ULONG uMoveNum : in next move number in ctx->sMoveStack
    INT iPositionExtend : in position based extensions for split node
    ULONG uDepth : in the depth we need to search to

Return value:

    SCORE : the score of this split subtree, along with the out
    params above

**/
    SCORE iScore;
    ULONG u, v;
    ULONG uSplitNum;
    ULONG uOldStart;
#ifdef DEBUG
    POSITION board;

    ASSERT(IS_VALID_SCORE(*piAlpha));
    ASSERT(IS_VALID_SCORE(iBeta));
    ASSERT(IS_VALID_SCORE(*piBestScore));
    ASSERT(*piBestScore > -INFINITY);
    ASSERT(pmvBest->uMove);
    ASSERT(*piAlpha < iBeta);
    memcpy(&board, &(ctx->sPosition), sizeof(POSITION));
    VerifyPositionConsistency(&board, FALSE);
#endif

    //
    // Note: This is a lazy lock construction: search.c has peeked at
    // g_uNumHelperThreads before calling us and only calls when it
    // thinks there's a helper available.  (1) We could find that
    // there is no helper available now because of the race condition.
    // (2) On the IA64 memory model this type of construct is
    // _not_supported_ (my understanding is that this is supported on
    // X86 and AMD64, though).
    //
    LOCK_SPLITS;
    for (u = 0; u < MAX_SPLITS; u++)
    {
        //
        // Try to find a vacant split
        //
        if (g_SplitInfo[u].uNumThreadsHelping == 0)
        {
            //
            // Found one, populate it.
            //
            ASSERT(g_uNumSplitsAvailable > 0);
            g_uNumSplitsAvailable--;

            //
            // We initialize this to two to double-reference this
            // split node.  This guarantees we are the last ones
            // holding a reference to it (since we want to be the last
            // one out of this split node).
            //
            g_SplitInfo[u].uNumThreadsHelping = 2;

            //
            // Store the path from the root to the split node and the
            // root position to start at.  This is done so that
            // ponders that convert into searches don't crash us and
            // so that helper threads can detect repeated positions
            // before the split point.
            //
            ASSERT(ctx->uPly >= 1);
            ASSERT(ctx->uPly < MAX_PLY_PER_SEARCH);
            for (v = 0; v < ctx->uPly; v++)
            {
                g_SplitInfo[u].mvPathToHere[v] = ctx->sPlyInfo[v].mv;
            }
            g_SplitInfo[u].mvPathToHere[v].uMove = ILLEGALMOVE;
            ASSERT(v >= 1);
            g_SplitInfo[u].mvLast = g_SplitInfo[u].mvPathToHere[v-1];
            memcpy(&(g_SplitInfo[u].sRootPosition),
                   GetRootPosition(),
                   sizeof(POSITION));
#ifdef DEBUG
            g_SplitInfo[u].uSplitPly = ctx->uPly;
            memcpy(&(g_SplitInfo[u].sSplitPosition),
                   &(ctx->sPosition),
                   sizeof(POSITION));
#endif
            //
            // It's possible that another thread has triggered the
            // "stop searching" bit in the move timer.
            // This also means that the root position may have changed
            // and therefore the split we just populated can be
            // useless.  Before we grab any helper threads, see if we
            // need to bail out of this split.
            //
            if (g_MoveTimer.bvFlags & TIMER_STOPPING)
            {
                g_uNumSplitsAvailable++;
                g_SplitInfo[u].uNumThreadsHelping = 0;
                UNLOCK_SPLITS;
                return(INVALID_SCORE);
            }

            //
            // More split node initialization
            //
            g_SplitInfo[u].uLock = 0;
            g_SplitInfo[u].fTerminate = FALSE;
            g_SplitInfo[u].uDepth = uDepth;
            g_SplitInfo[u].iPositionExtend = iPositionExtend;
            g_SplitInfo[u].iAlpha = *piAlpha;
            g_SplitInfo[u].iBeta = iBeta;
            g_SplitInfo[u].uSplitPositional = ctx->uPositional;
            g_SplitInfo[u].sSearchFlags = ctx->sSearchFlags;
            ASSERT(FALSE == ctx->sSearchFlags.fAvoidNullmove);
            g_SplitInfo[u].mvBest = *pmvBest;
            g_SplitInfo[u].iBestScore = *piBestScore;
            ASSERT(g_SplitInfo[u].iBestScore <= g_SplitInfo[u].iAlpha);
            g_SplitInfo[u].sCounters.tree.u64TotalNodeCount = 0;
            g_SplitInfo[u].sCounters.tree.u64BetaCutoffs = 0;
            g_SplitInfo[u].sCounters.tree.u64BetaCutoffsOnFirstMove = 0;
            g_SplitInfo[u].PV[0] = NULLMOVE;

            //
            // Copy the remaining moves to be searched from the
            // searcher context that called us into the split node.
            // Note: this thread must have already called
            // GenerateMoves at the split node!
            //
            uOldStart = ctx->sMoveStack.uBegin[ctx->uPly];
            g_SplitInfo[u].uAlreadyDone = uMoveNum - uOldStart + 1;
            ASSERT(g_SplitInfo[u].uAlreadyDone >= 1);
            ctx->sMoveStack.uBegin[ctx->uPly] = uMoveNum;
            for (v = uMoveNum, g_SplitInfo[u].uRemainingMoves = 0;
                 (v < ctx->sMoveStack.uEnd[ctx->uPly]);
                 v++, g_SplitInfo[u].uRemainingMoves++)
            {
                ASSERT(g_SplitInfo[u].uRemainingMoves >= 0);
                ASSERT(g_SplitInfo[u].uRemainingMoves < MAX_MOVES_PER_PLY);

                //
                // If we fail high at this node we have done a lot of
                // work for naught.  We also want to know as soon as
                // possible so that we can vacate this split point,
                // free up a worker thread and get back to the main
                // search.  So forget about the SEARCH_SORT_LIMIT
                // stuff here and sort the whole list of moves from
                // best..worst.
                //
                SelectBestWithHistory(ctx, v);
                ctx->sMoveStack.mvf[v].mv.bvFlags |=
                    WouldGiveCheck(ctx, ctx->sMoveStack.mvf[v].mv);
                ASSERT(!(ctx->sMoveStack.mvf[v].bvFlags & MVF_MOVE_SEARCHED));
                g_SplitInfo[u].mvf[g_SplitInfo[u].uRemainingMoves] =
                    ctx->sMoveStack.mvf[v];
#ifdef DEBUG
                ctx->sMoveStack.mvf[v].bvFlags |= MVF_MOVE_SEARCHED;
#endif
            }
            g_SplitInfo[u].uOnDeckMove = 0;
            g_SplitInfo[u].uNumMoves = g_SplitInfo[u].uRemainingMoves;
#ifdef DEBUG
            for (v = uMoveNum; v < ctx->sMoveStack.uEnd[ctx->uPly]; v++)
            {
                ASSERT(ctx->sMoveStack.mvf[v].bvFlags & MVF_MOVE_SEARCHED);
                ASSERT(SanityCheckMove(&ctx->sPosition,
                                       ctx->sMoveStack.mvf[v].mv));
            }
#endif
            //
            // See if we can get some help here or we have to go it
            // alone.  Note: past this point the split we are using
            // may have threads under it -- be careful.
            //
            for (v = 0; v < g_uNumHelperThreads; v++)
            {
                if (g_HelperThreads[v].uAssignment == IDLE)
                {
                    //
                    // Note: there could already be a thread searching
                    // this split; we must obtain its lock now to mess
                    // with the helper count.
                    //
                    AcquireSpinLock(&(g_SplitInfo[u].uLock));
                    g_SplitInfo[u].uNumThreadsHelping += 1;
                    ReleaseSpinLock(&(g_SplitInfo[u].uLock));
                    ASSERT(g_SplitInfo[u].uNumThreadsHelping > 2);
                    ASSERT(g_uNumHelpersAvailable > 0);
                    g_uNumHelpersAvailable -= 1;
                    ASSERT(g_uNumHelpersAvailable >= 0);
                    ASSERT(g_uNumHelpersAvailable < g_uNumHelperThreads);
                    g_HelperThreads[v].uAssignment = u;
                }
            }
            UNLOCK_SPLITS;

            //
            // Update the context of the thread that is initiating the
            // split with a pointer to the split info node we are using.
            //
            for (uSplitNum = 0;
                 uSplitNum < NUM_SPLIT_PTRS_IN_CONTEXT;
                 uSplitNum++)
            {
                if (ctx->pSplitInfo[uSplitNum] == NULL)
                {
                    ctx->pSplitInfo[uSplitNum] = &(g_SplitInfo[u]);
                    break;
                }
            }
            if (uSplitNum >= NUM_SPLIT_PTRS_IN_CONTEXT)
            {
                ASSERT(FALSE);
                return(INVALID_SCORE);
            }
            ASSERT(ctx->pSplitInfo[uSplitNum] == &(g_SplitInfo[u]));

            //
            // Go search it
            //
            INC(ctx->sCounters.parallel.uNumSplits);
            HelpSearch(ctx, u);

            //
            // We are done searching under this node... make sure all
            // helpers are done too.  When everyone is finished the
            // refcount on this split node will be one because every
            // thread decremented it once and we double referenced
            // it initially.
            //
            while(g_SplitInfo[u].uNumThreadsHelping != 1)
            {
                ASSERT(g_SplitInfo[u].uNumThreadsHelping != 0);
                if (g_fExitProgram) break;
            }

            //
            // Note: past this point we are the only ones using the
            // split until we return it to the pool by making its
            // refcount zero again.
            //
#ifdef DEBUG
            ASSERT((g_SplitInfo[u].uNumThreadsHelping == 1) ||
                   (g_fExitProgram));
            SystemDeferExecution(rand() % 2);
            ASSERT((g_SplitInfo[u].uNumThreadsHelping == 1) ||
                   (g_fExitProgram));
            if (g_SplitInfo[u].iBestScore < g_SplitInfo[u].iBeta)
            {
                for (v = 0; v < g_SplitInfo[u].uRemainingMoves; v++)
                {
                    ASSERT(g_SplitInfo[u].mvf[v].mv.uMove);
                    ASSERT(g_SplitInfo[u].mvf[v].bvFlags & MVF_MOVE_SEARCHED);
                }
            }
#endif
#ifdef PERF_COUNTERS
            //
            // Grab counters.  Technically we should do this under a
            // lock because we want to ensure that any pending memory
            // operations from other cpus are flushed.  But I don't
            // really care too much about these counters and am trying
            // to reduce lock contention.
            //
            if (TRUE == g_SplitInfo[u].fTerminate)
            {
                ASSERT(g_SplitInfo[u].iBestScore >= g_SplitInfo[u].iBeta);
                INC(ctx->sCounters.parallel.uNumSplitsTerminated);
            }
            ctx->sCounters.tree.u64BetaCutoffs =
                g_SplitInfo[u].sCounters.tree.u64BetaCutoffs;
            ctx->sCounters.tree.u64BetaCutoffsOnFirstMove =
                g_SplitInfo[u].sCounters.tree.u64BetaCutoffsOnFirstMove;
#endif
            //
            // Pop off the split info ptr from the stack in the thread's
            // context.
            //
            ASSERT(ctx->pSplitInfo[uSplitNum] == &(g_SplitInfo[u]));
            ctx->pSplitInfo[uSplitNum] = NULL;

            //
            // Grab alpha, bestscore, bestmove, PV etc...  The lock
            // needs to be here to flush any pending memory writes
            // from other processors.
            //
            LOCK_SPLITS;
            ctx->sCounters.tree.u64TotalNodeCount =
                g_SplitInfo[u].sCounters.tree.u64TotalNodeCount;
            iScore = *piBestScore = g_SplitInfo[u].iBestScore;
            *pmvBest = g_SplitInfo[u].mvBest;
            if ((*piAlpha < iScore) && (iScore < iBeta))
            {
                ASSERT(IS_SAME_MOVE(*pmvBest, g_SplitInfo[u].PV[0]));
                ASSERT((*pmvBest).uMove != 0);
                v = 0;
                do
                {
                    ASSERT((ctx->uPly + v) < MAX_PLY_PER_SEARCH);
                    ASSERT(v < MAX_PLY_PER_SEARCH);
                    ctx->sPlyInfo[ctx->uPly].PV[ctx->uPly+v] =
                        g_SplitInfo[u].PV[v];
                    if (0 == g_SplitInfo[u].PV[v].uMove) break;
                    v++;
                }
                while(1);
            }
            *piAlpha = g_SplitInfo[u].iAlpha;
            ASSERT(iBeta == g_SplitInfo[u].iBeta);
            g_uNumSplitsAvailable++;
            g_SplitInfo[u].uNumThreadsHelping = 0;
            UNLOCK_SPLITS;
            ctx->sMoveStack.uBegin[ctx->uPly] = uOldStart;
#ifdef DEBUG
            ASSERT(PositionsAreEquivalent(&board, &ctx->sPosition));
            VerifyPositionConsistency(&ctx->sPosition, FALSE);
            ASSERT(IS_VALID_SCORE(iScore) || WE_SHOULD_STOP_SEARCHING);
#endif
            return(iScore);
        }
    }

    //
    // There was no split available for us; unlock and continue in
    // search.
    //
    UNLOCK_SPLITS;
    return(INVALID_SCORE);
}


static void
_UpdateSplitInfo(IN SEARCHER_THREAD_CONTEXT *ctx,
                 IN MOVE mv,
                 IN SCORE iScore,
                 IN ULONG u)
/**

Routine description:

    We think we have some information learned from search to store in
    the split node.  Take the lock and see if we really do.

Parameters:

    SEARCHER_THREAD_CONTEXT *ctx : context of thread updating split
    MOVE mv : move it just searched
    SCORE iScore : score of the move's subtree
    ULONG u : split node number

Return value:

    static void

**/
{
    ULONG v;

    AcquireSpinLock(&(g_SplitInfo[u].uLock));

    //
    // See if this split is shutting down
    //
    if (TRUE == g_SplitInfo[u].fTerminate)
    {
        ReleaseSpinLock(&(g_SplitInfo[u].uLock));
        return;
    }

    if (iScore > g_SplitInfo[u].iBestScore)
    {
        //
        // We found a move better than the best so far, so we want to
        // update the split node and possibly raise alpha.
        //
        g_SplitInfo[u].iBestScore = iScore;
        g_SplitInfo[u].mvBest = mv;
        if (iScore > g_SplitInfo[u].iAlpha)
        {
            if (iScore >= g_SplitInfo[u].iBeta)
            {
                //
                // We failed high so we want to update the split node
                //
                g_SplitInfo[u].fTerminate = TRUE;
                g_SplitInfo[u].PV[0] = NULLMOVE;
                ReleaseSpinLock(&(g_SplitInfo[u].uLock));
                return;
            }

            //
            // Normal PV move, update the split's PV.
            //
            g_SplitInfo[u].iAlpha = iScore;
            UpdatePV(ctx, mv);
            ASSERT(ctx->uPly < MAX_PLY_PER_SEARCH);
            ASSERT(ctx->sPlyInfo[ctx->uPly].PV[ctx->uPly].uMove);
            for (v = 0; v < MAX_PLY_PER_SEARCH; v++)
            {
                ASSERT((ctx->uPly + v) < MAX_PLY_PER_SEARCH);
                mv = ctx->sPlyInfo[ctx->uPly].PV[ctx->uPly + v];
                g_SplitInfo[u].PV[v] = mv;
                if (0 == mv.uMove) break;
            }
        }
    }
    ReleaseSpinLock(&(g_SplitInfo[u].uLock));
}


static void
_SetFinalStats(IN SEARCHER_THREAD_CONTEXT *ctx,
               IN ULONG u)
/**

Routine description:

    We are exiting the split node (because it ran out of moves or
    because someone failed high).  Update some stats on the way out.

Parameters:

    SEARCHER_THREAD_CONTEXT *ctx,
    ULONG u

Return value:

    static void

**/
{
    //
    // Before we stop searching this node, update some stuff.
    //
    AcquireSpinLock(&(g_SplitInfo[u].uLock));

    //
    // Counters to persist in the main counter struct via the split.
    //
    g_SplitInfo[u].sCounters.tree.u64TotalNodeCount +=
        ctx->sCounters.tree.u64TotalNodeCount;
    g_SplitInfo[u].sCounters.tree.u64BetaCutoffs +=
        ctx->sCounters.tree.u64BetaCutoffs;
    g_SplitInfo[u].sCounters.tree.u64BetaCutoffsOnFirstMove +=
        ctx->sCounters.tree.u64BetaCutoffsOnFirstMove;

    //
    // TODO: Any other counters we care about?
    //

    //
    // IDEA: Save the killers from this context to bring back to main
    //

    //
    // Decrement threadcount in this split.
    // Note: the main thread incremented it by two.
    //
    ASSERT((g_SplitInfo[u].uNumThreadsHelping > 1) || (g_fExitProgram));
    g_SplitInfo[u].uNumThreadsHelping -= 1;
    ASSERT((g_SplitInfo[u].uNumThreadsHelping >= 1) || (g_fExitProgram));
    ReleaseSpinLock(&(g_SplitInfo[u].uLock));
}


static MOVE
_GetNextParallelMove(OUT SCORE *piAlpha,
                     OUT SCORE *piBestScore,
                     OUT ULONG *puMoveNumber,
                     IN ULONG u)
/**

Routine description:

    Retrieve the next parallel move to search at the split node.  Also
    update alpha and bestscore.

Parameters:

    SCORE *piAlpha : current alpha
    SCORE *piBestScore : current bestscore
    ULONG *puMoveNumber : the move number of the returned move
    ULONG u : split number

Return value:

    static MOVE

**/
{
    MOVE mv = {0};

    AcquireSpinLock(&(g_SplitInfo[u].uLock));
    if (g_SplitInfo[u].fTerminate)
    {
        ReleaseSpinLock(&(g_SplitInfo[u].uLock));
        return(mv);
    }

    if (g_SplitInfo[u].uRemainingMoves != 0)
    {
        //
        // There is another move to search, get it.
        //
        mv = g_SplitInfo[u].mvf[g_SplitInfo[u].uOnDeckMove].mv;
#ifdef DEBUG
        ASSERT(!(g_SplitInfo[u].mvf[g_SplitInfo[u].uOnDeckMove].bvFlags &
                 MVF_MOVE_SEARCHED));
        g_SplitInfo[u].mvf[g_SplitInfo[u].uOnDeckMove].bvFlags |=
            MVF_MOVE_SEARCHED;
        ASSERT(mv.uMove);
        ASSERT(SanityCheckMove(&g_SplitInfo[u].sSplitPosition, mv));
#endif
        g_SplitInfo[u].uRemainingMoves--;
        *puMoveNumber = g_SplitInfo[u].uOnDeckMove;
        g_SplitInfo[u].uOnDeckMove++;
    }
    *piAlpha = g_SplitInfo[u].iAlpha;
    *piBestScore = g_SplitInfo[u].iBestScore;
    ReleaseSpinLock(&(g_SplitInfo[u].uLock));
    ASSERT(*piBestScore <= *piAlpha);
    return(mv);
}


void
HelpSearch(IN OUT SEARCHER_THREAD_CONTEXT *ctx,
           IN ULONG u)
/**

Routine description:

    Help search the split position.

Parameters:

    SEARCHER_THREAD_CONTEXT *ctx : thread context
    ULONG u : the split index to help search

Return value:

    void

**/
{
    SCORE iScore;
    SCORE iAlpha = 0;
    SCORE iBeta;
    SCORE iBestScore = 0;
    SCORE iRoughEval;
    ULONG uOrigDepth;
    ULONG uDepth;
    ULONG uMoveNum = 0;
    INT iOrigExtend;
    INT iExtend;
    MOVE mv;
#ifdef DEBUG
    POSITION board;

    memcpy(&board, &ctx->sPosition, sizeof(POSITION));
    ASSERT(PositionsAreEquivalent(&board, &g_SplitInfo[u].sSplitPosition));
#endif

    iOrigExtend = g_SplitInfo[u].iPositionExtend;
    iBeta = g_SplitInfo[u].iBeta;
    uOrigDepth = g_SplitInfo[u].uDepth;
    ctx->sSearchFlags = g_SplitInfo[u].sSearchFlags;

    do
    {
        iExtend = iOrigExtend;
        uDepth = uOrigDepth;
        ASSERT(ctx->uPly == g_SplitInfo[u].uSplitPly);
        mv = _GetNextParallelMove(&iAlpha, &iBestScore, &uMoveNum, u);
        if (mv.uMove == 0) break;                 // Split is terminating

        ASSERT(IS_VALID_SCORE(iBestScore));
        ASSERT(uMoveNum < MAX_MOVES_PER_PLY);
        ASSERT(IS_SAME_MOVE(mv,
            ctx->sMoveStack.mvf[ctx->sMoveStack.uBegin[ctx->uPly]+uMoveNum].mv));
        ASSERT(uDepth <= MAX_DEPTH_PER_SEARCH);
        ASSERT(IS_VALID_SCORE(iAlpha));
        ASSERT(IS_VALID_SCORE(iBeta));
        ASSERT(iAlpha < iBeta);
        ASSERT(iExtend >= -ONE_PLY);
        ASSERT(iExtend <= +ONE_PLY);

        if (MakeMove(ctx, mv))
        {
            ASSERT((IS_CHECKING_MOVE(mv) &&
                    InCheck(&ctx->sPosition, ctx->sPosition.uToMove)) ||
                   (!IS_CHECKING_MOVE(mv) &&
                    !InCheck(&ctx->sPosition, ctx->sPosition.uToMove)));
            iRoughEval = GetRoughEvalScore(ctx, iAlpha, iBeta, TRUE);

            // Compute extension
            ComputeMoveExtension(ctx,
                                 iAlpha,
                                 iBeta,
                                 (ctx->sMoveStack.uBegin[ctx->uPly - 1] +
                                  uMoveNum),
                                 iRoughEval,
                                 uDepth,
                                 &iExtend);

            //
            // Decide whether to history prune
            //
            if (TRUE == WeShouldDoHistoryPruning(iRoughEval,
                                                 iAlpha,
                                                 iBeta,
                                                 ctx,
                                                 uDepth,
                                                 (g_SplitInfo[u].uAlreadyDone +
                                                  uMoveNum + 1),
                                                 mv,
                                                 (g_SplitInfo[u].uAlreadyDone +
                                                  uMoveNum + 1),
                                                 iExtend))
            {
                ASSERT(iExtend == 0);
                iExtend = -ONE_PLY;
                ctx->sPlyInfo[ctx->uPly].iExtensionAmount = -ONE_PLY;
            }

            //
            // Compute next depth
            //
            uDepth = uDepth - ONE_PLY + iExtend;
            if (uDepth >= MAX_DEPTH_PER_SEARCH) uDepth = 0;

            iScore = -Search(ctx, -iAlpha - 1, -iAlpha, uDepth);
            if ((iAlpha < iScore) && (iScore < iBeta))
            {
                iScore = -Search(ctx, -iBeta, -iAlpha, uDepth);
            }

            //
            // Decide whether to re-search reduced branches to full depth.
            //
            if ((iExtend < 0) && (iScore >= iBeta))
            {
                uDepth += ONE_PLY;
                ctx->sPlyInfo[ctx->uPly].iExtensionAmount = 0;
                iScore = -Search(ctx, -iBeta, -iAlpha, uDepth);
            }
            UnmakeMove(ctx, mv);
            ASSERT(PositionsAreEquivalent(&ctx->sPosition, &board));

            if (TRUE == g_SplitInfo[u].fTerminate) break;
            if (iScore > iBestScore)
            {
                _UpdateSplitInfo(ctx, mv, iScore, u);
            }
        }
    }
    while(1);

    _SetFinalStats(ctx, u);
}
#endif
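
//
// Footnote on the memory-model quote in the header: the spin locks used
// throughout this module (AcquireSpinLock / ReleaseSpinLock, implemented
// elsewhere in the engine) are what make the shared SPLIT_INFO fields
// safe to read and write on x86.  The sketch below is NOT the engine's
// actual implementation; it only illustrates point 3 of the quote -- a
// LOCK-prefixed compare-exchange acts as a full memory fence -- and it
// assumes a Win32 environment where InterlockedCompareExchange is
// available.  The names are made up for illustration.
//
//     void ExampleAcquireSpinLock(volatile LONG *pLock)
//     {
//         // LOCK CMPXCHG under the hood: full fence whether or not the
//         // exchange succeeds.
//         while (InterlockedCompareExchange((LONG volatile *)pLock, 1, 0) != 0)
//         {
//             // spin until the current holder stores 0 back
//         }
//     }
//
//     void ExampleReleaseSpinLock(volatile LONG *pLock)
//     {
//         *pLock = 0;   // a plain store is a store.release on x86 (point 1)
//     }
//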