examples/wordle/wordle.py

   1 #!/usr/bin/env python3
   2 # -*- coding: utf-8 -*-
   3
   4 """
   5 Wordle!  PLAY, AUTOPLAY, CHEAT, PRECOMPUTE and SELFTEST.
   6 """
   7
   8 import enum
   9 import hashlib
  10 import logging
  11 import math
  12 import multiprocessing
  13 import random
  14 import re
  15 import string
  16 import sys
  17 import time
  18 from collections import Counter, defaultdict
  19 from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
  20
  21 from pyutils import ansi, bootstrap, config, list_utils, string_utils
  22 from pyutils.collectionz.shared_dict import SharedDict
  23 from pyutils.files import file_utils
  24 from pyutils.parallelize import executors
  25 from pyutils.parallelize import parallelize as par
  26 from pyutils.parallelize import smart_future
  27 from pyutils.typez import histogram
  28
  29 logger = logging.getLogger(__name__)
  30 args = config.add_commandline_args(
  31     f'Wordle! ({__file__})',
  32     'Args related to cheating at Wordle.',
  33 )
  34 args.add_argument(
  35     '--mode',
  36     type=str,
  37     default='PLAY',
  38     choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
  39     metavar='MODE',
  40     help="""RAW|
  41 Our mode of operation:
  42
  43       PLAY = play wordle with me!  Pick a random solution or
  44              specify a solution with --template.
  45
  46      CHEAT = given a --template and, optionally, --letters_in_word
  47              and/or --letters_to_avoid, return the best guess word;
  48
  49   AUTOPLAY = given a complete word in --template, guess it step
  50              by step showing work;
  51
  52   SELFTEST = autoplay every possible solution keeping track of
  53              wins/losses and average number of guesses;
  54
  55 PRECOMPUTE = populate hash table with optimal guesses.
  56     """,
  57 )
  58 args.add_argument(
  59     '--template',
  60     type=str,
  61     help='The current board in PLAY, CHEAT, AUTOPLAY mode. Use _\'s for unknown letters.',
  62 )
  63 args.add_argument(
  64     '--letters_to_avoid',
  65     type=str,
  66     help='In CHEAT mode, the set of letters known to not be in the solution.',
  67     metavar='LETTERS',
  68 )
  69 args.add_argument(
  70     '--letters_in_word',
  71     type=str,
  72     help="""RAW|
  73 Letters known to be in the solution but whose positions are not yet known.
  74
  75 For example:
  76
  77   t0i23 => says we tried a 't' as the first letter (0) so we
  78            know it's in the word and not there.  We also know
  79            there's an 'i' which is not the middle letter (2)
  80            and is not the 4th letter (3).  Note the zero-based
  81            position counting semantics (i.e. the first letter
  82            is letter 0).
  83   e34f0 => mean we know there's an 'e' and an 'f'; we've attempted
  84            former in positions 3-4 (recall, 4 is the last spot in
  85            a standard 5 letter wordle) and the latter in the
  86            first spot (0) already.
  87     """,
  88     metavar='<LETTER><ZERO-BASED_POSITION(S)_ALREADY_TRIED>...',
  89 )
  90 args.add_argument(
  91     '--solutions_file',
  92     type=str,
  93     default='wordle_solutions.txt',
  94     help='Where can I find a valid word list for solutions?',
  95 )
  96 args.add_argument(
  97     '--guesses_file',
  98     type=str,
  99     default='wordle_guesses.txt',
 100     help='Where can I find a valid word list for guesses?',
 101 )
 102 args.add_argument(
 103     '--hash_file',
 104     type=str,
 105     default='wordle_hash.txt',
 106     help='Where can I find my precomputed hash file?',
 107 )
 108
 109 # Type aliases for code readability
 110 Position = int
 111 Letter = str
 112 Word = str
 113 Fprint = str
 114 Bucket = int
 115
 116
 117 class Hint(enum.IntFlag):
 118     """Green, yellow or gray?"""
 119
 120     GRAY_WRONG = 0
 121     YELLOW_LETTER_RIGHT_POSITION_WRONG = 1
 122     GREEN_LETTER_IN_RIGHT_POSITION = 2
 123
 124
 125 class WordStateUndoType(enum.IntFlag):
 126     """Used to record guess undo information by type."""
 127
 128     LETTER_IN_SOLUTION_MIN_CHANGE = 1
 129     LETTER_IN_SOLUTION_MAX_CHANGE = 2
 130     LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX = 3
 131     YELLOW_LETTER_ADDED = 4
 132     GREEN_LETTER_ADDED = 5
 133     GRAY_LETTER_ADDED = 6
 134
 135
 136 class WordStateUndo(NamedTuple):
 137     """A record per guess containing all the info needed to undo it."""
 138
 139     guess: Word
 140     hints: Dict[Position, Hint]
 141     mods: List[Dict[str, Any]]
 142
 143
 144 class WordState(object):
 145     """Keeps track of the current board state, previous guesses and hints,
 146     and which letters are known/unknown/eliminated, etc...
 147
 148     """
 149
 150     def __init__(
 151         self,
 152         solution_length: int,
 153         max_letter_population_per_word: Dict[Letter, int],
 154         *,
 155         letters_at_known_positions: Optional[Dict[Position, Letter]] = None,
 156         letters_at_unknown_positions: Optional[Dict[Letter, Set[Position]]] = None,
 157         letters_excluded: Optional[Set[Letter]] = None,
 158     ):
 159         """Initialize the WordState given the length of the solution word
 160         (number of letters), maximum number of times a given letter
 161         may appear in the solution, and, optionally, some letter
 162         restrictions.  All positions below are zero-based.
 163
 164         letters_at_known_positions: position => letter map
 165         letters_at_unknown_positions: letter => set(position(s) tried) map
 166         letters_excluded: set of letters known to not be in the word
 167
 168         """
 169         super().__init__()
 170         assert solution_length > 0
 171         self.solution_length: int = solution_length
 172
 173         # All info necessary to undo a guess later.  We'll add an entry
 174         # for every guess in a stack.
 175         self.undo_info: List[WordStateUndo] = []
 176
 177         # We're going to use internal methods to set up the initial
 178         # state and they all require an undo record to populate... but
 179         # there's no initial guess and we'll never undo beyond here.
 180         # So create a fake undo record for them to scribble on and
 181         # throw it away later.
 182         fake_undo: WordStateUndo = WordStateUndo(
 183             guess="fake, will be thrown away",
 184             hints={},
 185             mods=[],
 186         )
 187
 188         # Across all solutions, how many times did each letter appear
 189         # per word, at max.  For each letter we learn is in the word
 190         # we'll track a min..max valid occurrances; this is the
 191         # initial max.  It's the max times a given letter appears in
 192         # any word in the solution set.
 193         assert max_letter_population_per_word
 194         self.max_letter_population_per_word = max_letter_population_per_word
 195
 196         # The min..max population for every letter we know in the solution.
 197         # The List[int] here has two entries: min and max.
 198         self.letters_in_solution: Dict[Letter, List[int]] = {}
 199
 200         # Green letters by where they are.
 201         self.letters_at_known_positions: Dict[Position, Letter] = {}
 202         if letters_at_known_positions is not None:
 203             for n, letter in letters_at_known_positions.items():
 204                 self.record_green(n, letter, fake_undo)
 205
 206         # Yellow letters to where we've already tried them (i.e. where
 207         # the cannot be.)
 208         self.letters_at_unknown_positions: Dict[Letter, Set[Position]] = defaultdict(
 209             set
 210         )
 211         if letters_at_unknown_positions is not None:
 212             for letter, tried_pos in letters_at_unknown_positions.items():
 213                 for n in tried_pos:
 214                     self.record_yellow(n, letter, fake_undo)
 215
 216         # Excluded letters discovered so far.
 217         self.letters_excluded: Set[Letter] = set()
 218         if letters_excluded is not None:
 219             for letter in letters_excluded:
 220                 self.record_gray(letter, fake_undo)
 221
 222     def get_template(self) -> str:
 223         """Return a representation of the board, e.g. __a_e"""
 224
 225         template = '_' * self.solution_length
 226         for n in self.letters_at_known_positions:
 227             letter = self.letters_at_known_positions[n]
 228             template = template[0:n] + letter + template[n + 1 :]
 229         return template
 230
 231     def get_alphabet(self) -> str:
 232         """Return a colorized set of letters eliminated and still viable."""
 233
 234         letters_remaining = set(string.ascii_lowercase)
 235         green_letters = set()
 236         yellow_letters = set()
 237         for letter in self.letters_excluded:
 238             letters_remaining.remove(letter)
 239         for letter in self.letters_at_known_positions.values():
 240             assert letter in letters_remaining
 241             green_letters.add(letter)
 242         for letter in self.letters_at_unknown_positions.keys():
 243             if len(self.letters_at_unknown_positions[letter]) > 0:
 244                 assert letter in letters_remaining
 245                 yellow_letters.add(letter)
 246         ret = ''
 247         for letter in string.ascii_lowercase:
 248             if letter in letters_remaining:
 249                 if letter in green_letters:
 250                     ret += ansi.bg('forest green')
 251                     ret += ansi.fg('black')
 252                     ret += letter
 253                     ret += ansi.reset()
 254                 elif letter in yellow_letters:
 255                     ret += ansi.bg('energy yellow')
 256                     ret += ansi.fg('black')
 257                     ret += letter
 258                     ret += ansi.reset()
 259                 else:
 260                     ret += letter
 261             else:
 262                 ret += ansi.fg('black olive')
 263                 ret += letter
 264                 ret += ansi.reset()
 265                 # ret += ' '
 266         return ret
 267
 268     def __repr__(self) -> str:
 269         return self.get_alphabet()
 270
 271     def adjust_existing_letters_in_solution_maxes(
 272         self, max_max: int, undo: WordStateUndo
 273     ) -> None:
 274         """We've determined, based on info about letters we know to be in the
 275         solution and their min..max counts, what the maximum max of
 276         any letter should be.  Check all letters we know to be in the
 277         solution and maybe tighten their maxes.  If we do, remember it
 278         for later undo purposes.
 279
 280         """
 281         for letter in self.letters_in_solution.keys():
 282             old_max = self.letters_in_solution[letter][1]
 283             new_max = min(max_max, old_max)
 284             if old_max != new_max:
 285                 self.letters_in_solution[letter][1] = new_max
 286                 undo.mods.append(
 287                     {
 288                         "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
 289                         "letter": letter,
 290                         "old_max": old_max,
 291                     }
 292                 )
 293
 294     def record_letter_in_solution(
 295         self,
 296         letter: Letter,
 297         undo: WordStateUndo,
 298         exact_count: Optional[int] = None,
 299     ) -> None:
 300         """Letter is a (maybe new, may existing in a special case, see below)
 301         letter that we know is in the solution.  For each letter we
 302         know is in the solution, we track a min..max window indicating
 303         how many times that letter is permitted to occur in the
 304         solution.  Manage those here.
 305
 306         """
 307
 308         # If letter is already here, allow it to change bounds if it
 309         # came with an exact_count.
 310         if letter in self.letters_in_solution:
 311             if exact_count is not None:
 312                 old_min = self.letters_in_solution[letter][0]
 313                 new_min = exact_count
 314                 if old_min != new_min:
 315                     self.letters_in_solution[letter][0] = new_min
 316                     undo.mods.append(
 317                         {
 318                             "type": WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE,
 319                             "letter": letter,
 320                             "old_min": old_min,
 321                         }
 322                     )
 323                 old_max = self.letters_in_solution[letter][1]
 324                 new_max = exact_count
 325                 if old_max != new_max:
 326                     self.letters_in_solution[letter][1] = new_max
 327                     undo.mods.append(
 328                         {
 329                             "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
 330                             "letter": letter,
 331                             "old_max": old_max,
 332                         }
 333                     )
 334
 335                 # Also... since we now know exactly how many of this
 336                 # letter there are, maybe tighten the rest's maxes.
 337                 if exact_count != 1:
 338                     num_letters_known = len(self.letters_in_solution) - 1 + exact_count
 339                     max_max = self.solution_length - (num_letters_known - 1)
 340                     self.adjust_existing_letters_in_solution_maxes(max_max, undo)
 341             return
 342
 343         # If we're here, letter is a newly discovered letter in the
 344         # solution.  Such a letter will likely affect the max count of
 345         # existing letters since, as new letters are discovered, there
 346         # is less room for the other ones in the solution.
 347         if exact_count is None:
 348             num_letters_known = len(self.letters_in_solution) + 1
 349         else:
 350             num_letters_known = len(self.letters_in_solution) + exact_count
 351         max_max = self.solution_length - (num_letters_known - 1)
 352         self.adjust_existing_letters_in_solution_maxes(max_max, undo)
 353
 354         # Finally, add the new letter's record with its initial min/max.
 355         if exact_count is None:
 356             initial_max = min(
 357                 max_max,
 358                 self.max_letter_population_per_word[letter],
 359             )
 360             initial_min = 1
 361         else:
 362             initial_max = exact_count
 363             initial_min = exact_count
 364         self.letters_in_solution[letter] = [initial_min, initial_max]
 365         undo.mods.append(
 366             {
 367                 "type": WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX,
 368                 "min": initial_min,
 369                 "max": initial_max,
 370                 "letter": letter,
 371             }
 372         )
 373
 374     def record_yellow(
 375         self,
 376         n: Position,
 377         letter: Letter,
 378         undo: WordStateUndo,
 379         num_correct_doppelgangers: Optional[int] = None,
 380     ) -> None:
 381         """We've discovered a new yellow letter or a new place where an
 382         existing yellow letter is still yellow.  If
 383         num_correct_doppelgangers is non-None, it means this letter is
 384         wrong (gray) in the current location but right (green or
 385         yellow) in other locations and we now know the precise count
 386         of this letter in the solution -- tell the
 387         record_letter_in_solution method.
 388
 389         """
 390         if n not in self.letters_at_unknown_positions[letter]:
 391             if (
 392                 len(self.letters_at_unknown_positions[letter]) == 0
 393                 or num_correct_doppelgangers is not None
 394             ):
 395                 self.record_letter_in_solution(letter, undo, num_correct_doppelgangers)
 396             self.letters_at_unknown_positions[letter].add(n)
 397             undo.mods.append(
 398                 {
 399                     "type": WordStateUndoType.YELLOW_LETTER_ADDED,
 400                     "letter": letter,
 401                     "position": n,
 402                 }
 403             )
 404
 405     def record_green(self, n: Position, letter: Letter, undo: WordStateUndo):
 406         """We found a new green letter."""
 407
 408         if n not in self.letters_at_known_positions:
 409             self.letters_at_known_positions[n] = letter
 410             undo.mods.append(
 411                 {
 412                     "type": WordStateUndoType.GREEN_LETTER_ADDED,
 413                     "letter": letter,
 414                     "position": n,
 415                 }
 416             )
 417             self.record_letter_in_solution(letter, undo)
 418
 419     def record_gray(self, letter: Letter, undo: WordStateUndo) -> None:
 420         """We found a new gray letter."""
 421
 422         if letter not in self.letters_excluded:
 423             self.letters_excluded.add(letter)
 424             undo.mods.append(
 425                 {
 426                     "type": WordStateUndoType.GRAY_LETTER_ADDED,
 427                     "letter": letter,
 428                 }
 429             )
 430
 431     def record_guess_and_hint(self, guess: Word, hints: Dict[Position, Hint]):
 432         """Make a guess and change state based on the hint.  Remember how to
 433         undo everything later.
 434
 435         """
 436         undo = WordStateUndo(guess=guess, hints=hints, mods=[])
 437         for n, letter in enumerate(guess):
 438             hint = hints[n]
 439
 440             if hint is Hint.GRAY_WRONG:
 441                 # Exclude letters we got WRONG _unless_ we guessed the
 442                 # same letter elsewhere and got a yellow/green there.
 443                 # In this case the WRONG hint is not saying this
 444                 # letter isn't in the word (it is) but rather that
 445                 # there aren't N+1 instances of this letter.  In this
 446                 # edge case we _do_ know that the letter is not valid
 447                 # in this position, though; treat it as yellow.
 448                 num_correct_doppelgangers = 0
 449                 for k, other in enumerate(guess):
 450                     if other == letter and n != k and hints[k] != Hint.GRAY_WRONG:
 451                         num_correct_doppelgangers += 1
 452                 if num_correct_doppelgangers == 0:
 453                     self.record_gray(letter, undo)
 454                 else:
 455                     # Treat this as a yellow; we know letter can't go
 456                     # here or it would have been yellow/green.
 457                     self.record_yellow(n, letter, undo, num_correct_doppelgangers)
 458             elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
 459                 self.record_yellow(n, letter, undo)
 460             elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
 461                 self.record_green(n, letter, undo)
 462         self.undo_info.append(undo)
 463
 464     def undo_guess(self) -> None:
 465         """Unmake a guess and revert back to a previous state."""
 466
 467         assert len(self.undo_info) > 0
 468         undo = self.undo_info[-1]
 469         self.undo_info = self.undo_info[:-1]
 470
 471         # We iterate the mods in reverse order so we never have weird
 472         # apply/undo ordering artifacts.
 473         for mod in reversed(undo.mods):
 474             mod_type = mod['type']
 475             letter = mod['letter']
 476             if mod_type is WordStateUndoType.GRAY_LETTER_ADDED:
 477                 self.letters_excluded.remove(letter)
 478             elif mod_type is WordStateUndoType.YELLOW_LETTER_ADDED:
 479                 pos = mod['position']
 480                 self.letters_at_unknown_positions[letter].remove(pos)
 481             elif mod_type is WordStateUndoType.GREEN_LETTER_ADDED:
 482                 pos = mod['position']
 483                 del self.letters_at_known_positions[pos]
 484             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE:
 485                 old_min = mod['old_min']
 486                 self.letters_in_solution[letter][0] = old_min
 487             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE:
 488                 old_max = mod['old_max']
 489                 self.letters_in_solution[letter][1] = old_max
 490             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX:
 491                 del self.letters_in_solution[letter]
 492
 493
 494 class AutoplayOracle(object):
 495     """The class that knows the right solution and can give hints in
 496     response to guesses.
 497
 498     """
 499
 500     def __init__(self, solution: Word):
 501         super().__init__()
 502         self.solution = solution.lower()
 503         self.solution_letter_count = Counter(self.solution)
 504         self.solution_letter_to_pos = defaultdict(set)
 505         for n, letter in enumerate(self.solution):
 506             self.solution_letter_to_pos[letter].add(n)
 507         self.hint_quota_by_letter = Counter(self.solution)
 508
 509     def judge_guess(self, guess: Word) -> Dict[Position, Hint]:
 510         """Returns a mapping from position -> Hint to indicate
 511         whether each letter in the guess string is green, yellow
 512         or gray.
 513
 514         """
 515         assert len(guess) == len(self.solution)
 516         hint_by_pos: Dict[Position, Hint] = {}
 517         letter_to_pos = defaultdict(set)
 518         hint_quota_by_letter = self.hint_quota_by_letter.copy()
 519
 520         for position, letter in enumerate(guess):
 521             letter_to_pos[letter].add(position)
 522
 523             if letter == self.solution[position]:  # green
 524                 if hint_quota_by_letter[letter] == 0:
 525                     # We must tell them this letter is green in this
 526                     # position but we've exhausted our quota of hints
 527                     # for this letter.  There must exist some yellow
 528                     # hint we previously gave that we need to turn to
 529                     # gray (wrong).
 530                     for other in letter_to_pos[letter]:
 531                         if other != position:
 532                             if (
 533                                 hint_by_pos[other]
 534                                 == Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
 535                             ):
 536                                 hint_by_pos[other] = Hint.GRAY_WRONG
 537                                 break
 538                 hint_by_pos[position] = Hint.GREEN_LETTER_IN_RIGHT_POSITION
 539
 540             elif self.solution_letter_count[letter] > 0:  # yellow
 541                 if hint_quota_by_letter[letter] == 0:
 542                     # We're out of hints for this letter.  All the
 543                     # other hints must be green or yellow.  Tell them
 544                     # this one is gray (wrong).
 545                     hint_by_pos[position] = Hint.GRAY_WRONG
 546                 else:
 547                     hint_by_pos[position] = Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
 548
 549             else:  # gray
 550                 hint_by_pos[position] = Hint.GRAY_WRONG
 551             if hint_quota_by_letter[letter] > 0:
 552                 hint_quota_by_letter[letter] -= 1
 553         return hint_by_pos
 554
 555
 556 class AutoPlayer(object):
 557     """This is the class that knows how to guess given the current game
 558     state along with several support methods.
 559
 560     """
 561
 562     def __init__(self):
 563         super().__init__()
 564         self.solution_length = None
 565
 566         # Guess judge
 567         self.oracle = None
 568
 569         # Board state tracker and move undoer
 570         self.word_state = None
 571
 572         # The position hash has known best guesses for some subset of
 573         # remaining words.  See --mode=PRECOMPUTE for how to populate.
 574         self.position_hash = {}
 575         filename = config.config['hash_file']
 576         if filename is not None and file_utils.is_readable(filename):
 577             logger.debug('Initializing position hash from %s...', filename)
 578             with open(filename, 'r') as rf:
 579                 for line in rf:
 580                     line = line[:-1]
 581                     line = line.strip()
 582                     line = re.sub(r'#.*$', '', line)
 583                     if len(line) == 0:
 584                         continue
 585                     (key, word) = line.split(':')
 586                     (count, fprint) = key.split('@')
 587                     count = count.strip()
 588                     count = int(count)
 589                     fprint = fprint.strip()
 590                     word = word.strip()
 591                     self.position_hash[(count, fprint)] = word
 592             logger.debug('...hash contains %s entries.', len(self.position_hash))
 593
 594         # All legal solutions pre-sorted by length.
 595         self.all_possible_solutions_by_length = defaultdict(list)
 596         filename = config.config['solutions_file']
 597         if filename is not None and file_utils.is_readable(filename):
 598             logger.debug('Initializing valid solution word list from %s...', filename)
 599             with open(filename) as rf:
 600                 for word in rf:
 601                     word = word[:-1]
 602                     word = word.lower()
 603                     self.all_possible_solutions_by_length[len(word)].append(word)
 604         else:
 605             logger.error('A valid --solutions_file is required.')
 606             sys.exit(0)
 607
 608         # All legal guesses pre-sorted by length.
 609         self.all_possible_guesses_by_length = defaultdict(list)
 610         filename = config.config['guesses_file']
 611         if filename is not None and file_utils.is_readable(filename):
 612             logger.debug('Initializing legal guess word list from %s...', filename)
 613             with open(filename) as rf:
 614                 for word in rf:
 615                     word = word[:-1]
 616                     word = word.lower()
 617                     self.all_possible_guesses_by_length[len(word)].append(word)
 618         else:
 619             logger.error('A valid --guesses_file is required.')
 620             sys.exit(0)
 621
 622     def new_word(
 623         self,
 624         length: int,
 625         oracle: Optional[AutoplayOracle],
 626         word_state: Optional[WordState],
 627     ) -> None:
 628         """Here's a new word to guess.  Reset state to play a new game.  On
 629         principle we don't give this class the solution.  Instead, we
 630         just give it length to indicate the number of characters in
 631         the solution.  Guesses will be turned into hints by the
 632         oracle.  The current game state is tracked by the
 633         word_state.
 634
 635         """
 636         self.solution_length = length
 637         self.oracle = oracle
 638         self.word_state = word_state
 639
 640     def get_all_possible_solutions(self) -> List[Word]:
 641         """Given the current known word state, compute the subset of all
 642         possible solutions that are still possible.  Note: this method
 643         guarantees to return the possible solutions list in sorted
 644         order.
 645
 646         """
 647
 648         def is_possible_solution(solution: Word, word_state: WordState) -> bool:
 649             """Note: very perf sensitive code; inner loop."""
 650
 651             letters_seen: Dict[Letter, int] = defaultdict(int)
 652             for n, letter in enumerate(solution):
 653                 # The word can't contain letters we know aren't in the
 654                 # solution.
 655                 if letter in word_state.letters_excluded:
 656                     return False
 657
 658                 # If we know a letter is in a position, solution words
 659                 # must have that letter in that position.
 660                 if (
 661                     n in word_state.letters_at_known_positions
 662                     and letter != word_state.letters_at_known_positions[n]
 663                 ):
 664                     return False
 665
 666                 # If we already tried this letter in this position and
 667                 # it wasn't green, this isn't a possible solution.
 668                 if n in word_state.letters_at_unknown_positions[letter]:
 669                     return False
 670
 671                 letters_seen[letter] += 1
 672
 673             # Finally, the word must include all letters presently
 674             # known to be in the solution to be viable.
 675             for letter, min_max in word_state.letters_in_solution.items():
 676                 num_seen = letters_seen[letter]
 677                 if num_seen < min_max[0]:
 678                     return False
 679                 elif num_seen > min_max[1]:
 680                     return False
 681             return True
 682
 683         possible_solutions = []
 684         for word in self.all_possible_solutions_by_length[self.solution_length]:
 685             if is_possible_solution(word, self.word_state):
 686                 possible_solutions.append(word)
 687
 688         # Note: because self.all_possible_solutions_by_length is sorted
 689         # and we iterated it in order, possible_solutions is also sorted
 690         # already.
 691         # assert possible_solutions == sorted(possible_solutions)
 692         return possible_solutions
 693
 694     def get_frequency_and_frequency_by_position_tables(
 695         self,
 696         possible_solutions: List[Word],
 697     ) -> Tuple[Dict[Letter, float], List[Dict[Letter, float]]]:
 698         """This method is used by heuristic mode.  It returns two tables:
 699
 700         1. The frequency count by letter for words in possible_solutions
 701            to encourage guesses composed of common letters.
 702         2. A letter-in-position bonus term to encourage guesses with
 703            letters in positions that occur frequently in the solutions.
 704
 705         """
 706         template = self.word_state.get_template()
 707         pfreq: List[Dict[Letter, float]] = []
 708         pop_letters: List[Dict[Letter, int]] = []
 709         for n in range(len(template)):
 710             pop_letters.append(defaultdict(int))
 711             pfreq.append({})
 712
 713         freq: Dict[Letter, float] = {}
 714         for word in possible_solutions:
 715             letter_filter = set()
 716             for n, letter in enumerate(word):
 717
 718                 # Only count frequency of letters still to be filled in.
 719                 if template[n] != '_':
 720                     continue
 721
 722                 # Do not give a full frequency bonus to repeated letters.
 723                 if letter not in letter_filter:
 724                     freq[letter] = freq.get(letter, 0) + 1
 725                     letter_filter.add(letter)
 726                 else:
 727                     freq[letter] += 0.1
 728                 pop_letters[n][letter] += 1  # type: ignore
 729
 730         # Reward guesses that place the most popular letter in a position.
 731         # Save the top 3 letters per position.
 732         # don't give a bonus if letters spread all over the place?
 733         # only give a bonus to a given letter in one position?
 734         total_letters_in_position = len(possible_solutions)
 735         for n in range(len(template)):
 736             for letter, count in sorted(pop_letters[n].items(), key=lambda x: -x[1]):
 737                 if count <= 1:
 738                     break
 739                 normalized = count / total_letters_in_position
 740                 assert 0.0 < normalized <= 1.0
 741                 if normalized > 0.08:
 742                     pfreq[n][letter] = normalized
 743                 else:
 744                     break
 745         return (freq, pfreq)
 746
 747     def dump_frequency_data(
 748         self,
 749         possible_solutions: List[Word],
 750         letter_frequency: Dict[Letter, float],
 751         letter_position_frequency: List[Dict[Letter, float]],
 752     ) -> None:
 753         """Just logs the frequency tables we computed above."""
 754
 755         logger.debug('Unknown letter(s) frequency table: ')
 756         out = ''
 757         for letter, weight in sorted(letter_frequency.items(), key=lambda x: -x[1]):
 758             out += f'{letter}:{weight}, '
 759         if len(out) > 0:
 760             out = out[:-2]
 761             logger.debug(out)
 762
 763         logger.debug('Unknown letter-in-position bonus table: ')
 764         out = ''
 765         for n in range(len(possible_solutions[0])):
 766             pop = letter_position_frequency[n]
 767             for letter, weight in sorted(pop.items(), key=lambda x: -x[1]):
 768                 out += f'pos{n}:{letter}@{weight:.5f}, '
 769         if len(out) > 0:
 770             out = out[:-2]
 771             logger.debug(out)
 772
 773     def position_in_hash(self, num_potential_solutions: int, fprint: Fprint):
 774         """Is a position in our hash table?"""
 775         return (num_potential_solutions, fprint) in self.position_hash
 776
 777     def guess_word(self) -> Optional[Word]:
 778         """Compute a guess word and return it.  Returns None on error."""
 779
 780         template = self.word_state.get_template()
 781         possible_solutions = self.get_all_possible_solutions()
 782         num_possible_solutions = len(possible_solutions)
 783         fprint = hashlib.md5(repr(possible_solutions).encode('ascii')).hexdigest()
 784
 785         n = num_possible_solutions
 786         logger.debug(
 787             string_utils.make_contractions(
 788                 f'There {string_utils.is_are(n)} {n} word{string_utils.pluralize(n)} '
 789                 + f'({template} @ {fprint}).'
 790             )
 791         )
 792         if num_possible_solutions < 30:
 793             logger.debug(
 794                 string_utils.make_contractions(
 795                     f'{string_utils.capitalize_first_letter(string_utils.it_they(n))} '
 796                     + f'{string_utils.is_are(n)}: {possible_solutions}'
 797                 )
 798             )
 799         logger.debug(
 800             'Letter count restrictions: %s', self.word_state.letters_in_solution
 801         )
 802         if num_possible_solutions == 0:
 803             logger.error('No possible solutions?!')
 804             print('No possible solutions?!', file=sys.stderr)
 805             print(self.word_state)
 806             print(self.word_state.letters_in_solution)
 807             return None
 808
 809         elif num_possible_solutions == 1:
 810             return possible_solutions[0]
 811
 812         # Check the hash table for a precomputed best guess.
 813         elif self.position_in_hash(num_possible_solutions, fprint):
 814             guess = self.position_hash[(num_possible_solutions, fprint)]
 815             logger.debug('hash hit: %s', guess)
 816             return guess
 817
 818         # If there are just a few solutions possible, brute force the
 819         # guess.  This is expensive: for every possible solution it
 820         # computes the entropy of every possible guess.  Not fast for
 821         # large numbers of solutions.
 822         elif num_possible_solutions < 20:
 823             logger.debug(
 824                 'Only %d solutions; using brute force strategy.', num_possible_solutions
 825             )
 826             return self.brute_force_internal(
 827                 possible_solutions,
 828                 self.all_possible_guesses_by_length[len(possible_solutions[0])],
 829                 'brute_force',
 830             )
 831
 832         # A hybrid approach: narrow down the guess list using
 833         # heuristic scoring and then compute the entropy of the topN
 834         # guesses via brute force.
 835         elif num_possible_solutions < 100:
 836             logger.debug(
 837                 'Only %d solutions; using hybrid strategy.', num_possible_solutions
 838             )
 839             return self.hybrid_search(possible_solutions)
 840
 841         # There are a lot of words left; score the guesses based on
 842         # fast heuristics (i.e. letter frequency).
 843         else:
 844             logger.debug(
 845                 'There are %s solutions; using fast heuristics.', num_possible_solutions
 846             )
 847             return self.heuristics_search(possible_solutions)
 848
 849     def brute_force_internal(
 850         self,
 851         possible_solutions: List[Word],
 852         all_guesses: List[Word],
 853         label: str,
 854     ) -> Optional[Word]:
 855         """Assume every possible solution is the answer, in turn.  For each
 856         one, try all_guesses and pay attention to the hint we would
 857         get back.  Compute the entropy of each guess and return the
 858         guess with the highest entropy -- i.e. the guess that gives us
 859         the most information on average; or, the guess whose results
 860         would take the most bits to represent.
 861
 862         Note, this is expensive.  O(num_possible_solutions * all_guesses)
 863
 864         """
 865         num_possible_solutions = len(possible_solutions)
 866         if num_possible_solutions == 1:
 867             return possible_solutions[0]
 868         possible_solutions_set = set(possible_solutions)  # for O(1) lookups
 869
 870         # Buckets count distinct outcomes of a guess.  e.g. if a guess
 871         # yields the hint G_Y__ that outcome is assigned a bucket
 872         # number and we will increment the count of how many times
 873         # that outcome happened for this guess across all possible
 874         # solutions.
 875         bucket_population_by_guess: Dict[Word, Dict[Bucket, int]] = {}
 876         for guess in all_guesses:
 877             bucket_population_by_guess[guess] = defaultdict(int)
 878
 879         # Pretend that each possible solution is the real solution
 880         # and make every possible guess for each.
 881         for solution in possible_solutions:
 882             oracle = AutoplayOracle(solution)
 883             for guess in all_guesses:
 884                 hints = oracle.judge_guess(guess)
 885
 886                 # Note: this is a really good way to make sure that
 887                 # make/unmake moves works:
 888                 #
 889                 # before = self.word_state.__repr__()
 890                 self.word_state.record_guess_and_hint(guess, hints)
 891                 # during = self.word_state.__repr__()
 892
 893                 # Map the hint returned into a bucket number and
 894                 # keep track of population per bucket.  Below:
 895                 #
 896                 #   n is a position := {0, 1, 2, 3, 4}
 897                 #   hint[n] := {1, 2, 3}
 898                 #
 899                 bucket: Bucket = 0
 900                 for n in range(len(guess)):
 901                     bucket += hints[n] * (3**n)
 902                 bucket_population_by_guess[guess][bucket] += 1
 903                 self.word_state.undo_guess()
 904
 905                 # after = self.word_state.__repr__()
 906                 # if before != after:
 907                 #    print(f'BEFORE: {before}')
 908                 #    print(f'  WORD: {colorize_guess(guess, hints)}')
 909                 #    print(f'  HINT: {hints}')
 910                 #    print(f'DURING: {during}')
 911                 #    print(f' AFTER: {after}')
 912                 #    assert False
 913
 914         # Compute the entropy of every guess across all possible
 915         # solutions:
 916         #
 917         # https://markmliu.medium.com/what-in-the-wordle-5dc5ed94fe2
 918         # https://machinelearningmastery.com/what-is-information-entropy/
 919         # https://en.wikipedia.org/wiki/Entropy_(information_theory)
 920         best_entropy = None
 921         best_guess = None
 922         entropy: Dict[Word, float] = {}
 923         for guess in all_guesses:
 924             entropy[guess] = 0.0
 925             for bucket in bucket_population_by_guess[guess]:
 926
 927                 # We counted how many times this outcome occurred.
 928                 # The probabilty of this outcome = count / total.
 929                 p = float(bucket_population_by_guess[guess][bucket])
 930                 p /= num_possible_solutions
 931                 entropy[guess] += p * math.log2(p)
 932             entropy[guess] = -entropy[guess]
 933
 934             if best_entropy is None:
 935                 best_entropy = entropy[guess]
 936                 best_guess = guess
 937             else:
 938
 939                 # We always choose the guess with the highest entropy
 940                 # because this guess gives us the most information in
 941                 # the average case, i.e. it takes the most bits to
 942                 # represent the average outcome of this guess.
 943                 #
 944                 # However, in practice, usually several guesses tie
 945                 # for best.  Prefer guesses that are also potential
 946                 # solutions (not just legal guesses)
 947                 if entropy[guess] > best_entropy or (
 948                     entropy[guess] == best_entropy and guess in possible_solutions_set
 949                 ):
 950                     best_entropy = entropy[guess]
 951                     best_guess = guess
 952
 953         # This is just logging the results.  Display the guesses with
 954         # the highest entropy but also display the entropy of every
 955         # possible solution, too, even if they are not best.
 956         possible_solutions_seen = 0
 957         best_entropy = None
 958         best_count = 0
 959         for n, (guess, guess_entropy) in enumerate(
 960             sorted(entropy.items(), key=lambda x: -x[1])
 961         ):
 962             if best_entropy is None:
 963                 best_entropy = guess_entropy
 964             if guess in possible_solutions_set:
 965                 possible_solutions_seen += 1
 966                 logger.debug(
 967                     f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits <--'
 968                 )
 969             elif guess_entropy == best_entropy and best_count < 15:
 970                 logger.debug(f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits')
 971                 best_count += 1
 972         logger.debug('%s: best guess is %s.', label, best_guess)
 973         return best_guess
 974
 975     def hybrid_search(self, possible_solutions: List[Word]) -> Optional[Word]:
 976         """Use heuristic scoring to reduce the number of guesses before
 977         calling brute force search.
 978
 979         """
 980         (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
 981             possible_solutions
 982         )
 983         self.dump_frequency_data(possible_solutions, freq, pfreq)
 984         scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
 985         best_guess = None
 986
 987         topn_guesses = []
 988         count = 1
 989         for guess, score in sorted(scored_guesses.items(), key=lambda x: -x[1]):
 990             logger.debug(f'hybrid: heuristic #{count} = {guess} with {score:.5f}')
 991             topn_guesses.append(guess)
 992             count += 1
 993             if count > 10:
 994                 break
 995
 996         best_guess = self.brute_force_internal(
 997             possible_solutions, topn_guesses, 'hybrid: brute_force'
 998         )
 999         return best_guess
1000
1001     def assign_scores(
1002         self,
1003         possible_solutions: List[Word],
1004         freq: Dict[Letter, float],
1005         pfreq: List[Dict[Letter, float]],
1006     ) -> Dict[Word, float]:
1007         """Given some letter frequency and letter+position bonus data, assign
1008         a score to every possible guess.
1009
1010         """
1011         num_possible_solutions = len(possible_solutions)
1012         assert num_possible_solutions > 1
1013         template = self.word_state.get_template()
1014
1015         # Give every word a score composed of letter frequencies and letter
1016         # in position frequencies.  This score attempts to approximate the
1017         # result of brute_force_search less expensively.
1018         word_scores: Dict[Word, float] = {}
1019         for guess in possible_solutions:
1020             freq_term = 0.0
1021             pfreq_term = 0.0
1022
1023             letter_filter = set()
1024             for n, letter in enumerate(guess):
1025
1026                 # Ignore letters we already know.
1027                 if template[n] != '_':
1028                     continue
1029
1030                 # Is there a bonus for this letter in this position?  (pfreq)
1031                 pop = pfreq[n]
1032                 if letter in pop:
1033                     pfreq_term += pop[letter] * num_possible_solutions / 4
1034
1035                 # Don't count freq bonus more than once per letter.  (freq)
1036                 if letter in letter_filter:
1037                     continue
1038                 freq_term += freq.get(letter, 0.0)
1039                 letter_filter.add(letter)
1040             score = freq_term + pfreq_term
1041             word_scores[guess] = score
1042         return word_scores
1043
1044     def heuristics_search(self, possible_solutions: List[Word]) -> Word:
1045         """The dumbest but fastest search.  Use fast heuristics to score each
1046         possible guess and then guess the one with the highest
1047         score.
1048
1049         """
1050         (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
1051             possible_solutions
1052         )
1053         self.dump_frequency_data(possible_solutions, freq, pfreq)
1054         scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
1055         best_guess = None
1056         for n, (guess, score) in enumerate(
1057             sorted(scored_guesses.items(), key=lambda x: -x[1])
1058         ):
1059             if best_guess is None:
1060                 best_guess = guess
1061             if n < 20:
1062                 logger.debug(f'heuristic: #{n}: {guess} with {score:.5f}')
1063         assert best_guess is not None
1064         return best_guess
1065
1066
1067 def colorize_guess(guess: Word, hints: Dict[Position, Hint]) -> str:
1068     """Use hints to make the letters green/yellow/gray."""
1069     ret = ''
1070     for n, letter in enumerate(guess):
1071         hint = hints[n]
1072         if hint is Hint.GRAY_WRONG:
1073             ret += ansi.bg('mist gray')
1074         elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
1075             ret += ansi.bg('energy yellow')
1076         elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
1077             ret += ansi.bg('forest green')
1078         ret += ansi.fg('black')
1079         ret += letter
1080         ret += ansi.reset()
1081     return ret
1082
1083
1084 def get_max_letter_population() -> Dict[Letter, int]:
1085     filename = config.config['solutions_file']
1086     max_letter_population_per_word: Dict[Letter, int] = defaultdict(int)
1087     if filename is not None and file_utils.is_readable(filename):
1088         logger.debug(
1089             'Figuring out all letters\' max frequency in the solution space...'
1090         )
1091         with open(filename) as rf:
1092             for word in rf:
1093                 word = word[:-1]
1094                 word = word.lower()
1095                 letter_pop = Counter(word)
1096                 for letter, pop in letter_pop.items():
1097                     if pop > max_letter_population_per_word[letter]:
1098                         max_letter_population_per_word[letter] = pop
1099     else:
1100         logger.error('A valid --solutions_file is required.')
1101         sys.exit(0)
1102     return max_letter_population_per_word
1103
1104
1105 def autoplay(
1106     solution: Word,
1107     oracle: AutoplayOracle,
1108     word_state: WordState,
1109     player: AutoPlayer,
1110     quiet: bool = False,
1111 ):
1112     """Make guesses one by one until arriving at a known solution."""
1113     if not quiet:
1114         logger.debug('Autoplayer mode...')
1115     player.new_word(len(solution), oracle, word_state)
1116
1117     guesses = []
1118     while True:
1119         guess = player.guess_word()
1120         if guess is not None:
1121             hints = oracle.judge_guess(guess)
1122             word_state.record_guess_and_hint(guess, hints)
1123             guesses.append(guess)
1124             if not quiet:
1125                 colorized_guess = colorize_guess(guess, hints)
1126                 print(f'Guess #{len(guesses)}: {colorized_guess} => {word_state}')
1127         if guess == solution:
1128             break
1129         if guess is None:
1130             logger.error(f'"{solution}" is not in my --solutions_file.')
1131             break
1132     return guesses
1133
1134
1135 def cheat():
1136     """Cheater!  Given a board state, determine the best guess.  Note that
1137     in this mode the solution is not known.
1138
1139     """
1140     logger.debug('Cheater mode...')
1141
1142     template = config.config['template']
1143     assert template
1144
1145     # Extract known letter positions from the template.
1146     template = template.lower()
1147     letters_at_known_positions = {}
1148     for n, letter in enumerate(template):
1149         if letter != '_':
1150             letters_at_known_positions[n] = letter
1151
1152     # Initialize set of letters to be avoided.
1153     avoid = config.config['letters_to_avoid']
1154     if not avoid:
1155         avoid = ''
1156     avoid = avoid.lower()
1157     letters_to_avoid = set(list(avoid))
1158
1159     # Initialize the set of letters we know are in the solution but
1160     # not where, yet.
1161     in_word = config.config['letters_in_word']
1162     if not in_word:
1163         in_word = ''
1164     in_word = in_word.lower()
1165
1166     # This is parsing out the --letters_in_word argument.  The
1167     # format is:
1168     #
1169     #   <letter><1 or more zero-based positions we already tried it>
1170     #
1171     # So, if we know an E is in the word (i.e. it's yellow) and
1172     # we tried it in the first and third letter already:
1173     #
1174     #   e02
1175     #
1176     # Note: 0 means "the first letter", i.e. position is zero based.
1177     #
1178     # You can stack letters this way.  e.g.:
1179     #
1180     #   e02a3
1181     letters_in_word_at_unknown_position = defaultdict(set)
1182     last_letter = None
1183     for letter in in_word:
1184         if letter.isdigit():
1185             assert last_letter
1186             letters_in_word_at_unknown_position[last_letter].add(int(letter))
1187         elif letter.isalpha():
1188             last_letter = letter
1189
1190     max_letter_pop_per_word = get_max_letter_population()
1191     word_state = WordState(
1192         len(template),
1193         max_letter_pop_per_word,
1194         letters_at_known_positions=letters_at_known_positions,
1195         letters_at_unknown_positions=letters_in_word_at_unknown_position,
1196         letters_excluded=letters_to_avoid,
1197     )
1198
1199     player = AutoPlayer()
1200     player.new_word(len(template), None, word_state)
1201     return player.guess_word()
1202
1203
1204 def selftest():
1205     """Autoplay every possible solution and pay attention to statistics."""
1206
1207     logger.debug('Selftest mode...')
1208     total_guesses = 0
1209     total_words = 0
1210     max_guesses = None
1211     max_guesses_words = []
1212     every_guess = set()
1213     hist = histogram.SimpleHistogram(
1214         [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 100)]
1215     )
1216     top_guess_number = defaultdict(dict)
1217     num_losses = 0
1218
1219     player = AutoPlayer()
1220     with open(config.config['solutions_file'], 'r') as rf:
1221         contents = rf.readlines()
1222
1223     max_letter_pop_per_word = get_max_letter_population()
1224     start = time.time()
1225     for word in contents:
1226         word = word[:-1]
1227         word = word.lower()
1228         if len(word) != 5:
1229             logger.warning(
1230                 f'Found word "{word}" in solutions file that is not 5 letters in length.  Skipping it.'
1231             )
1232             continue
1233         oracle = AutoplayOracle(word)
1234         word_state = WordState(len(word), max_letter_pop_per_word)
1235         player.new_word(len(word), oracle, word_state)
1236
1237         total_words += 1
1238         runtime = time.time() - start
1239         print(
1240             f'{total_words} / {len(contents)} ("{word}") = {total_words/len(contents)*100.0:.2f}% | {total_guesses/total_words:.3f} guesses/word | {runtime:.1f}s @ {runtime/total_words:.3f}s/word\r',
1241             end='',
1242         )
1243         if total_words % 100 == 0:
1244             print(f'\nAfter {total_words} words:')
1245             print(
1246                 f'...I made {total_guesses} guesses; ({total_guesses/total_words:.3f}/word)'
1247             )
1248             print(
1249                 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1250             )
1251             print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1252             print()
1253
1254         guesses = autoplay(word, oracle, word_state, player, True)
1255         guess_count = len(guesses)
1256         if guess_count > 6:
1257             num_losses += 1
1258         hist.add_item(guess_count)
1259         total_guesses += guess_count
1260         for n, guess in enumerate(guesses):
1261             tops = top_guess_number[n]
1262             tops[guess] = tops.get(guess, 0) + 1
1263             if n != len(guesses) - 1:
1264                 every_guess.add(guess)
1265         if max_guesses is None or guess_count > max_guesses:
1266             max_guesses = guess_count
1267             max_guesses_words = [word]
1268         elif guess_count == max_guesses:
1269             max_guesses_words.append(word)
1270     print("\nFinal Report:")
1271     print("-------------")
1272     print(f'On {total_words} words:')
1273     print(
1274         f'...I made {total_guesses} guesses; ({total_guesses / total_words:.3f} / word)'
1275     )
1276     print(
1277         f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1278     )
1279     print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1280     print()
1281     for n in range(0, 8):
1282         top_guesses = top_guess_number[n]
1283         num = 0
1284         out = ''
1285         print(f'Top guesses #{n+1}: ', end='')
1286         for guess, count in sorted(top_guesses.items(), key=lambda x: -x[1]):
1287             out += f'{guess}@{count}, '
1288             num += 1
1289             if num > 8:
1290                 break
1291         out = out[:-2]
1292         print(out)
1293     print()
1294     print(hist)
1295
1296
1297 @par.parallelize(method=par.Method.PROCESS)
1298 def do_words(
1299     solutions: List[Word],
1300     shard_num: int,
1301     shared_cache_name: str,
1302     lock: multiprocessing.RLock,
1303     max_letter_pop_per_word: Dict[Letter, int],
1304 ):
1305     """Work on precomputing one shard of the solution space, in parallel."""
1306
1307     logger.debug(f'Shard {shard_num} owns solutions {solutions[0]}..{solutions[-1]}.')
1308     player = AutoPlayer()
1309     length = len(solutions[0])
1310     shared_cache = SharedDict(shared_cache_name, 0, lock)
1311     begin = solutions[0]
1312     end = solutions[-1]
1313
1314     passes = 0
1315     while True:
1316         num_computed = 0
1317         passes += 1
1318         assert passes < 10
1319         local_cache: Dict[Tuple[int, Fprint], Word] = {}
1320
1321         for n, solution in enumerate(solutions):
1322             oracle = AutoplayOracle(solution)
1323             word_state = WordState(length, max_letter_pop_per_word)
1324             player.new_word(length, oracle, word_state)
1325             guesses = []
1326
1327             # Make guesses until the next guess is not in the hash or
1328             # the shared dict.
1329             while True:
1330                 remaining_words = player.get_all_possible_solutions()
1331                 num_remaining_words = len(remaining_words)
1332                 if num_remaining_words <= 1:
1333                     break
1334
1335                 sig_remaining_words = hashlib.md5(
1336                     remaining_words.__repr__().encode('ascii')
1337                 ).hexdigest()
1338                 key = (num_remaining_words, sig_remaining_words)
1339
1340                 if player.position_in_hash(num_remaining_words, sig_remaining_words):
1341                     provenance = 'game_hash'
1342                     guess = player.guess_word()
1343                 elif key in local_cache:
1344                     provenance = 'local_cache'
1345                     guess = local_cache[key]
1346                 elif key in shared_cache:
1347                     provenance = 'shared_cache'
1348                     guess = shared_cache[key]
1349                 else:
1350                     provenance = 'computed'
1351                     guess = player.brute_force_internal(
1352                         remaining_words,
1353                         player.all_possible_guesses_by_length[length],
1354                         'precompute',
1355                     )
1356                     local_cache[key] = guess
1357                     shared_cache[key] = guess
1358                     num_computed += 1
1359                 assert guess
1360                 guesses.append(guess)
1361                 hints = oracle.judge_guess(guess)
1362                 word_state.record_guess_and_hint(guess, hints)
1363                 print(
1364                     f'shard{shard_num}: '
1365                     + f'{passes}/{n}/{len(solutions)}/{solution}> '
1366                     + f'{num_remaining_words} @ {sig_remaining_words}: {guesses} # {provenance}'
1367                 )
1368
1369         # When we can make it through a pass over all solutions and
1370         # never miss the hash or shared dict, we're done.
1371         if num_computed == 0:
1372             print(
1373                 f'{ansi.bg("green")}{ansi.fg("black")}'
1374                 + f'shard{shard_num}: "{begin}".."{end}" is finished!'
1375                 + f'{ansi.reset()}'
1376             )
1377             break
1378     shared_cache.close()
1379     return f'(shard {shard_num} done)'
1380
1381
1382 def precompute():
1383     """Precompute the best guess in every situation via the expensive
1384     brute force / entropy method.  Break the solutions list into
1385     shards and execute each one in parallel.  Persist the concatenated
1386     results in a file.
1387
1388     """
1389     with open(config.config['solutions_file'], 'r') as rf:
1390         contents = rf.readlines()
1391     all_words = []
1392     length = None
1393     for word in contents:
1394         word = word[:-1]
1395         word = word.lower()
1396         if length is None:
1397             length = len(word)
1398         else:
1399             assert len(word) == length
1400         all_words.append(word)
1401
1402     max_letter_pop_per_word = get_max_letter_population()
1403     shards = []
1404     logger.debug('Sharding words into groups of 10.')
1405     for subset in list_utils.shard(all_words, 10):
1406         shards.append([x for x in subset])
1407
1408     logger.debug('Kicking off helper pool.')
1409
1410     # Shared cache is a dict that is slow to read/write but is visible
1411     # to all shards so that they can benefit from results already
1412     # computed in one of the other workers.  10 pages (~40kb) of
1413     # memory.
1414     shared_cache = SharedDict("wordle_shared_dict", 4096 * 10)
1415     results = []
1416     try:
1417         for n, shard in enumerate(shards):
1418             results.append(
1419                 do_words(
1420                     shard,
1421                     n,
1422                     shared_cache.get_name(),
1423                     SharedDict.LOCK,
1424                     max_letter_pop_per_word,
1425                 )
1426             )
1427         smart_future.wait_all(results)
1428
1429         with open(config.config['hash_file'], 'a') as wf:
1430             for key, value in shared_cache.items():
1431                 print(f'{key[0]} @ {key[1]}: {value}', file=wf)
1432     finally:
1433         shared_cache.close()
1434         shared_cache.cleanup()
1435         executors.DefaultExecutors().process_pool().shutdown()
1436
1437
1438 def get_legal_guesses() -> Set[Word]:
1439     legal_guesses = set()
1440     with open(config.config['guesses_file'], 'r') as rf:
1441         contents = rf.readlines()
1442     for line in contents:
1443         line = line[:-1]
1444         line = line.lower()
1445         legal_guesses.add(line)
1446     return legal_guesses
1447
1448
1449 def get_solution() -> Word:
1450     if config.config['template'] is not None:
1451         solution = config.config['template']
1452         print(ansi.clear())
1453     else:
1454         with open(config.config['solutions_file'], 'r') as rf:
1455             contents = rf.readlines()
1456         solution = random.choice(contents)
1457         solution = solution[:-1]
1458         solution = solution.lower()
1459         solution = solution.strip()
1460     return solution
1461
1462
1463 def give_hint(num_hints: int, player: AutoPlayer, word_state: WordState):
1464     """Give a smart(?) hint to the guesser."""
1465
1466     possible_solutions = player.get_all_possible_solutions()
1467     n = len(possible_solutions)
1468     if num_hints == 1:
1469         print(
1470             f'There {string_utils.is_are(n)} {n} possible solution{string_utils.pluralize(n)}.'
1471         )
1472     elif num_hints == 2:
1473         (freq, _) = player.get_frequency_and_frequency_by_position_tables(
1474             possible_solutions
1475         )
1476         hint_letters = sorted(freq.items(), key=lambda x: -x[1])
1477         good_hints = set(string.ascii_lowercase)
1478         for letter in word_state.letters_at_unknown_positions.keys():
1479             if len(word_state.letters_at_unknown_positions[letter]) > 0:
1480                 if letter in good_hints:
1481                     good_hints.remove(letter)
1482         for letter in word_state.letters_at_known_positions.values():
1483             if letter in good_hints:
1484                 good_hints.remove(letter)
1485         limit = 2
1486         if n < 10:
1487             limit = 1
1488         for letter, _ in hint_letters:
1489             if letter in good_hints:
1490                 print(
1491                     f'"{letter}" is popular in the possible solution{string_utils.pluralize(n)}.'
1492                 )
1493                 limit -= 1
1494                 if limit == 0:
1495                     break
1496     elif num_hints == 3:
1497         limit = 3
1498         if n < 10:
1499             limit = 1
1500         for possibility in random.sample(possible_solutions, min(limit, n)):
1501             print(f'Maybe something like "{possibility}"?')
1502     elif num_hints >= 4:
1503         best_guess = player.guess_word()
1504         print(f'Try "{best_guess}"')
1505
1506
1507 def play() -> None:
1508     """Allow users to play and color in the letter for them."""
1509
1510     legal_guesses = get_legal_guesses()
1511     solution = get_solution()
1512     oracle = AutoplayOracle(solution)
1513     max_letter_pop_per_word = get_max_letter_population()
1514     word_state = WordState(len(solution), max_letter_pop_per_word)
1515     player = AutoPlayer()
1516     player.new_word(len(solution), oracle, word_state)
1517
1518     num_guesses = 0
1519     prompt = 'Guess #_/6 (or "?" for a hint): '
1520     padding = ' ' * len(prompt)
1521     colorized_guess = "▢" * len(solution)
1522     guess = None
1523     num_hints = 0
1524
1525     while True:
1526         # Print current state + template.
1527         print(padding + colorized_guess + "     " + word_state.__repr__())
1528         if guess == solution:
1529             print('Nice!')
1530             break
1531         if num_guesses >= 6:
1532             print('Better luck next time.')
1533             print(padding + f'{solution}')
1534             break
1535
1536         # Get a guess / command.
1537         guess = input(prompt.replace('_', str(num_guesses + 1))).lower().strip()
1538
1539         # Parse it.
1540         if guess == '?':
1541             num_hints += 1
1542             give_hint(num_hints, player, word_state)
1543             continue
1544         elif guess == '#brute':
1545             remaining_words = player.get_all_possible_solutions()
1546             brute = player.brute_force_internal(
1547                 remaining_words,
1548                 player.all_possible_guesses_by_length[len(solution)],
1549                 'precompute',
1550             )
1551             print(word_state)
1552             print(brute)
1553             continue
1554         elif len(guess) != len(solution) or guess not in legal_guesses:
1555             print(f'"{guess}" is not a legal guess, try again.')
1556             continue
1557
1558         # If we get here it was a guess.  Process it.
1559         num_guesses += 1
1560         hints = oracle.judge_guess(guess)
1561         colorized_guess = colorize_guess(guess, hints)
1562         word_state.record_guess_and_hint(guess, hints)
1563         num_hints = 0
1564
1565
1566 # The bootstrap.initialize decorator takes care of parsing our commandline
1567 # flags and populating config.  It can also do cool things like time and
1568 # profile the code run within it, audit imports, profile memory usage,
1569 # and break into pdb on unhandled exception.
1570 @bootstrap.initialize
1571 def main() -> Optional[int]:
1572     mode = config.config['mode'].upper().strip()
1573     if mode == 'AUTOPLAY':
1574         solution = config.config['template']
1575         assert solution
1576         solution = solution.lower()
1577         oracle = AutoplayOracle(solution)
1578         max_letter_pop_per_word = get_max_letter_population()
1579         word_state = WordState(len(solution), max_letter_pop_per_word)
1580         player = AutoPlayer()
1581         autoplay(solution, oracle, word_state, player)
1582         return None
1583     elif mode == 'CHEAT':
1584         return cheat()
1585     elif mode == 'PLAY':
1586         play()
1587         return None
1588     elif mode == 'SELFTEST':
1589         selftest()
1590         return None
1591     elif mode == 'PRECOMPUTE':
1592         precompute()
1593         return None
1594     raise Exception('wtf?')
1595
1596
1597 if __name__ == '__main__':
1598     main()