examples/wordle/wordle.py

   1 #!/usr/bin/env python3
   2 # -*- coding: utf-8 -*-
   3
   4 """
   5 Wordle!  PLAY, AUTOPLAY, CHEAT, PRECOMPUTE and SELFTEST.
   6 """
   7
   8 import enum
   9 import hashlib
  10 import logging
  11 import math
  12 import multiprocessing
  13 import random
  14 import re
  15 import string
  16 import sys
  17 import time
  18 from collections import Counter, defaultdict
  19 from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
  20
  21 from pyutils import ansi, bootstrap, config, list_utils, string_utils
  22 from pyutils.collectionz.shared_dict import SharedDict
  23 from pyutils.files import file_utils
  24 from pyutils.parallelize import executors
  25 from pyutils.parallelize import parallelize as par
  26 from pyutils.parallelize import smart_future
  27 from pyutils.typez import histogram
  28
  29 logger = logging.getLogger(__name__)
  30 args = config.add_commandline_args(
  31     f'Wordle! ({__file__})',
  32     'Args related to cheating at Wordle.',
  33 )
  34 args.add_argument(
  35     '--mode',
  36     type=str,
  37     default='PLAY',
  38     choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
  39     metavar='MODE',
  40     help="""RAW|
  41 Our mode of operation:
  42
  43       PLAY = play wordle with me!  Pick a random solution or
  44              specify a solution with --template.
  45
  46      CHEAT = given a --template and, optionally, --letters_in_word
  47              and/or --letters_to_avoid, return the best guess word;
  48
  49   AUTOPLAY = given a complete word in --template, guess it step
  50              by step showing work;
  51
  52   SELFTEST = autoplay every possible solution keeping track of
  53              wins/losses and average number of guesses;
  54
  55 PRECOMPUTE = populate hash table with optimal guesses.
  56     """,
  57 )
  58 args.add_argument(
  59     '--template',
  60     type=str,
  61     help='The current board in PLAY, CHEAT, AUTOPLAY mode. Use _\'s for unknown letters.',
  62 )
  63 args.add_argument(
  64     '--letters_to_avoid',
  65     type=str,
  66     help='In CHEAT mode, the set of letters known to not be in the solution.',
  67     metavar='LETTERS',
  68 )
  69 args.add_argument(
  70     '--letters_in_word',
  71     type=str,
  72     help="""RAW|
  73 Letters known to be in the solution but whose positions are not yet known.
  74
  75 For example:
  76
  77   t0i23 => says we tried a 't' as the first letter (0) so we
  78            know it's in the word and not there.  We also know
  79            there's an 'i' which is not the middle letter (2)
  80            and is not the 4th letter (3).  Note the zero-based
  81            position counting semantics (i.e. the first letter
  82            is letter 0).
  83   e34f0 => mean we know there's an 'e' and an 'f'; we've attempted
  84            former in positions 3-4 (recall, 4 is the last spot in
  85            a standard 5 letter wordle) and the latter in the
  86            first spot (0) already.
  87     """,
  88     metavar='<LETTER><ZERO-BASED_POSITION(S)_ALREADY_TRIED>...',
  89 )
  90 args.add_argument(
  91     '--solutions_file',
  92     type=str,
  93     default='wordle_solutions.txt',
  94     help='Where can I find a valid word list for solutions?',
  95 )
  96 args.add_argument(
  97     '--guesses_file',
  98     type=str,
  99     default='wordle_guesses.txt',
 100     help='Where can I find a valid word list for guesses?',
 101 )
 102 args.add_argument(
 103     '--hash_file',
 104     type=str,
 105     default='wordle_hash.txt',
 106     help='Where can I find my precomputed hash file?',
 107 )
 108
 109 # Type aliases for code readability
 110 Position = int
 111 Letter = str
 112 Word = str
 113 Fprint = str
 114 Bucket = int
 115
 116
 117 class Hint(enum.IntFlag):
 118     """Green, yellow or gray?"""
 119
 120     GRAY_WRONG = 0
 121     YELLOW_LETTER_RIGHT_POSITION_WRONG = 1
 122     GREEN_LETTER_IN_RIGHT_POSITION = 2
 123
 124
 125 class WordStateUndoType(enum.IntFlag):
 126     """Used to record guess undo information by type."""
 127
 128     LETTER_IN_SOLUTION_MIN_CHANGE = 1
 129     LETTER_IN_SOLUTION_MAX_CHANGE = 2
 130     LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX = 3
 131     YELLOW_LETTER_ADDED = 4
 132     GREEN_LETTER_ADDED = 5
 133     GRAY_LETTER_ADDED = 6
 134
 135
 136 class WordStateUndo(NamedTuple):
 137     """A record per guess containing all the info needed to undo it."""
 138
 139     guess: Word
 140     hints: Dict[Position, Hint]
 141     mods: List[Dict[str, Any]]
 142
 143
 144 class WordState(object):
 145     """Keeps track of the current board state, previous guesses and hints,
 146     and which letters are known/unknown/eliminated, etc...
 147
 148     """
 149
 150     def __init__(
 151         self,
 152         solution_length: int,
 153         max_letter_population_per_word: Dict[Letter, int],
 154         *,
 155         letters_at_known_positions: Optional[Dict[Position, Letter]] = None,
 156         letters_at_unknown_positions: Optional[Dict[Letter, Set[Position]]] = None,
 157         letters_excluded: Optional[Set[Letter]] = None,
 158     ):
 159         """Initialize the WordState given the length of the solution word
 160         (number of letters), maximum number of times a given letter
 161         may appear in the solution, and, optionally, some letter
 162         restrictions.  All positions below are zero-based.
 163
 164         letters_at_known_positions: position => letter map
 165         letters_at_unknown_positions: letter => set(position(s) tried) map
 166         letters_excluded: set of letters known to not be in the word
 167
 168         """
 169         super().__init__()
 170         assert solution_length > 0
 171         self.solution_length: int = solution_length
 172
 173         # All info necessary to undo a guess later.  We'll add an entry
 174         # for every guess in a stack.
 175         self.undo_info: List[WordStateUndo] = []
 176
 177         # We're going to use internal methods to set up the initial
 178         # state and they all require an undo record to populate... but
 179         # there's no initial guess and we'll never undo beyond here.
 180         # So create a fake undo record for them to scribble on and
 181         # throw it away later.
 182         fake_undo: WordStateUndo = WordStateUndo(
 183             guess="fake, will be thrown away",
 184             hints={},
 185             mods=[],
 186         )
 187
 188         # Across all solutions, how many times did each letter appear
 189         # per word, at max.  For each letter we learn is in the word
 190         # we'll track a min..max valid occurrances; this is the
 191         # initial max.  It's the max times a given letter appears in
 192         # any word in the solution set.
 193         assert max_letter_population_per_word
 194         self.max_letter_population_per_word = max_letter_population_per_word
 195
 196         # The min..max population for every letter we know in the solution.
 197         # The List[int] here has two entries: min and max.
 198         self.letters_in_solution: Dict[Letter, List[int]] = {}
 199
 200         # Green letters by where they are.
 201         self.letters_at_known_positions: Dict[Position, Letter] = {}
 202         if letters_at_known_positions is not None:
 203             for n, letter in letters_at_known_positions.items():
 204                 self.record_green(n, letter, fake_undo)
 205
 206         # Yellow letters to where we've already tried them (i.e. where
 207         # the cannot be.)
 208         self.letters_at_unknown_positions: Dict[Letter, Set[Position]] = defaultdict(
 209             set
 210         )
 211         if letters_at_unknown_positions is not None:
 212             for letter, tried_pos in letters_at_unknown_positions.items():
 213                 for n in tried_pos:
 214                     self.record_yellow(n, letter, fake_undo)
 215
 216         # Excluded letters discovered so far.
 217         self.letters_excluded: Set[Letter] = set()
 218         if letters_excluded is not None:
 219             for letter in letters_excluded:
 220                 self.record_gray(letter, fake_undo)
 221
 222     def get_template(self) -> str:
 223         """Return a representation of the board, e.g. __a_e"""
 224
 225         template = '_' * self.solution_length
 226         for n in self.letters_at_known_positions:
 227             letter = self.letters_at_known_positions[n]
 228             template = template[0:n] + letter + template[n + 1 :]
 229         return template
 230
 231     def get_alphabet(self) -> str:
 232         """Return a colorized set of letters eliminated and still viable."""
 233
 234         letters_remaining = set(string.ascii_lowercase)
 235         green_letters = set()
 236         yellow_letters = set()
 237         for letter in self.letters_excluded:
 238             letters_remaining.remove(letter)
 239         for letter in self.letters_at_known_positions.values():
 240             assert letter in letters_remaining
 241             green_letters.add(letter)
 242         for letter in self.letters_at_unknown_positions.keys():
 243             if len(self.letters_at_unknown_positions[letter]) > 0:
 244                 assert letter in letters_remaining
 245                 yellow_letters.add(letter)
 246         ret = ''
 247         for letter in string.ascii_lowercase:
 248             if letter in letters_remaining:
 249                 if letter in green_letters:
 250                     ret += ansi.bg('forest green')
 251                     ret += ansi.fg('black')
 252                     ret += letter
 253                     ret += ansi.reset()
 254                 elif letter in yellow_letters:
 255                     ret += ansi.bg('energy yellow')
 256                     ret += ansi.fg('black')
 257                     ret += letter
 258                     ret += ansi.reset()
 259                 else:
 260                     ret += letter
 261             else:
 262                 ret += ansi.fg('black olive')
 263                 ret += letter
 264                 ret += ansi.reset()
 265                 # ret += ' '
 266         return ret
 267
 268     def __repr__(self) -> str:
 269         return self.get_alphabet()
 270
 271     def adjust_existing_letters_in_solution_maxes(
 272         self, max_max: int, undo: WordStateUndo
 273     ) -> None:
 274         """We've determined, based on info about letters we know to be in the
 275         solution and their min..max counts, what the maximum max of
 276         any letter should be.  Check all letters we know to be in the
 277         solution and maybe tighten their maxes.  If we do, remember it
 278         for later undo purposes.
 279
 280         """
 281         for letter in self.letters_in_solution.keys():
 282             old_max = self.letters_in_solution[letter][1]
 283             new_max = min(max_max, old_max)
 284             if old_max != new_max:
 285                 self.letters_in_solution[letter][1] = new_max
 286                 undo.mods.append(
 287                     {
 288                         "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
 289                         "letter": letter,
 290                         "old_max": old_max,
 291                     }
 292                 )
 293
 294     def record_letter_in_solution(
 295         self,
 296         letter: Letter,
 297         undo: WordStateUndo,
 298         exact_count: Optional[int] = None,
 299     ) -> None:
 300         """Letter is a (maybe new, may existing in a special case, see below)
 301         letter that we know is in the solution.  For each letter we
 302         know is in the solution, we track a min..max window indicating
 303         how many times that letter is permitted to occur in the
 304         solution.  Manage those here.
 305
 306         """
 307
 308         # If letter is already here, allow it to change bounds if it
 309         # came with an exact_count.
 310         if letter in self.letters_in_solution:
 311             if exact_count is not None:
 312                 old_min = self.letters_in_solution[letter][0]
 313                 new_min = exact_count
 314                 if old_min != new_min:
 315                     self.letters_in_solution[letter][0] = new_min
 316                     undo.mods.append(
 317                         {
 318                             "type": WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE,
 319                             "letter": letter,
 320                             "old_min": old_min,
 321                         }
 322                     )
 323                 old_max = self.letters_in_solution[letter][1]
 324                 new_max = exact_count
 325                 if old_max != new_max:
 326                     self.letters_in_solution[letter][1] = new_max
 327                     undo.mods.append(
 328                         {
 329                             "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
 330                             "letter": letter,
 331                             "old_max": old_max,
 332                         }
 333                     )
 334
 335                 # Also... since we now know exactly how many of this
 336                 # letter there are, maybe tighten the rest's maxes.
 337                 if exact_count != 1:
 338                     num_letters_known = len(self.letters_in_solution) - 1 + exact_count
 339                     max_max = self.solution_length - (num_letters_known - 1)
 340                     self.adjust_existing_letters_in_solution_maxes(max_max, undo)
 341             return
 342
 343         # If we're here, letter is a newly discovered letter in the
 344         # solution.  Such a letter will likely affect the max count of
 345         # existing letters since, as new letters are discovered, there
 346         # is less room for the other ones in the solution.
 347         if exact_count is None:
 348             num_letters_known = len(self.letters_in_solution) + 1
 349         else:
 350             num_letters_known = len(self.letters_in_solution) + exact_count
 351         max_max = self.solution_length - (num_letters_known - 1)
 352         self.adjust_existing_letters_in_solution_maxes(max_max, undo)
 353
 354         # Finally, add the new letter's record with its initial min/max.
 355         if exact_count is None:
 356             initial_max = min(
 357                 max_max,
 358                 self.max_letter_population_per_word[letter],
 359             )
 360             initial_min = 1
 361         else:
 362             initial_max = exact_count
 363             initial_min = exact_count
 364         self.letters_in_solution[letter] = [initial_min, initial_max]
 365         undo.mods.append(
 366             {
 367                 "type": WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX,
 368                 "min": initial_min,
 369                 "max": initial_max,
 370                 "letter": letter,
 371             }
 372         )
 373
 374     def record_yellow(
 375         self,
 376         n: Position,
 377         letter: Letter,
 378         undo: WordStateUndo,
 379         num_correct_doppelgangers: Optional[int] = None,
 380     ) -> None:
 381         """We've discovered a new yellow letter or a new place where an
 382         existing yellow letter is still yellow.  If
 383         num_correct_doppelgangers is non-None, it means this letter is
 384         wrong (gray) in the current location but right (green or
 385         yellow) in other locations and we now know the precise count
 386         of this letter in the solution -- tell the
 387         record_letter_in_solution method.
 388
 389         """
 390         if n not in self.letters_at_unknown_positions[letter]:
 391             if (
 392                 len(self.letters_at_unknown_positions[letter]) == 0
 393                 or num_correct_doppelgangers is not None
 394             ):
 395                 self.record_letter_in_solution(letter, undo, num_correct_doppelgangers)
 396             self.letters_at_unknown_positions[letter].add(n)
 397             undo.mods.append(
 398                 {
 399                     "type": WordStateUndoType.YELLOW_LETTER_ADDED,
 400                     "letter": letter,
 401                     "position": n,
 402                 }
 403             )
 404
 405     def record_green(self, n: Position, letter: Letter, undo: WordStateUndo):
 406         """We found a new green letter."""
 407
 408         if n not in self.letters_at_known_positions:
 409             self.letters_at_known_positions[n] = letter
 410             undo.mods.append(
 411                 {
 412                     "type": WordStateUndoType.GREEN_LETTER_ADDED,
 413                     "letter": letter,
 414                     "position": n,
 415                 }
 416             )
 417             self.record_letter_in_solution(letter, undo)
 418
 419     def record_gray(self, letter: Letter, undo: WordStateUndo) -> None:
 420         """We found a new gray letter."""
 421
 422         if letter not in self.letters_excluded:
 423             self.letters_excluded.add(letter)
 424             undo.mods.append(
 425                 {
 426                     "type": WordStateUndoType.GRAY_LETTER_ADDED,
 427                     "letter": letter,
 428                 }
 429             )
 430
 431     def record_guess_and_hint(self, guess: Word, hints: Dict[Position, Hint]):
 432         """Make a guess and change state based on the hint.  Remember how to
 433         undo everything later.
 434
 435         """
 436         undo = WordStateUndo(guess=guess, hints=hints, mods=[])
 437         for n, letter in enumerate(guess):
 438             hint = hints[n]
 439
 440             if hint is Hint.GRAY_WRONG:
 441                 # Exclude letters we got WRONG _unless_ we guessed the
 442                 # same letter elsewhere and got a yellow/green there.
 443                 # In this case the WRONG hint is not saying this
 444                 # letter isn't in the word (it is) but rather that
 445                 # there aren't N+1 instances of this letter.  In this
 446                 # edge case we _do_ know that the letter is not valid
 447                 # in this position, though; treat it as yellow.
 448                 num_correct_doppelgangers = 0
 449                 for k, other in enumerate(guess):
 450                     if other == letter and n != k and hints[k] != Hint.GRAY_WRONG:
 451                         num_correct_doppelgangers += 1
 452                 if num_correct_doppelgangers == 0:
 453                     self.record_gray(letter, undo)
 454                 else:
 455                     # Treat this as a yellow; we know letter can't go
 456                     # here or it would have been yellow/green.
 457                     self.record_yellow(n, letter, undo, num_correct_doppelgangers)
 458             elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
 459                 self.record_yellow(n, letter, undo)
 460             elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
 461                 self.record_green(n, letter, undo)
 462         self.undo_info.append(undo)
 463
 464     def undo_guess(self) -> None:
 465         """Unmake a guess and revert back to a previous state."""
 466
 467         assert len(self.undo_info) > 0
 468         undo = self.undo_info[-1]
 469         self.undo_info = self.undo_info[:-1]
 470
 471         # We iterate the mods in reverse order so we never have weird
 472         # apply/undo ordering artifacts.
 473         for mod in reversed(undo.mods):
 474             mod_type = mod['type']
 475             letter = mod['letter']
 476             if mod_type is WordStateUndoType.GRAY_LETTER_ADDED:
 477                 self.letters_excluded.remove(letter)
 478             elif mod_type is WordStateUndoType.YELLOW_LETTER_ADDED:
 479                 pos = mod['position']
 480                 self.letters_at_unknown_positions[letter].remove(pos)
 481             elif mod_type is WordStateUndoType.GREEN_LETTER_ADDED:
 482                 pos = mod['position']
 483                 del self.letters_at_known_positions[pos]
 484             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE:
 485                 old_min = mod['old_min']
 486                 self.letters_in_solution[letter][0] = old_min
 487             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE:
 488                 old_max = mod['old_max']
 489                 self.letters_in_solution[letter][1] = old_max
 490             elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX:
 491                 del self.letters_in_solution[letter]
 492
 493
 494 class AutoplayOracle(object):
 495     """The class that knows the right solution and can give hints in
 496     response to guesses.
 497
 498     """
 499
 500     def __init__(self, solution: Word):
 501         super().__init__()
 502         self.solution = solution.lower()
 503         self.solution_letter_count = Counter(self.solution)
 504         self.solution_letter_to_pos = defaultdict(set)
 505         for n, letter in enumerate(self.solution):
 506             self.solution_letter_to_pos[letter].add(n)
 507         self.hint_quota_by_letter = Counter(self.solution)
 508
 509     def judge_guess(self, guess: Word) -> Dict[Position, Hint]:
 510         """Returns a mapping from position -> Hint to indicate
 511         whether each letter in the guess string is green, yellow
 512         or gray.
 513
 514         """
 515         assert len(guess) == len(self.solution)
 516         hint_by_pos: Dict[Position, Hint] = {}
 517         letter_to_pos = defaultdict(set)
 518         hint_quota_by_letter = self.hint_quota_by_letter.copy()
 519
 520         for position, letter in enumerate(guess):
 521             letter_to_pos[letter].add(position)
 522
 523             if letter == self.solution[position]:  # green
 524                 if hint_quota_by_letter[letter] == 0:
 525                     # We must tell them this letter is green in this
 526                     # position but we've exhausted our quota of hints
 527                     # for this letter.  There must exist some yellow
 528                     # hint we previously gave that we need to turn to
 529                     # gray (wrong).
 530                     for other in letter_to_pos[letter]:
 531                         if other != position:
 532                             if (
 533                                 hint_by_pos[other]
 534                                 == Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
 535                             ):
 536                                 hint_by_pos[other] = Hint.GRAY_WRONG
 537                                 break
 538                 hint_by_pos[position] = Hint.GREEN_LETTER_IN_RIGHT_POSITION
 539
 540             elif self.solution_letter_count[letter] > 0:  # yellow
 541                 if hint_quota_by_letter[letter] == 0:
 542                     # We're out of hints for this letter.  All the
 543                     # other hints must be green or yellow.  Tell them
 544                     # this one is gray (wrong).
 545                     hint_by_pos[position] = Hint.GRAY_WRONG
 546                 else:
 547                     hint_by_pos[position] = Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
 548
 549             else:  # gray
 550                 hint_by_pos[position] = Hint.GRAY_WRONG
 551             if hint_quota_by_letter[letter] > 0:
 552                 hint_quota_by_letter[letter] -= 1
 553         return hint_by_pos
 554
 555
 556 class AutoPlayer(object):
 557     """This is the class that knows how to guess given the current game
 558     state along with several support methods.
 559
 560     """
 561
 562     def __init__(self):
 563         super().__init__()
 564         self.solution_length = None
 565
 566         # Guess judge
 567         self.oracle = None
 568
 569         # Board state tracker and move undoer
 570         self.word_state = None
 571
 572         # The position hash has known best guesses for some subset of
 573         # remaining words.  See --mode=PRECOMPUTE for how to populate.
 574         self.position_hash = {}
 575         filename = config.config['hash_file']
 576         if filename is not None and file_utils.file_is_readable(filename):
 577             logger.debug(f'Initializing position hash from {filename}...')
 578             with open(filename, 'r') as rf:
 579                 for line in rf:
 580                     line = line[:-1]
 581                     line = line.strip()
 582                     line = re.sub(r'#.*$', '', line)
 583                     if len(line) == 0:
 584                         continue
 585                     (key, word) = line.split(':')
 586                     (count, fprint) = key.split('@')
 587                     count = count.strip()
 588                     count = int(count)
 589                     fprint = fprint.strip()
 590                     word = word.strip()
 591                     self.position_hash[(count, fprint)] = word
 592             logger.debug(f'...hash contains {len(self.position_hash)} entries.')
 593
 594         # All legal solutions pre-sorted by length.
 595         self.all_possible_solutions_by_length = defaultdict(list)
 596         filename = config.config['solutions_file']
 597         if filename is not None and file_utils.file_is_readable(filename):
 598             logger.debug(f'Initializing valid solution word list from {filename}...')
 599             with open(filename) as rf:
 600                 for word in rf:
 601                     word = word[:-1]
 602                     word = word.lower()
 603                     self.all_possible_solutions_by_length[len(word)].append(word)
 604         else:
 605             logger.error('A valid --solutions_file is required.')
 606             sys.exit(0)
 607
 608         # All legal guesses pre-sorted by length.
 609         self.all_possible_guesses_by_length = defaultdict(list)
 610         filename = config.config['guesses_file']
 611         if filename is not None and file_utils.file_is_readable(filename):
 612             logger.debug(f'Initializing legal guess word list from {filename}...')
 613             with open(filename) as rf:
 614                 for word in rf:
 615                     word = word[:-1]
 616                     word = word.lower()
 617                     self.all_possible_guesses_by_length[len(word)].append(word)
 618         else:
 619             logger.error('A valid --guesses_file is required.')
 620             sys.exit(0)
 621
 622     def new_word(
 623         self,
 624         length: int,
 625         oracle: Optional[AutoplayOracle],
 626         word_state: Optional[WordState],
 627     ) -> None:
 628         """Here's a new word to guess.  Reset state to play a new game.  On
 629         principle we don't give this class the solution.  Instead, we
 630         just give it length to indicate the number of characters in
 631         the solution.  Guesses will be turned into hints by the
 632         oracle.  The current game state is tracked by the
 633         word_state.
 634
 635         """
 636         self.solution_length = length
 637         self.oracle = oracle
 638         self.word_state = word_state
 639
 640     def get_all_possible_solutions(self) -> List[Word]:
 641         """Given the current known word state, compute the subset of all
 642         possible solutions that are still possible.  Note: this method
 643         guarantees to return the possible solutions list in sorted
 644         order.
 645
 646         """
 647
 648         def is_possible_solution(solution: Word, word_state: WordState) -> bool:
 649             """Note: very perf sensitive code; inner loop."""
 650
 651             letters_seen: Dict[Letter, int] = defaultdict(int)
 652             for n, letter in enumerate(solution):
 653                 # The word can't contain letters we know aren't in the
 654                 # solution.
 655                 if letter in word_state.letters_excluded:
 656                     return False
 657
 658                 # If we already tried this letter in this position and
 659                 # it wasn't green, this isn't a possible solution.
 660                 if n in word_state.letters_at_unknown_positions[letter]:
 661                     return False
 662
 663                 # If we know a letter is in a position, solution words
 664                 # must have that letter in that position.
 665                 if (
 666                     n in word_state.letters_at_known_positions
 667                     and letter != word_state.letters_at_known_positions[n]
 668                 ):
 669                     return False
 670                 letters_seen[letter] += 1
 671
 672             # Finally, the word must include all letters presently
 673             # known to be in the solution to be viable.
 674             for letter, min_max in word_state.letters_in_solution.items():
 675                 num_seen = letters_seen[letter]
 676                 if num_seen < min_max[0]:
 677                     return False
 678                 elif num_seen > min_max[1]:
 679                     return False
 680             return True
 681
 682         possible_solutions = []
 683         for word in self.all_possible_solutions_by_length[self.solution_length]:
 684             if is_possible_solution(word, self.word_state):
 685                 possible_solutions.append(word)
 686
 687         # Note: because self.all_possible_solutions_by_length is sorted
 688         # and we iterated it in order, possible_solutions is also sorted
 689         # already.
 690         # assert possible_solutions == sorted(possible_solutions)
 691         return possible_solutions
 692
 693     def get_frequency_and_frequency_by_position_tables(
 694         self,
 695         possible_solutions: List[Word],
 696     ) -> Tuple[Dict[Letter, float], List[Dict[Letter, float]]]:
 697         """This method is used by heuristic mode.  It returns two tables:
 698
 699         1. The frequency count by letter for words in possible_solutions
 700            to encourage guesses composed of common letters.
 701         2. A letter-in-position bonus term to encourage guesses with
 702            letters in positions that occur frequently in the solutions.
 703
 704         """
 705         template = self.word_state.get_template()
 706         pfreq: List[Dict[Letter, float]] = []
 707         pop_letters: List[Dict[Letter, int]] = []
 708         for n in range(len(template)):
 709             pop_letters.append(defaultdict(int))
 710             pfreq.append({})
 711
 712         freq: Dict[Letter, float] = {}
 713         for word in possible_solutions:
 714             letter_filter = set()
 715             for n, letter in enumerate(word):
 716
 717                 # Only count frequency of letters still to be filled in.
 718                 if template[n] != '_':
 719                     continue
 720
 721                 # Do not give a full frequency bonus to repeated letters.
 722                 if letter not in letter_filter:
 723                     freq[letter] = freq.get(letter, 0) + 1
 724                     letter_filter.add(letter)
 725                 else:
 726                     freq[letter] += 0.1
 727                 pop_letters[n][letter] += 1  # type: ignore
 728
 729         # Reward guesses that place the most popular letter in a position.
 730         # Save the top 3 letters per position.
 731         # don't give a bonus if letters spread all over the place?
 732         # only give a bonus to a given letter in one position?
 733         total_letters_in_position = len(possible_solutions)
 734         for n in range(len(template)):
 735             for letter, count in sorted(pop_letters[n].items(), key=lambda x: -x[1]):
 736                 if count <= 1:
 737                     break
 738                 normalized = count / total_letters_in_position
 739                 assert 0.0 < normalized <= 1.0
 740                 if normalized > 0.08:
 741                     pfreq[n][letter] = normalized
 742                 else:
 743                     break
 744         return (freq, pfreq)
 745
 746     def dump_frequency_data(
 747         self,
 748         possible_solutions: List[Word],
 749         letter_frequency: Dict[Letter, float],
 750         letter_position_frequency: List[Dict[Letter, float]],
 751     ) -> None:
 752         """Just logs the frequency tables we computed above."""
 753
 754         logger.debug('Unknown letter(s) frequency table: ')
 755         out = ''
 756         for letter, weight in sorted(letter_frequency.items(), key=lambda x: -x[1]):
 757             out += f'{letter}:{weight}, '
 758         if len(out):
 759             out = out[:-2]
 760             logger.debug(out)
 761
 762         logger.debug('Unknown letter-in-position bonus table: ')
 763         out = ''
 764         for n in range(len(possible_solutions[0])):
 765             pop = letter_position_frequency[n]
 766             for letter, weight in sorted(pop.items(), key=lambda x: -x[1]):
 767                 out += f'pos{n}:{letter}@{weight:.5f}, '
 768         if len(out):
 769             out = out[:-2]
 770             logger.debug(out)
 771
 772     def position_in_hash(self, num_potential_solutions: int, fprint: Fprint):
 773         """Is a position in our hash table?"""
 774         return (num_potential_solutions, fprint) in self.position_hash
 775
 776     def guess_word(self) -> Optional[Word]:
 777         """Compute a guess word and return it.  Returns None on error."""
 778
 779         template = self.word_state.get_template()
 780         possible_solutions = self.get_all_possible_solutions()
 781         num_possible_solutions = len(possible_solutions)
 782         fprint = hashlib.md5(possible_solutions.__repr__().encode('ascii')).hexdigest()
 783
 784         n = num_possible_solutions
 785         logger.debug(
 786             string_utils.make_contractions(
 787                 f'There {string_utils.is_are(n)} {n} word{string_utils.pluralize(n)} '
 788                 + f'({template} @ {fprint}).'
 789             )
 790         )
 791         if num_possible_solutions < 30:
 792             logger.debug(
 793                 string_utils.make_contractions(
 794                     f'{string_utils.capitalize_first_letter(string_utils.it_they(n))} '
 795                     + f'{string_utils.is_are(n)}: {possible_solutions}'
 796                 )
 797             )
 798         logger.debug(
 799             f'Letter count restrictions: {self.word_state.letters_in_solution}'
 800         )
 801         if num_possible_solutions == 0:
 802             logger.error('No possible solutions?!')
 803             print('No possible solutions?!', file=sys.stderr)
 804             print(self.word_state)
 805             print(self.word_state.letters_in_solution)
 806             return None
 807
 808         elif num_possible_solutions == 1:
 809             return possible_solutions[0]
 810
 811         # Check the hash table for a precomputed best guess.
 812         elif self.position_in_hash(num_possible_solutions, fprint):
 813             guess = self.position_hash[(num_possible_solutions, fprint)]
 814             logger.debug(f'hash hit: {guess}')
 815             return guess
 816
 817         # If there are just a few solutions possible, brute force the
 818         # guess.  This is expensive: for every possible solution it
 819         # computes the entropy of every possible guess.  Not fast for
 820         # large numbers of solutions.
 821         elif num_possible_solutions < 20:
 822             logger.debug(
 823                 f'Only {num_possible_solutions} solutions; using brute force strategy.'
 824             )
 825             return self.brute_force_internal(
 826                 possible_solutions,
 827                 self.all_possible_guesses_by_length[len(possible_solutions[0])],
 828                 'brute_force',
 829             )
 830
 831         # A hybrid approach: narrow down the guess list using
 832         # heuristic scoring and then compute the entropy of the topN
 833         # guesses via brute force.
 834         elif num_possible_solutions < 100:
 835             logger.debug(
 836                 f'Only {num_possible_solutions} solutions; using hybrid strategy.'
 837             )
 838             return self.hybrid_search(possible_solutions)
 839
 840         # There are a lot of words left; score the guesses based on
 841         # fast heuristics (i.e. letter frequency).
 842         else:
 843             logger.debug(
 844                 f'There are {num_possible_solutions} solutions; using fast heuristics.'
 845             )
 846             return self.heuristics_search(possible_solutions)
 847
 848     def brute_force_internal(
 849         self,
 850         possible_solutions: List[Word],
 851         all_guesses: List[Word],
 852         label: str,
 853     ) -> Optional[Word]:
 854         """Assume every possible solution is the answer, in turn.  For each
 855         one, try all_guesses and pay attention to the hint we would
 856         get back.  Compute the entropy of each guess and return the
 857         guess with the highest entropy -- i.e. the guess that gives us
 858         the most information on average; or, the guess whose results
 859         would take the most bits to represent.
 860
 861         Note, this is expensive.  O(num_possible_solutions * all_guesses)
 862
 863         """
 864         num_possible_solutions = len(possible_solutions)
 865         if num_possible_solutions == 1:
 866             return possible_solutions[0]
 867         possible_solutions_set = set(possible_solutions)  # for O(1) lookups
 868
 869         # Buckets count distinct outcomes of a guess.  e.g. if a guess
 870         # yields the hint G_Y__ that outcome is assigned a bucket
 871         # number and we will increment the count of how many times
 872         # that outcome happened for this guess across all possible
 873         # solutions.
 874         bucket_population_by_guess: Dict[Word, Dict[Bucket, int]] = {}
 875         for guess in all_guesses:
 876             bucket_population_by_guess[guess] = defaultdict(int)
 877
 878         # Pretend that each possible solution is the real solution
 879         # and make every possible guess for each.
 880         for solution in possible_solutions:
 881             oracle = AutoplayOracle(solution)
 882             for guess in all_guesses:
 883                 hints = oracle.judge_guess(guess)
 884
 885                 # Note: this is a really good way to make sure that
 886                 # make/unmake moves works:
 887                 #
 888                 # before = self.word_state.__repr__()
 889                 self.word_state.record_guess_and_hint(guess, hints)
 890                 # during = self.word_state.__repr__()
 891
 892                 # Map the hint returned into a bucket number and
 893                 # keep track of population per bucket.  Below:
 894                 #
 895                 #   n is a position := {0, 1, 2, 3, 4}
 896                 #   hint[n] := {1, 2, 3}
 897                 #
 898                 bucket: Bucket = 0
 899                 for n in range(len(guess)):
 900                     bucket += hints[n] * (3**n)
 901                 bucket_population_by_guess[guess][bucket] += 1
 902                 self.word_state.undo_guess()
 903
 904                 # after = self.word_state.__repr__()
 905                 # if before != after:
 906                 #    print(f'BEFORE: {before}')
 907                 #    print(f'  WORD: {colorize_guess(guess, hints)}')
 908                 #    print(f'  HINT: {hints}')
 909                 #    print(f'DURING: {during}')
 910                 #    print(f' AFTER: {after}')
 911                 #    assert False
 912
 913         # Compute the entropy of every guess across all possible
 914         # solutions:
 915         #
 916         # https://markmliu.medium.com/what-in-the-wordle-5dc5ed94fe2
 917         # https://machinelearningmastery.com/what-is-information-entropy/
 918         # https://en.wikipedia.org/wiki/Entropy_(information_theory)
 919         best_entropy = None
 920         best_guess = None
 921         entropy: Dict[Word, float] = {}
 922         for guess in all_guesses:
 923             entropy[guess] = 0.0
 924             for bucket in bucket_population_by_guess[guess]:
 925
 926                 # We counted how many times this outcome occurred.
 927                 # The probabilty of this outcome = count / total.
 928                 p = float(bucket_population_by_guess[guess][bucket])
 929                 p /= num_possible_solutions
 930                 entropy[guess] += p * math.log2(p)
 931             entropy[guess] = -entropy[guess]
 932
 933             if best_entropy is None:
 934                 best_entropy = entropy[guess]
 935                 best_guess = guess
 936             else:
 937
 938                 # We always choose the guess with the highest entropy
 939                 # because this guess gives us the most information in
 940                 # the average case, i.e. it takes the most bits to
 941                 # represent the average outcome of this guess.
 942                 #
 943                 # However, in practice, usually several guesses tie
 944                 # for best.  Prefer guesses that are also potential
 945                 # solutions (not just legal guesses)
 946                 if entropy[guess] > best_entropy or (
 947                     entropy[guess] == best_entropy and guess in possible_solutions_set
 948                 ):
 949                     best_entropy = entropy[guess]
 950                     best_guess = guess
 951
 952         # This is just logging the results.  Display the guesses with
 953         # the highest entropy but also display the entropy of every
 954         # possible solution, too, even if they are not best.
 955         possible_solutions_seen = 0
 956         best_entropy = None
 957         best_count = 0
 958         for n, (guess, guess_entropy) in enumerate(
 959             sorted(entropy.items(), key=lambda x: -x[1])
 960         ):
 961             if best_entropy is None:
 962                 best_entropy = guess_entropy
 963             if guess in possible_solutions_set:
 964                 possible_solutions_seen += 1
 965                 logger.debug(
 966                     f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits <--'
 967                 )
 968             elif guess_entropy == best_entropy and best_count < 15:
 969                 logger.debug(f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits')
 970                 best_count += 1
 971         logger.debug(f'{label}: best guess is {best_guess}.')
 972         return best_guess
 973
 974     def hybrid_search(self, possible_solutions: List[Word]) -> Optional[Word]:
 975         """Use heuristic scoring to reduce the number of guesses before
 976         calling brute force search.
 977
 978         """
 979         (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
 980             possible_solutions
 981         )
 982         self.dump_frequency_data(possible_solutions, freq, pfreq)
 983         scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
 984         best_guess = None
 985
 986         topn_guesses = []
 987         count = 1
 988         for guess, score in sorted(scored_guesses.items(), key=lambda x: -x[1]):
 989             logger.debug(f'hybrid: heuristic #{count} = {guess} with {score:.5f}')
 990             topn_guesses.append(guess)
 991             count += 1
 992             if count > 10:
 993                 break
 994
 995         best_guess = self.brute_force_internal(
 996             possible_solutions, topn_guesses, 'hybrid: brute_force'
 997         )
 998         return best_guess
 999
1000     def assign_scores(
1001         self,
1002         possible_solutions: List[Word],
1003         freq: Dict[Letter, float],
1004         pfreq: List[Dict[Letter, float]],
1005     ) -> Dict[Word, float]:
1006         """Given some letter frequency and letter+position bonus data, assign
1007         a score to every possible guess.
1008
1009         """
1010         num_possible_solutions = len(possible_solutions)
1011         assert num_possible_solutions > 1
1012         template = self.word_state.get_template()
1013
1014         # Give every word a score composed of letter frequencies and letter
1015         # in position frequencies.  This score attempts to approximate the
1016         # result of brute_force_search less expensively.
1017         word_scores: Dict[Word, float] = {}
1018         for guess in possible_solutions:
1019             freq_term = 0.0
1020             pfreq_term = 0.0
1021
1022             letter_filter = set()
1023             for n, letter in enumerate(guess):
1024
1025                 # Ignore letters we already know.
1026                 if template[n] != '_':
1027                     continue
1028
1029                 # Is there a bonus for this letter in this position?  (pfreq)
1030                 pop = pfreq[n]
1031                 if letter in pop:
1032                     pfreq_term += pop[letter] * num_possible_solutions / 4
1033
1034                 # Don't count freq bonus more than once per letter.  (freq)
1035                 if letter in letter_filter:
1036                     continue
1037                 freq_term += freq.get(letter, 0.0)
1038                 letter_filter.add(letter)
1039             score = freq_term + pfreq_term
1040             word_scores[guess] = score
1041         return word_scores
1042
1043     def heuristics_search(self, possible_solutions: List[Word]) -> Word:
1044         """The dumbest but fastest search.  Use fast heuristics to score each
1045         possible guess and then guess the one with the highest
1046         score.
1047
1048         """
1049         (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
1050             possible_solutions
1051         )
1052         self.dump_frequency_data(possible_solutions, freq, pfreq)
1053         scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
1054         best_guess = None
1055         for n, (guess, score) in enumerate(
1056             sorted(scored_guesses.items(), key=lambda x: -x[1])
1057         ):
1058             if best_guess is None:
1059                 best_guess = guess
1060             if n < 20:
1061                 logger.debug(f'heuristic: #{n}: {guess} with {score:.5f}')
1062         assert best_guess is not None
1063         return best_guess
1064
1065
1066 def colorize_guess(guess: Word, hints: Dict[Position, Hint]) -> str:
1067     """Use hints to make the letters green/yellow/gray."""
1068     ret = ''
1069     for n, letter in enumerate(guess):
1070         hint = hints[n]
1071         if hint is Hint.GRAY_WRONG:
1072             ret += ansi.bg('mist gray')
1073         elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
1074             ret += ansi.bg('energy yellow')
1075         elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
1076             ret += ansi.bg('forest green')
1077         ret += ansi.fg('black')
1078         ret += letter
1079         ret += ansi.reset()
1080     return ret
1081
1082
1083 def get_max_letter_population() -> Dict[Letter, int]:
1084     filename = config.config['solutions_file']
1085     max_letter_population_per_word: Dict[Letter, int] = defaultdict(int)
1086     if filename is not None and file_utils.file_is_readable(filename):
1087         logger.debug(
1088             'Figuring out all letters\' max frequency in the solution space...'
1089         )
1090         with open(filename) as rf:
1091             for word in rf:
1092                 word = word[:-1]
1093                 word = word.lower()
1094                 letter_pop = Counter(word)
1095                 for letter, pop in letter_pop.items():
1096                     if pop > max_letter_population_per_word[letter]:
1097                         max_letter_population_per_word[letter] = pop
1098     else:
1099         logger.error('A valid --solutions_file is required.')
1100         sys.exit(0)
1101     return max_letter_population_per_word
1102
1103
1104 def autoplay(
1105     solution: Word,
1106     oracle: AutoplayOracle,
1107     word_state: WordState,
1108     player: AutoPlayer,
1109     quiet: bool = False,
1110 ):
1111     """Make guesses one by one until arriving at a known solution."""
1112     if not quiet:
1113         logger.debug('Autoplayer mode...')
1114     player.new_word(len(solution), oracle, word_state)
1115
1116     guesses = []
1117     while True:
1118         guess = player.guess_word()
1119         if guess is not None:
1120             hints = oracle.judge_guess(guess)
1121             word_state.record_guess_and_hint(guess, hints)
1122             guesses.append(guess)
1123             if not quiet:
1124                 colorized_guess = colorize_guess(guess, hints)
1125                 print(f'Guess #{len(guesses)}: {colorized_guess} => {word_state}')
1126         if guess == solution:
1127             break
1128         if guess is None:
1129             logger.error(f'"{solution}" is not in my --solutions_file.')
1130             break
1131     return guesses
1132
1133
1134 def cheat():
1135     """Cheater!  Given a board state, determine the best guess.  Note that
1136     in this mode the solution is not known.
1137
1138     """
1139     logger.debug('Cheater mode...')
1140
1141     template = config.config['template']
1142     assert template
1143
1144     # Extract known letter positions from the template.
1145     template = template.lower()
1146     letters_at_known_positions = {}
1147     for n, letter in enumerate(template):
1148         if letter != '_':
1149             letters_at_known_positions[n] = letter
1150
1151     # Initialize set of letters to be avoided.
1152     avoid = config.config['letters_to_avoid']
1153     if not avoid:
1154         avoid = ''
1155     avoid = avoid.lower()
1156     letters_to_avoid = set([letter for letter in avoid])
1157
1158     # Initialize the set of letters we know are in the solution but
1159     # not where, yet.
1160     in_word = config.config['letters_in_word']
1161     if not in_word:
1162         in_word = ''
1163     in_word = in_word.lower()
1164
1165     # This is parsing out the --letters_in_word argument.  The
1166     # format is:
1167     #
1168     #   <letter><1 or more zero-based positions we already tried it>
1169     #
1170     # So, if we know an E is in the word (i.e. it's yellow) and
1171     # we tried it in the first and third letter already:
1172     #
1173     #   e02
1174     #
1175     # Note: 0 means "the first letter", i.e. position is zero based.
1176     #
1177     # You can stack letters this way.  e.g.:
1178     #
1179     #   e02a3
1180     letters_in_word_at_unknown_position = defaultdict(set)
1181     last_letter = None
1182     for letter in in_word:
1183         if letter.isdigit():
1184             assert last_letter
1185             letters_in_word_at_unknown_position[last_letter].add(int(letter))
1186         elif letter.isalpha():
1187             last_letter = letter
1188
1189     max_letter_pop_per_word = get_max_letter_population()
1190     word_state = WordState(
1191         len(template),
1192         max_letter_pop_per_word,
1193         letters_at_known_positions=letters_at_known_positions,
1194         letters_at_unknown_positions=letters_in_word_at_unknown_position,
1195         letters_excluded=letters_to_avoid,
1196     )
1197
1198     player = AutoPlayer()
1199     player.new_word(len(template), None, word_state)
1200     return player.guess_word()
1201
1202
1203 def selftest():
1204     """Autoplay every possible solution and pay attention to statistics."""
1205
1206     logger.debug('Selftest mode...')
1207     total_guesses = 0
1208     total_words = 0
1209     max_guesses = None
1210     max_guesses_words = []
1211     every_guess = set()
1212     hist = histogram.SimpleHistogram(
1213         [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 100)]
1214     )
1215     top_guess_number = defaultdict(dict)
1216     num_losses = 0
1217
1218     player = AutoPlayer()
1219     with open(config.config['solutions_file'], 'r') as rf:
1220         contents = rf.readlines()
1221
1222     max_letter_pop_per_word = get_max_letter_population()
1223     start = time.time()
1224     for word in contents:
1225         word = word[:-1]
1226         word = word.lower()
1227         if len(word) != 5:
1228             logger.warning(
1229                 f'Found word "{word}" in solutions file that is not 5 letters in length.  Skipping it.'
1230             )
1231             continue
1232         oracle = AutoplayOracle(word)
1233         word_state = WordState(len(word), max_letter_pop_per_word)
1234         player.new_word(len(word), oracle, word_state)
1235
1236         total_words += 1
1237         runtime = time.time() - start
1238         print(
1239             f'{total_words} / {len(contents)} ("{word}") = {total_words/len(contents)*100.0:.2f}% | {total_guesses/total_words:.3f} guesses/word | {runtime:.1f}s @ {runtime/total_words:.3f}s/word\r',
1240             end='',
1241         )
1242         if total_words % 100 == 0:
1243             print(f'\nAfter {total_words} words:')
1244             print(
1245                 f'...I made {total_guesses} guesses; ({total_guesses/total_words:.3f}/word)'
1246             )
1247             print(
1248                 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1249             )
1250             print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1251             print()
1252
1253         guesses = autoplay(word, oracle, word_state, player, True)
1254         guess_count = len(guesses)
1255         if guess_count > 6:
1256             num_losses += 1
1257         hist.add_item(guess_count)
1258         total_guesses += guess_count
1259         for n, guess in enumerate(guesses):
1260             tops = top_guess_number[n]
1261             tops[guess] = tops.get(guess, 0) + 1
1262             if n != len(guesses) - 1:
1263                 every_guess.add(guess)
1264         if max_guesses is None or guess_count > max_guesses:
1265             max_guesses = guess_count
1266             max_guesses_words = [word]
1267         elif guess_count == max_guesses:
1268             max_guesses_words.append(word)
1269     print("\nFinal Report:")
1270     print("-------------")
1271     print(f'On {total_words} words:')
1272     print(
1273         f'...I made {total_guesses} guesses; ({total_guesses / total_words:.3f} / word)'
1274     )
1275     print(
1276         f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1277     )
1278     print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1279     print()
1280     for n in range(0, 8):
1281         top_guesses = top_guess_number[n]
1282         num = 0
1283         out = ''
1284         print(f'Top guesses #{n+1}: ', end='')
1285         for guess, count in sorted(top_guesses.items(), key=lambda x: -x[1]):
1286             out += f'{guess}@{count}, '
1287             num += 1
1288             if num > 8:
1289                 break
1290         out = out[:-2]
1291         print(out)
1292     print()
1293     print(hist)
1294
1295
1296 @par.parallelize(method=par.Method.PROCESS)
1297 def do_words(
1298     solutions: List[Word],
1299     shard_num: int,
1300     shared_cache_name: str,
1301     lock: multiprocessing.RLock,
1302     max_letter_pop_per_word: Dict[Letter, int],
1303 ):
1304     """Work on precomputing one shard of the solution space, in parallel."""
1305
1306     logger.debug(f'Shard {shard_num} owns solutions {solutions[0]}..{solutions[-1]}.')
1307     player = AutoPlayer()
1308     length = len(solutions[0])
1309     shared_cache = SharedDict(shared_cache_name, 0, lock)
1310     begin = solutions[0]
1311     end = solutions[-1]
1312
1313     passes = 0
1314     while True:
1315         num_computed = 0
1316         passes += 1
1317         assert passes < 10
1318         local_cache: Dict[Tuple[int, Fprint], Word] = {}
1319
1320         for n, solution in enumerate(solutions):
1321             oracle = AutoplayOracle(solution)
1322             word_state = WordState(length, max_letter_pop_per_word)
1323             player.new_word(length, oracle, word_state)
1324             guesses = []
1325
1326             # Make guesses until the next guess is not in the hash or
1327             # the shared dict.
1328             while True:
1329                 remaining_words = player.get_all_possible_solutions()
1330                 num_remaining_words = len(remaining_words)
1331                 if num_remaining_words <= 1:
1332                     break
1333
1334                 sig_remaining_words = hashlib.md5(
1335                     remaining_words.__repr__().encode('ascii')
1336                 ).hexdigest()
1337                 key = (num_remaining_words, sig_remaining_words)
1338
1339                 if player.position_in_hash(num_remaining_words, sig_remaining_words):
1340                     provenance = 'game_hash'
1341                     guess = player.guess_word()
1342                 elif key in local_cache:
1343                     provenance = 'local_cache'
1344                     guess = local_cache[key]
1345                 elif key in shared_cache:
1346                     provenance = 'shared_cache'
1347                     guess = shared_cache[key]
1348                 else:
1349                     provenance = 'computed'
1350                     guess = player.brute_force_internal(
1351                         remaining_words,
1352                         player.all_possible_guesses_by_length[length],
1353                         'precompute',
1354                     )
1355                     local_cache[key] = guess
1356                     shared_cache[key] = guess
1357                     num_computed += 1
1358                 assert guess
1359                 guesses.append(guess)
1360                 hints = oracle.judge_guess(guess)
1361                 word_state.record_guess_and_hint(guess, hints)
1362                 print(
1363                     f'shard{shard_num}: '
1364                     + f'{passes}/{n}/{len(solutions)}/{solution}> '
1365                     + f'{num_remaining_words} @ {sig_remaining_words}: {guesses} # {provenance}'
1366                 )
1367
1368         # When we can make it through a pass over all solutions and
1369         # never miss the hash or shared dict, we're done.
1370         if num_computed == 0:
1371             print(
1372                 f'{ansi.bg("green")}{ansi.fg("black")}'
1373                 + f'shard{shard_num}: "{begin}".."{end}" is finished!'
1374                 + f'{ansi.reset()}'
1375             )
1376             break
1377     shared_cache.close()
1378     return f'(shard {shard_num} done)'
1379
1380
1381 def precompute():
1382     """Precompute the best guess in every situation via the expensive
1383     brute force / entropy method.  Break the solutions list into
1384     shards and execute each one in parallel.  Persist the concatenated
1385     results in a file.
1386
1387     """
1388     with open(config.config['solutions_file'], 'r') as rf:
1389         contents = rf.readlines()
1390     all_words = []
1391     length = None
1392     for word in contents:
1393         word = word[:-1]
1394         word = word.lower()
1395         if length is None:
1396             length = len(word)
1397         else:
1398             assert len(word) == length
1399         all_words.append(word)
1400
1401     max_letter_pop_per_word = get_max_letter_population()
1402     shards = []
1403     logger.debug('Sharding words into groups of 10.')
1404     for subset in list_utils.shard(all_words, 10):
1405         shards.append([x for x in subset])
1406
1407     logger.debug('Kicking off helper pool.')
1408
1409     # Shared cache is a dict that is slow to read/write but is visible
1410     # to all shards so that they can benefit from results already
1411     # computed in one of the other workers.  10 pages (~40kb) of
1412     # memory.
1413     shared_cache = SharedDict("wordle_shared_dict", 4096 * 10)
1414     results = []
1415     try:
1416         for n, shard in enumerate(shards):
1417             results.append(
1418                 do_words(
1419                     shard,
1420                     n,
1421                     shared_cache.get_name(),
1422                     SharedDict.LOCK,
1423                     max_letter_pop_per_word,
1424                 )
1425             )
1426         smart_future.wait_all(results)
1427
1428         with open(config.config['hash_file'], 'a') as wf:
1429             for key, value in shared_cache.items():
1430                 print(f'{key[0]} @ {key[1]}: {value}', file=wf)
1431     finally:
1432         shared_cache.close()
1433         shared_cache.cleanup()
1434         executors.DefaultExecutors().process_pool().shutdown()
1435
1436
1437 def get_legal_guesses() -> Set[Word]:
1438     legal_guesses = set()
1439     with open(config.config['guesses_file'], 'r') as rf:
1440         contents = rf.readlines()
1441     for line in contents:
1442         line = line[:-1]
1443         line = line.lower()
1444         legal_guesses.add(line)
1445     return legal_guesses
1446
1447
1448 def get_solution() -> Word:
1449     if config.config['template'] is not None:
1450         solution = config.config['template']
1451         print(ansi.clear())
1452     else:
1453         with open(config.config['solutions_file'], 'r') as rf:
1454             contents = rf.readlines()
1455         solution = random.choice(contents)
1456         solution = solution[:-1]
1457         solution = solution.lower()
1458         solution = solution.strip()
1459     return solution
1460
1461
1462 def give_hint(num_hints: int, player: AutoPlayer, word_state: WordState):
1463     """Give a smart(?) hint to the guesser."""
1464
1465     possible_solutions = player.get_all_possible_solutions()
1466     n = len(possible_solutions)
1467     if num_hints == 1:
1468         print(
1469             f'There {string_utils.is_are(n)} {n} possible solution{string_utils.pluralize(n)}.'
1470         )
1471     elif num_hints == 2:
1472         (freq, _) = player.get_frequency_and_frequency_by_position_tables(
1473             possible_solutions
1474         )
1475         hint_letters = sorted(freq.items(), key=lambda x: -x[1])
1476         good_hints = set(string.ascii_lowercase)
1477         for letter in word_state.letters_at_unknown_positions.keys():
1478             if len(word_state.letters_at_unknown_positions[letter]) > 0:
1479                 if letter in good_hints:
1480                     good_hints.remove(letter)
1481         for letter in word_state.letters_at_known_positions.values():
1482             if letter in good_hints:
1483                 good_hints.remove(letter)
1484         limit = 2
1485         if n < 10:
1486             limit = 1
1487         for letter, _ in hint_letters:
1488             if letter in good_hints:
1489                 print(
1490                     f'"{letter}" is popular in the possible solution{string_utils.pluralize(n)}.'
1491                 )
1492                 limit -= 1
1493                 if limit == 0:
1494                     break
1495     elif num_hints == 3:
1496         limit = 3
1497         if n < 10:
1498             limit = 1
1499         for possibility in random.sample(possible_solutions, min(limit, n)):
1500             print(f'Maybe something like "{possibility}"?')
1501     elif num_hints >= 4:
1502         best_guess = player.guess_word()
1503         print(f'Try "{best_guess}"')
1504
1505
1506 def play() -> None:
1507     """Allow users to play and color in the letter for them."""
1508
1509     legal_guesses = get_legal_guesses()
1510     solution = get_solution()
1511     oracle = AutoplayOracle(solution)
1512     max_letter_pop_per_word = get_max_letter_population()
1513     word_state = WordState(len(solution), max_letter_pop_per_word)
1514     player = AutoPlayer()
1515     player.new_word(len(solution), oracle, word_state)
1516
1517     num_guesses = 0
1518     prompt = 'Guess #_/6 (or "?" for a hint): '
1519     padding = ' ' * len(prompt)
1520     colorized_guess = "▢" * len(solution)
1521     guess = None
1522     num_hints = 0
1523
1524     while True:
1525         # Print current state + template.
1526         print(padding + colorized_guess + "     " + word_state.__repr__())
1527         if guess == solution:
1528             print('Nice!')
1529             break
1530         elif num_guesses >= 6:
1531             print('Better luck next time.')
1532             print(padding + f'{solution}')
1533             break
1534
1535         # Get a guess / command.
1536         guess = input(prompt.replace('_', str(num_guesses + 1))).lower().strip()
1537
1538         # Parse it.
1539         if guess == '?':
1540             num_hints += 1
1541             give_hint(num_hints, player, word_state)
1542             continue
1543         elif guess == '#brute':
1544             remaining_words = player.get_all_possible_solutions()
1545             brute = player.brute_force_internal(
1546                 remaining_words,
1547                 player.all_possible_guesses_by_length[len(solution)],
1548                 'precompute',
1549             )
1550             print(word_state)
1551             print(brute)
1552             continue
1553         elif len(guess) != len(solution) or guess not in legal_guesses:
1554             print(f'"{guess}" is not a legal guess, try again.')
1555             continue
1556
1557         # If we get here it was a guess.  Process it.
1558         num_guesses += 1
1559         hints = oracle.judge_guess(guess)
1560         colorized_guess = colorize_guess(guess, hints)
1561         word_state.record_guess_and_hint(guess, hints)
1562         num_hints = 0
1563
1564
1565 # The bootstrap.initialize decorator takes care of parsing our commandline
1566 # flags and populating config.  It can also do cool things like time and
1567 # profile the code run within it, audit imports, profile memory usage,
1568 # and break into pdb on unhandled exception.
1569 @bootstrap.initialize
1570 def main() -> Optional[int]:
1571     mode = config.config['mode'].upper().strip()
1572     if mode == 'AUTOPLAY':
1573         solution = config.config['template']
1574         assert solution
1575         solution = solution.lower()
1576         oracle = AutoplayOracle(solution)
1577         max_letter_pop_per_word = get_max_letter_population()
1578         word_state = WordState(len(solution), max_letter_pop_per_word)
1579         player = AutoPlayer()
1580         autoplay(solution, oracle, word_state, player)
1581         return None
1582     elif mode == 'CHEAT':
1583         return cheat()
1584     elif mode == 'PLAY':
1585         play()
1586         return None
1587     elif mode == 'SELFTEST':
1588         selftest()
1589         return None
1590     elif mode == 'PRECOMPUTE':
1591         precompute()
1592         return None
1593     raise Exception('wtf?')
1594
1595
1596 if __name__ == '__main__':
1597     main()