2 # -*- coding: utf-8 -*-
5 Wordle! PLAY, AUTOPLAY, CHEAT, PRECOMPUTE and SELFTEST.
12 import multiprocessing
18 from collections import Counter, defaultdict
19 from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
21 from pyutils import ansi, bootstrap, config, list_utils, string_utils
22 from pyutils.collectionz.shared_dict import SharedDict
23 from pyutils.files import file_utils
24 from pyutils.parallelize import executors
25 from pyutils.parallelize import parallelize as par
26 from pyutils.parallelize import smart_future
27 from pyutils.typez import histogram
29 logger = logging.getLogger(__name__)
30 args = config.add_commandline_args(
31 f'Wordle! ({__file__})',
32 'Args related to cheating at Wordle.',
38 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
41 Our mode of operation:
43 PLAY = play wordle with me! Pick a random solution or
44 specify a solution with --template.
46 CHEAT = given a --template and, optionally, --letters_in_word
47 and/or --letters_to_avoid, return the best guess word;
49 AUTOPLAY = given a complete word in --template, guess it step
52 SELFTEST = autoplay every possible solution keeping track of
53 wins/losses and average number of guesses;
55 PRECOMPUTE = populate hash table with optimal guesses.
61 help='The current board in PLAY, CHEAT, AUTOPLAY mode. Use _\'s for unknown letters.',
66 help='In CHEAT mode, the set of letters known to not be in the solution.',
73 Letters known to be in the solution but whose positions are not yet known.
77 t0i23 => says we tried a 't' as the first letter (0) so we
78 know it's in the word and not there. We also know
79 there's an 'i' which is not the middle letter (2)
80 and is not the 4th letter (3). Note the zero-based
81 position counting semantics (i.e. the first letter
83 e34f0 => mean we know there's an 'e' and an 'f'; we've attempted
84 former in positions 3-4 (recall, 4 is the last spot in
85 a standard 5 letter wordle) and the latter in the
86 first spot (0) already.
88 metavar='<LETTER><ZERO-BASED_POSITION(S)_ALREADY_TRIED>...',
93 default='wordle_solutions.txt',
94 help='Where can I find a valid word list for solutions?',
99 default='wordle_guesses.txt',
100 help='Where can I find a valid word list for guesses?',
105 default='wordle_hash.txt',
106 help='Where can I find my precomputed hash file?',
109 # Type aliases for code readability
117 class Hint(enum.IntFlag):
118 """Green, yellow or gray?"""
121 YELLOW_LETTER_RIGHT_POSITION_WRONG = 1
122 GREEN_LETTER_IN_RIGHT_POSITION = 2
125 class WordStateUndoType(enum.IntFlag):
126 """Used to record guess undo information by type."""
128 LETTER_IN_SOLUTION_MIN_CHANGE = 1
129 LETTER_IN_SOLUTION_MAX_CHANGE = 2
130 LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX = 3
131 YELLOW_LETTER_ADDED = 4
132 GREEN_LETTER_ADDED = 5
133 GRAY_LETTER_ADDED = 6
136 class WordStateUndo(NamedTuple):
137 """A record per guess containing all the info needed to undo it."""
140 hints: Dict[Position, Hint]
141 mods: List[Dict[str, Any]]
144 class WordState(object):
145 """Keeps track of the current board state, previous guesses and hints,
146 and which letters are known/unknown/eliminated, etc...
152 solution_length: int,
153 max_letter_population_per_word: Dict[Letter, int],
155 letters_at_known_positions: Optional[Dict[Position, Letter]] = None,
156 letters_at_unknown_positions: Optional[Dict[Letter, Set[Position]]] = None,
157 letters_excluded: Optional[Set[Letter]] = None,
159 """Initialize the WordState given the length of the solution word
160 (number of letters), maximum number of times a given letter
161 may appear in the solution, and, optionally, some letter
162 restrictions. All positions below are zero-based.
164 letters_at_known_positions: position => letter map
165 letters_at_unknown_positions: letter => set(position(s) tried) map
166 letters_excluded: set of letters known to not be in the word
170 assert solution_length > 0
171 self.solution_length: int = solution_length
173 # All info necessary to undo a guess later. We'll add an entry
174 # for every guess in a stack.
175 self.undo_info: List[WordStateUndo] = []
177 # We're going to use internal methods to set up the initial
178 # state and they all require an undo record to populate... but
179 # there's no initial guess and we'll never undo beyond here.
180 # So create a fake undo record for them to scribble on and
181 # throw it away later.
182 fake_undo: WordStateUndo = WordStateUndo(
183 guess="fake, will be thrown away",
188 # Across all solutions, how many times did each letter appear
189 # per word, at max. For each letter we learn is in the word
190 # we'll track a min..max valid occurrances; this is the
191 # initial max. It's the max times a given letter appears in
192 # any word in the solution set.
193 assert max_letter_population_per_word
194 self.max_letter_population_per_word = max_letter_population_per_word
196 # The min..max population for every letter we know in the solution.
197 # The List[int] here has two entries: min and max.
198 self.letters_in_solution: Dict[Letter, List[int]] = {}
200 # Green letters by where they are.
201 self.letters_at_known_positions: Dict[Position, Letter] = {}
202 if letters_at_known_positions is not None:
203 for n, letter in letters_at_known_positions.items():
204 self.record_green(n, letter, fake_undo)
206 # Yellow letters to where we've already tried them (i.e. where
208 self.letters_at_unknown_positions: Dict[Letter, Set[Position]] = defaultdict(
211 if letters_at_unknown_positions is not None:
212 for letter, tried_pos in letters_at_unknown_positions.items():
214 self.record_yellow(n, letter, fake_undo)
216 # Excluded letters discovered so far.
217 self.letters_excluded: Set[Letter] = set()
218 if letters_excluded is not None:
219 for letter in letters_excluded:
220 self.record_gray(letter, fake_undo)
222 def get_template(self) -> str:
223 """Return a representation of the board, e.g. __a_e"""
225 template = '_' * self.solution_length
226 for n in self.letters_at_known_positions:
227 letter = self.letters_at_known_positions[n]
228 template = template[0:n] + letter + template[n + 1 :]
231 def get_alphabet(self) -> str:
232 """Return a colorized set of letters eliminated and still viable."""
234 letters_remaining = set(string.ascii_lowercase)
235 green_letters = set()
236 yellow_letters = set()
237 for letter in self.letters_excluded:
238 letters_remaining.remove(letter)
239 for letter in self.letters_at_known_positions.values():
240 assert letter in letters_remaining
241 green_letters.add(letter)
242 for letter in self.letters_at_unknown_positions.keys():
243 if len(self.letters_at_unknown_positions[letter]) > 0:
244 assert letter in letters_remaining
245 yellow_letters.add(letter)
247 for letter in string.ascii_lowercase:
248 if letter in letters_remaining:
249 if letter in green_letters:
250 ret += ansi.bg('forest green')
251 ret += ansi.fg('black')
254 elif letter in yellow_letters:
255 ret += ansi.bg('energy yellow')
256 ret += ansi.fg('black')
262 ret += ansi.fg('black olive')
268 def __repr__(self) -> str:
269 return self.get_alphabet()
271 def adjust_existing_letters_in_solution_maxes(
272 self, max_max: int, undo: WordStateUndo
274 """We've determined, based on info about letters we know to be in the
275 solution and their min..max counts, what the maximum max of
276 any letter should be. Check all letters we know to be in the
277 solution and maybe tighten their maxes. If we do, remember it
278 for later undo purposes.
281 for letter in self.letters_in_solution.keys():
282 old_max = self.letters_in_solution[letter][1]
283 new_max = min(max_max, old_max)
284 if old_max != new_max:
285 self.letters_in_solution[letter][1] = new_max
288 "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
294 def record_letter_in_solution(
298 exact_count: Optional[int] = None,
300 """Letter is a (maybe new, may existing in a special case, see below)
301 letter that we know is in the solution. For each letter we
302 know is in the solution, we track a min..max window indicating
303 how many times that letter is permitted to occur in the
304 solution. Manage those here.
308 # If letter is already here, allow it to change bounds if it
309 # came with an exact_count.
310 if letter in self.letters_in_solution:
311 if exact_count is not None:
312 old_min = self.letters_in_solution[letter][0]
313 new_min = exact_count
314 if old_min != new_min:
315 self.letters_in_solution[letter][0] = new_min
318 "type": WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE,
323 old_max = self.letters_in_solution[letter][1]
324 new_max = exact_count
325 if old_max != new_max:
326 self.letters_in_solution[letter][1] = new_max
329 "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
335 # Also... since we now know exactly how many of this
336 # letter there are, maybe tighten the rest's maxes.
338 num_letters_known = len(self.letters_in_solution) - 1 + exact_count
339 max_max = self.solution_length - (num_letters_known - 1)
340 self.adjust_existing_letters_in_solution_maxes(max_max, undo)
343 # If we're here, letter is a newly discovered letter in the
344 # solution. Such a letter will likely affect the max count of
345 # existing letters since, as new letters are discovered, there
346 # is less room for the other ones in the solution.
347 if exact_count is None:
348 num_letters_known = len(self.letters_in_solution) + 1
350 num_letters_known = len(self.letters_in_solution) + exact_count
351 max_max = self.solution_length - (num_letters_known - 1)
352 self.adjust_existing_letters_in_solution_maxes(max_max, undo)
354 # Finally, add the new letter's record with its initial min/max.
355 if exact_count is None:
358 self.max_letter_population_per_word[letter],
362 initial_max = exact_count
363 initial_min = exact_count
364 self.letters_in_solution[letter] = [initial_min, initial_max]
367 "type": WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX,
379 num_correct_doppelgangers: Optional[int] = None,
381 """We've discovered a new yellow letter or a new place where an
382 existing yellow letter is still yellow. If
383 num_correct_doppelgangers is non-None, it means this letter is
384 wrong (gray) in the current location but right (green or
385 yellow) in other locations and we now know the precise count
386 of this letter in the solution -- tell the
387 record_letter_in_solution method.
390 if n not in self.letters_at_unknown_positions[letter]:
392 len(self.letters_at_unknown_positions[letter]) == 0
393 or num_correct_doppelgangers is not None
395 self.record_letter_in_solution(letter, undo, num_correct_doppelgangers)
396 self.letters_at_unknown_positions[letter].add(n)
399 "type": WordStateUndoType.YELLOW_LETTER_ADDED,
405 def record_green(self, n: Position, letter: Letter, undo: WordStateUndo):
406 """We found a new green letter."""
408 if n not in self.letters_at_known_positions:
409 self.letters_at_known_positions[n] = letter
412 "type": WordStateUndoType.GREEN_LETTER_ADDED,
417 self.record_letter_in_solution(letter, undo)
419 def record_gray(self, letter: Letter, undo: WordStateUndo) -> None:
420 """We found a new gray letter."""
422 if letter not in self.letters_excluded:
423 self.letters_excluded.add(letter)
426 "type": WordStateUndoType.GRAY_LETTER_ADDED,
431 def record_guess_and_hint(self, guess: Word, hints: Dict[Position, Hint]):
432 """Make a guess and change state based on the hint. Remember how to
433 undo everything later.
436 undo = WordStateUndo(guess=guess, hints=hints, mods=[])
437 for n, letter in enumerate(guess):
440 if hint is Hint.GRAY_WRONG:
441 # Exclude letters we got WRONG _unless_ we guessed the
442 # same letter elsewhere and got a yellow/green there.
443 # In this case the WRONG hint is not saying this
444 # letter isn't in the word (it is) but rather that
445 # there aren't N+1 instances of this letter. In this
446 # edge case we _do_ know that the letter is not valid
447 # in this position, though; treat it as yellow.
448 num_correct_doppelgangers = 0
449 for k, other in enumerate(guess):
450 if other == letter and n != k and hints[k] != Hint.GRAY_WRONG:
451 num_correct_doppelgangers += 1
452 if num_correct_doppelgangers == 0:
453 self.record_gray(letter, undo)
455 # Treat this as a yellow; we know letter can't go
456 # here or it would have been yellow/green.
457 self.record_yellow(n, letter, undo, num_correct_doppelgangers)
458 elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
459 self.record_yellow(n, letter, undo)
460 elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
461 self.record_green(n, letter, undo)
462 self.undo_info.append(undo)
464 def undo_guess(self) -> None:
465 """Unmake a guess and revert back to a previous state."""
467 assert len(self.undo_info) > 0
468 undo = self.undo_info[-1]
469 self.undo_info = self.undo_info[:-1]
471 # We iterate the mods in reverse order so we never have weird
472 # apply/undo ordering artifacts.
473 for mod in reversed(undo.mods):
474 mod_type = mod['type']
475 letter = mod['letter']
476 if mod_type is WordStateUndoType.GRAY_LETTER_ADDED:
477 self.letters_excluded.remove(letter)
478 elif mod_type is WordStateUndoType.YELLOW_LETTER_ADDED:
479 pos = mod['position']
480 self.letters_at_unknown_positions[letter].remove(pos)
481 elif mod_type is WordStateUndoType.GREEN_LETTER_ADDED:
482 pos = mod['position']
483 del self.letters_at_known_positions[pos]
484 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE:
485 old_min = mod['old_min']
486 self.letters_in_solution[letter][0] = old_min
487 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE:
488 old_max = mod['old_max']
489 self.letters_in_solution[letter][1] = old_max
490 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX:
491 del self.letters_in_solution[letter]
494 class AutoplayOracle(object):
495 """The class that knows the right solution and can give hints in
500 def __init__(self, solution: Word):
502 self.solution = solution.lower()
503 self.solution_letter_count = Counter(self.solution)
504 self.solution_letter_to_pos = defaultdict(set)
505 for n, letter in enumerate(self.solution):
506 self.solution_letter_to_pos[letter].add(n)
507 self.hint_quota_by_letter = Counter(self.solution)
509 def judge_guess(self, guess: Word) -> Dict[Position, Hint]:
510 """Returns a mapping from position -> Hint to indicate
511 whether each letter in the guess string is green, yellow
515 assert len(guess) == len(self.solution)
516 hint_by_pos: Dict[Position, Hint] = {}
517 letter_to_pos = defaultdict(set)
518 hint_quota_by_letter = self.hint_quota_by_letter.copy()
520 for position, letter in enumerate(guess):
521 letter_to_pos[letter].add(position)
523 if letter == self.solution[position]: # green
524 if hint_quota_by_letter[letter] == 0:
525 # We must tell them this letter is green in this
526 # position but we've exhausted our quota of hints
527 # for this letter. There must exist some yellow
528 # hint we previously gave that we need to turn to
530 for other in letter_to_pos[letter]:
531 if other != position:
534 == Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
536 hint_by_pos[other] = Hint.GRAY_WRONG
538 hint_by_pos[position] = Hint.GREEN_LETTER_IN_RIGHT_POSITION
540 elif self.solution_letter_count[letter] > 0: # yellow
541 if hint_quota_by_letter[letter] == 0:
542 # We're out of hints for this letter. All the
543 # other hints must be green or yellow. Tell them
544 # this one is gray (wrong).
545 hint_by_pos[position] = Hint.GRAY_WRONG
547 hint_by_pos[position] = Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
550 hint_by_pos[position] = Hint.GRAY_WRONG
551 if hint_quota_by_letter[letter] > 0:
552 hint_quota_by_letter[letter] -= 1
556 class AutoPlayer(object):
557 """This is the class that knows how to guess given the current game
558 state along with several support methods.
564 self.solution_length = None
569 # Board state tracker and move undoer
570 self.word_state = None
572 # The position hash has known best guesses for some subset of
573 # remaining words. See --mode=PRECOMPUTE for how to populate.
574 self.position_hash = {}
575 filename = config.config['hash_file']
576 if filename is not None and file_utils.file_is_readable(filename):
577 logger.debug(f'Initializing position hash from {filename}...')
578 with open(filename, 'r') as rf:
582 line = re.sub(r'#.*$', '', line)
585 (key, word) = line.split(':')
586 (count, fprint) = key.split('@')
587 count = count.strip()
589 fprint = fprint.strip()
591 self.position_hash[(count, fprint)] = word
592 logger.debug(f'...hash contains {len(self.position_hash)} entries.')
594 # All legal solutions pre-sorted by length.
595 self.all_possible_solutions_by_length = defaultdict(list)
596 filename = config.config['solutions_file']
597 if filename is not None and file_utils.file_is_readable(filename):
598 logger.debug(f'Initializing valid solution word list from {filename}...')
599 with open(filename) as rf:
603 self.all_possible_solutions_by_length[len(word)].append(word)
605 logger.error('A valid --solutions_file is required.')
608 # All legal guesses pre-sorted by length.
609 self.all_possible_guesses_by_length = defaultdict(list)
610 filename = config.config['guesses_file']
611 if filename is not None and file_utils.file_is_readable(filename):
612 logger.debug(f'Initializing legal guess word list from {filename}...')
613 with open(filename) as rf:
617 self.all_possible_guesses_by_length[len(word)].append(word)
619 logger.error('A valid --guesses_file is required.')
625 oracle: Optional[AutoplayOracle],
626 word_state: Optional[WordState],
628 """Here's a new word to guess. Reset state to play a new game. On
629 principle we don't give this class the solution. Instead, we
630 just give it length to indicate the number of characters in
631 the solution. Guesses will be turned into hints by the
632 oracle. The current game state is tracked by the
636 self.solution_length = length
638 self.word_state = word_state
640 def get_all_possible_solutions(self) -> List[Word]:
641 """Given the current known word state, compute the subset of all
642 possible solutions that are still possible. Note: this method
643 guarantees to return the possible solutions list in sorted
648 def is_possible_solution(solution: Word, word_state: WordState) -> bool:
649 """Note: very perf sensitive code; inner loop."""
651 letters_seen: Dict[Letter, int] = defaultdict(int)
652 for n, letter in enumerate(solution):
653 # The word can't contain letters we know aren't in the
655 if letter in word_state.letters_excluded:
658 # If we already tried this letter in this position and
659 # it wasn't green, this isn't a possible solution.
660 if n in word_state.letters_at_unknown_positions[letter]:
663 # If we know a letter is in a position, solution words
664 # must have that letter in that position.
666 n in word_state.letters_at_known_positions
667 and letter != word_state.letters_at_known_positions[n]
670 letters_seen[letter] += 1
672 # Finally, the word must include all letters presently
673 # known to be in the solution to be viable.
674 for letter, min_max in word_state.letters_in_solution.items():
675 num_seen = letters_seen[letter]
676 if num_seen < min_max[0]:
678 elif num_seen > min_max[1]:
682 possible_solutions = []
683 for word in self.all_possible_solutions_by_length[self.solution_length]:
684 if is_possible_solution(word, self.word_state):
685 possible_solutions.append(word)
687 # Note: because self.all_possible_solutions_by_length is sorted
688 # and we iterated it in order, possible_solutions is also sorted
690 # assert possible_solutions == sorted(possible_solutions)
691 return possible_solutions
693 def get_frequency_and_frequency_by_position_tables(
695 possible_solutions: List[Word],
696 ) -> Tuple[Dict[Letter, float], List[Dict[Letter, float]]]:
697 """This method is used by heuristic mode. It returns two tables:
699 1. The frequency count by letter for words in possible_solutions
700 to encourage guesses composed of common letters.
701 2. A letter-in-position bonus term to encourage guesses with
702 letters in positions that occur frequently in the solutions.
705 template = self.word_state.get_template()
706 pfreq: List[Dict[Letter, float]] = []
707 pop_letters: List[Dict[Letter, int]] = []
708 for n in range(len(template)):
709 pop_letters.append(defaultdict(int))
712 freq: Dict[Letter, float] = {}
713 for word in possible_solutions:
714 letter_filter = set()
715 for n, letter in enumerate(word):
717 # Only count frequency of letters still to be filled in.
718 if template[n] != '_':
721 # Do not give a full frequency bonus to repeated letters.
722 if letter not in letter_filter:
723 freq[letter] = freq.get(letter, 0) + 1
724 letter_filter.add(letter)
727 pop_letters[n][letter] += 1 # type: ignore
729 # Reward guesses that place the most popular letter in a position.
730 # Save the top 3 letters per position.
731 # don't give a bonus if letters spread all over the place?
732 # only give a bonus to a given letter in one position?
733 total_letters_in_position = len(possible_solutions)
734 for n in range(len(template)):
735 for letter, count in sorted(pop_letters[n].items(), key=lambda x: -x[1]):
738 normalized = count / total_letters_in_position
739 assert 0.0 < normalized <= 1.0
740 if normalized > 0.08:
741 pfreq[n][letter] = normalized
746 def dump_frequency_data(
748 possible_solutions: List[Word],
749 letter_frequency: Dict[Letter, float],
750 letter_position_frequency: List[Dict[Letter, float]],
752 """Just logs the frequency tables we computed above."""
754 logger.debug('Unknown letter(s) frequency table: ')
756 for letter, weight in sorted(letter_frequency.items(), key=lambda x: -x[1]):
757 out += f'{letter}:{weight}, '
762 logger.debug('Unknown letter-in-position bonus table: ')
764 for n in range(len(possible_solutions[0])):
765 pop = letter_position_frequency[n]
766 for letter, weight in sorted(pop.items(), key=lambda x: -x[1]):
767 out += f'pos{n}:{letter}@{weight:.5f}, '
772 def position_in_hash(self, num_potential_solutions: int, fprint: Fprint):
773 """Is a position in our hash table?"""
774 return (num_potential_solutions, fprint) in self.position_hash
776 def guess_word(self) -> Optional[Word]:
777 """Compute a guess word and return it. Returns None on error."""
779 template = self.word_state.get_template()
780 possible_solutions = self.get_all_possible_solutions()
781 num_possible_solutions = len(possible_solutions)
782 fprint = hashlib.md5(possible_solutions.__repr__().encode('ascii')).hexdigest()
784 n = num_possible_solutions
786 string_utils.make_contractions(
787 f'There {string_utils.is_are(n)} {n} word{string_utils.pluralize(n)} '
788 + f'({template} @ {fprint}).'
791 if num_possible_solutions < 30:
793 string_utils.make_contractions(
794 f'{string_utils.capitalize_first_letter(string_utils.it_they(n))} '
795 + f'{string_utils.is_are(n)}: {possible_solutions}'
799 f'Letter count restrictions: {self.word_state.letters_in_solution}'
801 if num_possible_solutions == 0:
802 logger.error('No possible solutions?!')
803 print('No possible solutions?!', file=sys.stderr)
804 print(self.word_state)
805 print(self.word_state.letters_in_solution)
808 elif num_possible_solutions == 1:
809 return possible_solutions[0]
811 # Check the hash table for a precomputed best guess.
812 elif self.position_in_hash(num_possible_solutions, fprint):
813 guess = self.position_hash[(num_possible_solutions, fprint)]
814 logger.debug(f'hash hit: {guess}')
817 # If there are just a few solutions possible, brute force the
818 # guess. This is expensive: for every possible solution it
819 # computes the entropy of every possible guess. Not fast for
820 # large numbers of solutions.
821 elif num_possible_solutions < 20:
823 f'Only {num_possible_solutions} solutions; using brute force strategy.'
825 return self.brute_force_internal(
827 self.all_possible_guesses_by_length[len(possible_solutions[0])],
831 # A hybrid approach: narrow down the guess list using
832 # heuristic scoring and then compute the entropy of the topN
833 # guesses via brute force.
834 elif num_possible_solutions < 100:
836 f'Only {num_possible_solutions} solutions; using hybrid strategy.'
838 return self.hybrid_search(possible_solutions)
840 # There are a lot of words left; score the guesses based on
841 # fast heuristics (i.e. letter frequency).
844 f'There are {num_possible_solutions} solutions; using fast heuristics.'
846 return self.heuristics_search(possible_solutions)
848 def brute_force_internal(
850 possible_solutions: List[Word],
851 all_guesses: List[Word],
854 """Assume every possible solution is the answer, in turn. For each
855 one, try all_guesses and pay attention to the hint we would
856 get back. Compute the entropy of each guess and return the
857 guess with the highest entropy -- i.e. the guess that gives us
858 the most information on average; or, the guess whose results
859 would take the most bits to represent.
861 Note, this is expensive. O(num_possible_solutions * all_guesses)
864 num_possible_solutions = len(possible_solutions)
865 if num_possible_solutions == 1:
866 return possible_solutions[0]
867 possible_solutions_set = set(possible_solutions) # for O(1) lookups
869 # Buckets count distinct outcomes of a guess. e.g. if a guess
870 # yields the hint G_Y__ that outcome is assigned a bucket
871 # number and we will increment the count of how many times
872 # that outcome happened for this guess across all possible
874 bucket_population_by_guess: Dict[Word, Dict[Bucket, int]] = {}
875 for guess in all_guesses:
876 bucket_population_by_guess[guess] = defaultdict(int)
878 # Pretend that each possible solution is the real solution
879 # and make every possible guess for each.
880 for solution in possible_solutions:
881 oracle = AutoplayOracle(solution)
882 for guess in all_guesses:
883 hints = oracle.judge_guess(guess)
885 # Note: this is a really good way to make sure that
886 # make/unmake moves works:
888 # before = self.word_state.__repr__()
889 self.word_state.record_guess_and_hint(guess, hints)
890 # during = self.word_state.__repr__()
892 # Map the hint returned into a bucket number and
893 # keep track of population per bucket. Below:
895 # n is a position := {0, 1, 2, 3, 4}
896 # hint[n] := {1, 2, 3}
899 for n in range(len(guess)):
900 bucket += hints[n] * (3**n)
901 bucket_population_by_guess[guess][bucket] += 1
902 self.word_state.undo_guess()
904 # after = self.word_state.__repr__()
905 # if before != after:
906 # print(f'BEFORE: {before}')
907 # print(f' WORD: {colorize_guess(guess, hints)}')
908 # print(f' HINT: {hints}')
909 # print(f'DURING: {during}')
910 # print(f' AFTER: {after}')
913 # Compute the entropy of every guess across all possible
916 # https://markmliu.medium.com/what-in-the-wordle-5dc5ed94fe2
917 # https://machinelearningmastery.com/what-is-information-entropy/
918 # https://en.wikipedia.org/wiki/Entropy_(information_theory)
921 entropy: Dict[Word, float] = {}
922 for guess in all_guesses:
924 for bucket in bucket_population_by_guess[guess]:
926 # We counted how many times this outcome occurred.
927 # The probabilty of this outcome = count / total.
928 p = float(bucket_population_by_guess[guess][bucket])
929 p /= num_possible_solutions
930 entropy[guess] += p * math.log2(p)
931 entropy[guess] = -entropy[guess]
933 if best_entropy is None:
934 best_entropy = entropy[guess]
938 # We always choose the guess with the highest entropy
939 # because this guess gives us the most information in
940 # the average case, i.e. it takes the most bits to
941 # represent the average outcome of this guess.
943 # However, in practice, usually several guesses tie
944 # for best. Prefer guesses that are also potential
945 # solutions (not just legal guesses)
946 if entropy[guess] > best_entropy or (
947 entropy[guess] == best_entropy and guess in possible_solutions_set
949 best_entropy = entropy[guess]
952 # This is just logging the results. Display the guesses with
953 # the highest entropy but also display the entropy of every
954 # possible solution, too, even if they are not best.
955 possible_solutions_seen = 0
958 for n, (guess, guess_entropy) in enumerate(
959 sorted(entropy.items(), key=lambda x: -x[1])
961 if best_entropy is None:
962 best_entropy = guess_entropy
963 if guess in possible_solutions_set:
964 possible_solutions_seen += 1
966 f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits <--'
968 elif guess_entropy == best_entropy and best_count < 15:
969 logger.debug(f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits')
971 logger.debug(f'{label}: best guess is {best_guess}.')
974 def hybrid_search(self, possible_solutions: List[Word]) -> Optional[Word]:
975 """Use heuristic scoring to reduce the number of guesses before
976 calling brute force search.
979 (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
982 self.dump_frequency_data(possible_solutions, freq, pfreq)
983 scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
988 for guess, score in sorted(scored_guesses.items(), key=lambda x: -x[1]):
989 logger.debug(f'hybrid: heuristic #{count} = {guess} with {score:.5f}')
990 topn_guesses.append(guess)
995 best_guess = self.brute_force_internal(
996 possible_solutions, topn_guesses, 'hybrid: brute_force'
1002 possible_solutions: List[Word],
1003 freq: Dict[Letter, float],
1004 pfreq: List[Dict[Letter, float]],
1005 ) -> Dict[Word, float]:
1006 """Given some letter frequency and letter+position bonus data, assign
1007 a score to every possible guess.
1010 num_possible_solutions = len(possible_solutions)
1011 assert num_possible_solutions > 1
1012 template = self.word_state.get_template()
1014 # Give every word a score composed of letter frequencies and letter
1015 # in position frequencies. This score attempts to approximate the
1016 # result of brute_force_search less expensively.
1017 word_scores: Dict[Word, float] = {}
1018 for guess in possible_solutions:
1022 letter_filter = set()
1023 for n, letter in enumerate(guess):
1025 # Ignore letters we already know.
1026 if template[n] != '_':
1029 # Is there a bonus for this letter in this position? (pfreq)
1032 pfreq_term += pop[letter] * num_possible_solutions / 4
1034 # Don't count freq bonus more than once per letter. (freq)
1035 if letter in letter_filter:
1037 freq_term += freq.get(letter, 0.0)
1038 letter_filter.add(letter)
1039 score = freq_term + pfreq_term
1040 word_scores[guess] = score
1043 def heuristics_search(self, possible_solutions: List[Word]) -> Word:
1044 """The dumbest but fastest search. Use fast heuristics to score each
1045 possible guess and then guess the one with the highest
1049 (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
1052 self.dump_frequency_data(possible_solutions, freq, pfreq)
1053 scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
1055 for n, (guess, score) in enumerate(
1056 sorted(scored_guesses.items(), key=lambda x: -x[1])
1058 if best_guess is None:
1061 logger.debug(f'heuristic: #{n}: {guess} with {score:.5f}')
1062 assert best_guess is not None
1066 def colorize_guess(guess: Word, hints: Dict[Position, Hint]) -> str:
1067 """Use hints to make the letters green/yellow/gray."""
1069 for n, letter in enumerate(guess):
1071 if hint is Hint.GRAY_WRONG:
1072 ret += ansi.bg('mist gray')
1073 elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
1074 ret += ansi.bg('energy yellow')
1075 elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
1076 ret += ansi.bg('forest green')
1077 ret += ansi.fg('black')
1083 def get_max_letter_population() -> Dict[Letter, int]:
1084 filename = config.config['solutions_file']
1085 max_letter_population_per_word: Dict[Letter, int] = defaultdict(int)
1086 if filename is not None and file_utils.file_is_readable(filename):
1088 'Figuring out all letters\' max frequency in the solution space...'
1090 with open(filename) as rf:
1094 letter_pop = Counter(word)
1095 for letter, pop in letter_pop.items():
1096 if pop > max_letter_population_per_word[letter]:
1097 max_letter_population_per_word[letter] = pop
1099 logger.error('A valid --solutions_file is required.')
1101 return max_letter_population_per_word
1106 oracle: AutoplayOracle,
1107 word_state: WordState,
1109 quiet: bool = False,
1111 """Make guesses one by one until arriving at a known solution."""
1113 logger.debug('Autoplayer mode...')
1114 player.new_word(len(solution), oracle, word_state)
1118 guess = player.guess_word()
1119 if guess is not None:
1120 hints = oracle.judge_guess(guess)
1121 word_state.record_guess_and_hint(guess, hints)
1122 guesses.append(guess)
1124 colorized_guess = colorize_guess(guess, hints)
1125 print(f'Guess #{len(guesses)}: {colorized_guess} => {word_state}')
1126 if guess == solution:
1129 logger.error(f'"{solution}" is not in my --solutions_file.')
1135 """Cheater! Given a board state, determine the best guess. Note that
1136 in this mode the solution is not known.
1139 logger.debug('Cheater mode...')
1141 template = config.config['template']
1144 # Extract known letter positions from the template.
1145 template = template.lower()
1146 letters_at_known_positions = {}
1147 for n, letter in enumerate(template):
1149 letters_at_known_positions[n] = letter
1151 # Initialize set of letters to be avoided.
1152 avoid = config.config['letters_to_avoid']
1155 avoid = avoid.lower()
1156 letters_to_avoid = set([letter for letter in avoid])
1158 # Initialize the set of letters we know are in the solution but
1160 in_word = config.config['letters_in_word']
1163 in_word = in_word.lower()
1165 # This is parsing out the --letters_in_word argument. The
1168 # <letter><1 or more zero-based positions we already tried it>
1170 # So, if we know an E is in the word (i.e. it's yellow) and
1171 # we tried it in the first and third letter already:
1175 # Note: 0 means "the first letter", i.e. position is zero based.
1177 # You can stack letters this way. e.g.:
1180 letters_in_word_at_unknown_position = defaultdict(set)
1182 for letter in in_word:
1183 if letter.isdigit():
1185 letters_in_word_at_unknown_position[last_letter].add(int(letter))
1186 elif letter.isalpha():
1187 last_letter = letter
1189 max_letter_pop_per_word = get_max_letter_population()
1190 word_state = WordState(
1192 max_letter_pop_per_word,
1193 letters_at_known_positions=letters_at_known_positions,
1194 letters_at_unknown_positions=letters_in_word_at_unknown_position,
1195 letters_excluded=letters_to_avoid,
1198 player = AutoPlayer()
1199 player.new_word(len(template), None, word_state)
1200 return player.guess_word()
1204 """Autoplay every possible solution and pay attention to statistics."""
1206 logger.debug('Selftest mode...')
1210 max_guesses_words = []
1212 hist = histogram.SimpleHistogram(
1213 [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 100)]
1215 top_guess_number = defaultdict(dict)
1218 player = AutoPlayer()
1219 with open(config.config['solutions_file'], 'r') as rf:
1220 contents = rf.readlines()
1222 max_letter_pop_per_word = get_max_letter_population()
1224 for word in contents:
1229 f'Found word "{word}" in solutions file that is not 5 letters in length. Skipping it.'
1232 oracle = AutoplayOracle(word)
1233 word_state = WordState(len(word), max_letter_pop_per_word)
1234 player.new_word(len(word), oracle, word_state)
1237 runtime = time.time() - start
1239 f'{total_words} / {len(contents)} ("{word}") = {total_words/len(contents)*100.0:.2f}% | {total_guesses/total_words:.3f} guesses/word | {runtime:.1f}s @ {runtime/total_words:.3f}s/word\r',
1242 if total_words % 100 == 0:
1243 print(f'\nAfter {total_words} words:')
1245 f'...I made {total_guesses} guesses; ({total_guesses/total_words:.3f}/word)'
1248 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1250 print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1253 guesses = autoplay(word, oracle, word_state, player, True)
1254 guess_count = len(guesses)
1257 hist.add_item(guess_count)
1258 total_guesses += guess_count
1259 for n, guess in enumerate(guesses):
1260 tops = top_guess_number[n]
1261 tops[guess] = tops.get(guess, 0) + 1
1262 if n != len(guesses) - 1:
1263 every_guess.add(guess)
1264 if max_guesses is None or guess_count > max_guesses:
1265 max_guesses = guess_count
1266 max_guesses_words = [word]
1267 elif guess_count == max_guesses:
1268 max_guesses_words.append(word)
1269 print("\nFinal Report:")
1270 print("-------------")
1271 print(f'On {total_words} words:')
1273 f'...I made {total_guesses} guesses; ({total_guesses / total_words:.3f} / word)'
1276 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1278 print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1280 for n in range(0, 8):
1281 top_guesses = top_guess_number[n]
1284 print(f'Top guesses #{n+1}: ', end='')
1285 for guess, count in sorted(top_guesses.items(), key=lambda x: -x[1]):
1286 out += f'{guess}@{count}, '
1296 @par.parallelize(method=par.Method.PROCESS)
1298 solutions: List[Word],
1300 shared_cache_name: str,
1301 lock: multiprocessing.RLock,
1302 max_letter_pop_per_word: Dict[Letter, int],
1304 """Work on precomputing one shard of the solution space, in parallel."""
1306 logger.debug(f'Shard {shard_num} owns solutions {solutions[0]}..{solutions[-1]}.')
1307 player = AutoPlayer()
1308 length = len(solutions[0])
1309 shared_cache = SharedDict(shared_cache_name, 0, lock)
1310 begin = solutions[0]
1318 local_cache: Dict[Tuple[int, Fprint], Word] = {}
1320 for n, solution in enumerate(solutions):
1321 oracle = AutoplayOracle(solution)
1322 word_state = WordState(length, max_letter_pop_per_word)
1323 player.new_word(length, oracle, word_state)
1326 # Make guesses until the next guess is not in the hash or
1329 remaining_words = player.get_all_possible_solutions()
1330 num_remaining_words = len(remaining_words)
1331 if num_remaining_words <= 1:
1334 sig_remaining_words = hashlib.md5(
1335 remaining_words.__repr__().encode('ascii')
1337 key = (num_remaining_words, sig_remaining_words)
1339 if player.position_in_hash(num_remaining_words, sig_remaining_words):
1340 provenance = 'game_hash'
1341 guess = player.guess_word()
1342 elif key in local_cache:
1343 provenance = 'local_cache'
1344 guess = local_cache[key]
1345 elif key in shared_cache:
1346 provenance = 'shared_cache'
1347 guess = shared_cache[key]
1349 provenance = 'computed'
1350 guess = player.brute_force_internal(
1352 player.all_possible_guesses_by_length[length],
1355 local_cache[key] = guess
1356 shared_cache[key] = guess
1359 guesses.append(guess)
1360 hints = oracle.judge_guess(guess)
1361 word_state.record_guess_and_hint(guess, hints)
1363 f'shard{shard_num}: '
1364 + f'{passes}/{n}/{len(solutions)}/{solution}> '
1365 + f'{num_remaining_words} @ {sig_remaining_words}: {guesses} # {provenance}'
1368 # When we can make it through a pass over all solutions and
1369 # never miss the hash or shared dict, we're done.
1370 if num_computed == 0:
1372 f'{ansi.bg("green")}{ansi.fg("black")}'
1373 + f'shard{shard_num}: "{begin}".."{end}" is finished!'
1377 shared_cache.close()
1378 return f'(shard {shard_num} done)'
1382 """Precompute the best guess in every situation via the expensive
1383 brute force / entropy method. Break the solutions list into
1384 shards and execute each one in parallel. Persist the concatenated
1388 with open(config.config['solutions_file'], 'r') as rf:
1389 contents = rf.readlines()
1392 for word in contents:
1398 assert len(word) == length
1399 all_words.append(word)
1401 max_letter_pop_per_word = get_max_letter_population()
1403 logger.debug('Sharding words into groups of 10.')
1404 for subset in list_utils.shard(all_words, 10):
1405 shards.append([x for x in subset])
1407 logger.debug('Kicking off helper pool.')
1409 # Shared cache is a dict that is slow to read/write but is visible
1410 # to all shards so that they can benefit from results already
1411 # computed in one of the other workers. 10 pages (~40kb) of
1413 shared_cache = SharedDict("wordle_shared_dict", 4096 * 10)
1416 for n, shard in enumerate(shards):
1421 shared_cache.get_name(),
1423 max_letter_pop_per_word,
1426 smart_future.wait_all(results)
1428 with open(config.config['hash_file'], 'a') as wf:
1429 for key, value in shared_cache.items():
1430 print(f'{key[0]} @ {key[1]}: {value}', file=wf)
1432 shared_cache.close()
1433 shared_cache.cleanup()
1434 executors.DefaultExecutors().process_pool().shutdown()
1437 def get_legal_guesses() -> Set[Word]:
1438 legal_guesses = set()
1439 with open(config.config['guesses_file'], 'r') as rf:
1440 contents = rf.readlines()
1441 for line in contents:
1444 legal_guesses.add(line)
1445 return legal_guesses
1448 def get_solution() -> Word:
1449 if config.config['template'] is not None:
1450 solution = config.config['template']
1453 with open(config.config['solutions_file'], 'r') as rf:
1454 contents = rf.readlines()
1455 solution = random.choice(contents)
1456 solution = solution[:-1]
1457 solution = solution.lower()
1458 solution = solution.strip()
1462 def give_hint(num_hints: int, player: AutoPlayer, word_state: WordState):
1463 """Give a smart(?) hint to the guesser."""
1465 possible_solutions = player.get_all_possible_solutions()
1466 n = len(possible_solutions)
1469 f'There {string_utils.is_are(n)} {n} possible solution{string_utils.pluralize(n)}.'
1471 elif num_hints == 2:
1472 (freq, _) = player.get_frequency_and_frequency_by_position_tables(
1475 hint_letters = sorted(freq.items(), key=lambda x: -x[1])
1476 good_hints = set(string.ascii_lowercase)
1477 for letter in word_state.letters_at_unknown_positions.keys():
1478 if len(word_state.letters_at_unknown_positions[letter]) > 0:
1479 if letter in good_hints:
1480 good_hints.remove(letter)
1481 for letter in word_state.letters_at_known_positions.values():
1482 if letter in good_hints:
1483 good_hints.remove(letter)
1487 for letter, _ in hint_letters:
1488 if letter in good_hints:
1490 f'"{letter}" is popular in the possible solution{string_utils.pluralize(n)}.'
1495 elif num_hints == 3:
1499 for possibility in random.sample(possible_solutions, min(limit, n)):
1500 print(f'Maybe something like "{possibility}"?')
1501 elif num_hints >= 4:
1502 best_guess = player.guess_word()
1503 print(f'Try "{best_guess}"')
1507 """Allow users to play and color in the letter for them."""
1509 legal_guesses = get_legal_guesses()
1510 solution = get_solution()
1511 oracle = AutoplayOracle(solution)
1512 max_letter_pop_per_word = get_max_letter_population()
1513 word_state = WordState(len(solution), max_letter_pop_per_word)
1514 player = AutoPlayer()
1515 player.new_word(len(solution), oracle, word_state)
1518 prompt = 'Guess #_/6 (or "?" for a hint): '
1519 padding = ' ' * len(prompt)
1520 colorized_guess = "▢" * len(solution)
1525 # Print current state + template.
1526 print(padding + colorized_guess + " " + word_state.__repr__())
1527 if guess == solution:
1530 elif num_guesses >= 6:
1531 print('Better luck next time.')
1532 print(padding + f'{solution}')
1535 # Get a guess / command.
1536 guess = input(prompt.replace('_', str(num_guesses + 1))).lower().strip()
1541 give_hint(num_hints, player, word_state)
1543 elif guess == '#brute':
1544 remaining_words = player.get_all_possible_solutions()
1545 brute = player.brute_force_internal(
1547 player.all_possible_guesses_by_length[len(solution)],
1553 elif len(guess) != len(solution) or guess not in legal_guesses:
1554 print(f'"{guess}" is not a legal guess, try again.')
1557 # If we get here it was a guess. Process it.
1559 hints = oracle.judge_guess(guess)
1560 colorized_guess = colorize_guess(guess, hints)
1561 word_state.record_guess_and_hint(guess, hints)
1565 # The bootstrap.initialize decorator takes care of parsing our commandline
1566 # flags and populating config. It can also do cool things like time and
1567 # profile the code run within it, audit imports, profile memory usage,
1568 # and break into pdb on unhandled exception.
1569 @bootstrap.initialize
1570 def main() -> Optional[int]:
1571 mode = config.config['mode'].upper().strip()
1572 if mode == 'AUTOPLAY':
1573 solution = config.config['template']
1575 solution = solution.lower()
1576 oracle = AutoplayOracle(solution)
1577 max_letter_pop_per_word = get_max_letter_population()
1578 word_state = WordState(len(solution), max_letter_pop_per_word)
1579 player = AutoPlayer()
1580 autoplay(solution, oracle, word_state, player)
1582 elif mode == 'CHEAT':
1584 elif mode == 'PLAY':
1587 elif mode == 'SELFTEST':
1590 elif mode == 'PRECOMPUTE':
1593 raise Exception('wtf?')
1596 if __name__ == '__main__':