2 # -*- coding: utf-8 -*-
5 Wordle! PLAY, AUTOPLAY, CHEAT, PRECOMPUTE and SELFTEST.
12 import multiprocessing
18 from collections import Counter, defaultdict
19 from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
21 from pyutils import ansi, bootstrap, config, list_utils, string_utils
22 from pyutils.collectionz.shared_dict import SharedDict
23 from pyutils.files import file_utils
24 from pyutils.parallelize import executors
25 from pyutils.parallelize import parallelize as par
26 from pyutils.parallelize import smart_future
27 from pyutils.typez import histogram
29 logger = logging.getLogger(__name__)
30 args = config.add_commandline_args(
31 f'Wordle! ({__file__})',
32 'Args related to cheating at Wordle.',
38 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
41 Our mode of operation:
43 PLAY = play wordle with me! Pick a random solution or
44 specify a solution with --template.
46 CHEAT = given a --template and, optionally, --letters_in_word
47 and/or --letters_to_avoid, return the best guess word;
49 AUTOPLAY = given a complete word in --template, guess it step
52 SELFTEST = autoplay every possible solution keeping track of
53 wins/losses and average number of guesses;
55 PRECOMPUTE = populate hash table with optimal guesses.
61 help='The current board in PLAY, CHEAT, AUTOPLAY mode. Use _\'s for unknown letters.',
66 help='In CHEAT mode, the set of letters known to not be in the solution.',
73 Letters known to be in the solution but whose positions are not yet known.
77 t0i23 => says we tried a 't' as the first letter (0) so we
78 know it's in the word and not there. We also know
79 there's an 'i' which is not the middle letter (2)
80 and is not the 4th letter (3). Note the zero-based
81 position counting semantics (i.e. the first letter
83 e34f0 => mean we know there's an 'e' and an 'f'; we've attempted
84 former in positions 3-4 (recall, 4 is the last spot in
85 a standard 5 letter wordle) and the latter in the
86 first spot (0) already.
88 metavar='<LETTER><ZERO-BASED_POSITION(S)_ALREADY_TRIED>...',
93 default='wordle_solutions.txt',
94 help='Where can I find a valid word list for solutions?',
99 default='wordle_guesses.txt',
100 help='Where can I find a valid word list for guesses?',
105 default='wordle_hash.txt',
106 help='Where can I find my precomputed hash file?',
109 # Type aliases for code readability
117 class Hint(enum.IntFlag):
118 """Green, yellow or gray?"""
121 YELLOW_LETTER_RIGHT_POSITION_WRONG = 1
122 GREEN_LETTER_IN_RIGHT_POSITION = 2
125 class WordStateUndoType(enum.IntFlag):
126 """Used to record guess undo information by type."""
128 LETTER_IN_SOLUTION_MIN_CHANGE = 1
129 LETTER_IN_SOLUTION_MAX_CHANGE = 2
130 LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX = 3
131 YELLOW_LETTER_ADDED = 4
132 GREEN_LETTER_ADDED = 5
133 GRAY_LETTER_ADDED = 6
136 class WordStateUndo(NamedTuple):
137 """A record per guess containing all the info needed to undo it."""
140 hints: Dict[Position, Hint]
141 mods: List[Dict[str, Any]]
144 class WordState(object):
145 """Keeps track of the current board state, previous guesses and hints,
146 and which letters are known/unknown/eliminated, etc...
152 solution_length: int,
153 max_letter_population_per_word: Dict[Letter, int],
155 letters_at_known_positions: Optional[Dict[Position, Letter]] = None,
156 letters_at_unknown_positions: Optional[Dict[Letter, Set[Position]]] = None,
157 letters_excluded: Optional[Set[Letter]] = None,
159 """Initialize the WordState given the length of the solution word
160 (number of letters), maximum number of times a given letter
161 may appear in the solution, and, optionally, some letter
162 restrictions. All positions below are zero-based.
164 letters_at_known_positions: position => letter map
165 letters_at_unknown_positions: letter => set(position(s) tried) map
166 letters_excluded: set of letters known to not be in the word
170 assert solution_length > 0
171 self.solution_length: int = solution_length
173 # All info necessary to undo a guess later. We'll add an entry
174 # for every guess in a stack.
175 self.undo_info: List[WordStateUndo] = []
177 # We're going to use internal methods to set up the initial
178 # state and they all require an undo record to populate... but
179 # there's no initial guess and we'll never undo beyond here.
180 # So create a fake undo record for them to scribble on and
181 # throw it away later.
182 fake_undo: WordStateUndo = WordStateUndo(
183 guess="fake, will be thrown away",
188 # Across all solutions, how many times did each letter appear
189 # per word, at max. For each letter we learn is in the word
190 # we'll track a min..max valid occurrances; this is the
191 # initial max. It's the max times a given letter appears in
192 # any word in the solution set.
193 assert max_letter_population_per_word
194 self.max_letter_population_per_word = max_letter_population_per_word
196 # The min..max population for every letter we know in the solution.
197 # The List[int] here has two entries: min and max.
198 self.letters_in_solution: Dict[Letter, List[int]] = {}
200 # Green letters by where they are.
201 self.letters_at_known_positions: Dict[Position, Letter] = {}
202 if letters_at_known_positions is not None:
203 for n, letter in letters_at_known_positions.items():
204 self.record_green(n, letter, fake_undo)
206 # Yellow letters to where we've already tried them (i.e. where
208 self.letters_at_unknown_positions: Dict[Letter, Set[Position]] = defaultdict(
211 if letters_at_unknown_positions is not None:
212 for letter, tried_pos in letters_at_unknown_positions.items():
214 self.record_yellow(n, letter, fake_undo)
216 # Excluded letters discovered so far.
217 self.letters_excluded: Set[Letter] = set()
218 if letters_excluded is not None:
219 for letter in letters_excluded:
220 self.record_gray(letter, fake_undo)
222 def get_template(self) -> str:
223 """Return a representation of the board, e.g. __a_e"""
225 template = '_' * self.solution_length
226 for n in self.letters_at_known_positions:
227 letter = self.letters_at_known_positions[n]
228 template = template[0:n] + letter + template[n + 1 :]
231 def get_alphabet(self) -> str:
232 """Return a colorized set of letters eliminated and still viable."""
234 letters_remaining = set(string.ascii_lowercase)
235 green_letters = set()
236 yellow_letters = set()
237 for letter in self.letters_excluded:
238 letters_remaining.remove(letter)
239 for letter in self.letters_at_known_positions.values():
240 assert letter in letters_remaining
241 green_letters.add(letter)
242 for letter in self.letters_at_unknown_positions.keys():
243 if len(self.letters_at_unknown_positions[letter]) > 0:
244 assert letter in letters_remaining
245 yellow_letters.add(letter)
247 for letter in string.ascii_lowercase:
248 if letter in letters_remaining:
249 if letter in green_letters:
250 ret += ansi.bg('forest green')
251 ret += ansi.fg('black')
254 elif letter in yellow_letters:
255 ret += ansi.bg('energy yellow')
256 ret += ansi.fg('black')
262 ret += ansi.fg('black olive')
268 def __repr__(self) -> str:
269 return self.get_alphabet()
271 def adjust_existing_letters_in_solution_maxes(
272 self, max_max: int, undo: WordStateUndo
274 """We've determined, based on info about letters we know to be in the
275 solution and their min..max counts, what the maximum max of
276 any letter should be. Check all letters we know to be in the
277 solution and maybe tighten their maxes. If we do, remember it
278 for later undo purposes.
281 for letter in self.letters_in_solution.keys():
282 old_max = self.letters_in_solution[letter][1]
283 new_max = min(max_max, old_max)
284 if old_max != new_max:
285 self.letters_in_solution[letter][1] = new_max
288 "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
294 def record_letter_in_solution(
298 exact_count: Optional[int] = None,
300 """Letter is a (maybe new, may existing in a special case, see below)
301 letter that we know is in the solution. For each letter we
302 know is in the solution, we track a min..max window indicating
303 how many times that letter is permitted to occur in the
304 solution. Manage those here.
308 # If letter is already here, allow it to change bounds if it
309 # came with an exact_count.
310 if letter in self.letters_in_solution:
311 if exact_count is not None:
312 old_min = self.letters_in_solution[letter][0]
313 new_min = exact_count
314 if old_min != new_min:
315 self.letters_in_solution[letter][0] = new_min
318 "type": WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE,
323 old_max = self.letters_in_solution[letter][1]
324 new_max = exact_count
325 if old_max != new_max:
326 self.letters_in_solution[letter][1] = new_max
329 "type": WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE,
335 # Also... since we now know exactly how many of this
336 # letter there are, maybe tighten the rest's maxes.
338 num_letters_known = len(self.letters_in_solution) - 1 + exact_count
339 max_max = self.solution_length - (num_letters_known - 1)
340 self.adjust_existing_letters_in_solution_maxes(max_max, undo)
343 # If we're here, letter is a newly discovered letter in the
344 # solution. Such a letter will likely affect the max count of
345 # existing letters since, as new letters are discovered, there
346 # is less room for the other ones in the solution.
347 if exact_count is None:
348 num_letters_known = len(self.letters_in_solution) + 1
350 num_letters_known = len(self.letters_in_solution) + exact_count
351 max_max = self.solution_length - (num_letters_known - 1)
352 self.adjust_existing_letters_in_solution_maxes(max_max, undo)
354 # Finally, add the new letter's record with its initial min/max.
355 if exact_count is None:
358 self.max_letter_population_per_word[letter],
362 initial_max = exact_count
363 initial_min = exact_count
364 self.letters_in_solution[letter] = [initial_min, initial_max]
367 "type": WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX,
379 num_correct_doppelgangers: Optional[int] = None,
381 """We've discovered a new yellow letter or a new place where an
382 existing yellow letter is still yellow. If
383 num_correct_doppelgangers is non-None, it means this letter is
384 wrong (gray) in the current location but right (green or
385 yellow) in other locations and we now know the precise count
386 of this letter in the solution -- tell the
387 record_letter_in_solution method.
390 if n not in self.letters_at_unknown_positions[letter]:
392 len(self.letters_at_unknown_positions[letter]) == 0
393 or num_correct_doppelgangers is not None
395 self.record_letter_in_solution(letter, undo, num_correct_doppelgangers)
396 self.letters_at_unknown_positions[letter].add(n)
399 "type": WordStateUndoType.YELLOW_LETTER_ADDED,
405 def record_green(self, n: Position, letter: Letter, undo: WordStateUndo):
406 """We found a new green letter."""
408 if n not in self.letters_at_known_positions:
409 self.letters_at_known_positions[n] = letter
412 "type": WordStateUndoType.GREEN_LETTER_ADDED,
417 self.record_letter_in_solution(letter, undo)
419 def record_gray(self, letter: Letter, undo: WordStateUndo) -> None:
420 """We found a new gray letter."""
422 if letter not in self.letters_excluded:
423 self.letters_excluded.add(letter)
426 "type": WordStateUndoType.GRAY_LETTER_ADDED,
431 def record_guess_and_hint(self, guess: Word, hints: Dict[Position, Hint]):
432 """Make a guess and change state based on the hint. Remember how to
433 undo everything later.
436 undo = WordStateUndo(guess=guess, hints=hints, mods=[])
437 for n, letter in enumerate(guess):
440 if hint is Hint.GRAY_WRONG:
441 # Exclude letters we got WRONG _unless_ we guessed the
442 # same letter elsewhere and got a yellow/green there.
443 # In this case the WRONG hint is not saying this
444 # letter isn't in the word (it is) but rather that
445 # there aren't N+1 instances of this letter. In this
446 # edge case we _do_ know that the letter is not valid
447 # in this position, though; treat it as yellow.
448 num_correct_doppelgangers = 0
449 for k, other in enumerate(guess):
450 if other == letter and n != k and hints[k] != Hint.GRAY_WRONG:
451 num_correct_doppelgangers += 1
452 if num_correct_doppelgangers == 0:
453 self.record_gray(letter, undo)
455 # Treat this as a yellow; we know letter can't go
456 # here or it would have been yellow/green.
457 self.record_yellow(n, letter, undo, num_correct_doppelgangers)
458 elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
459 self.record_yellow(n, letter, undo)
460 elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
461 self.record_green(n, letter, undo)
462 self.undo_info.append(undo)
464 def undo_guess(self) -> None:
465 """Unmake a guess and revert back to a previous state."""
467 assert len(self.undo_info) > 0
468 undo = self.undo_info[-1]
469 self.undo_info = self.undo_info[:-1]
471 # We iterate the mods in reverse order so we never have weird
472 # apply/undo ordering artifacts.
473 for mod in reversed(undo.mods):
474 mod_type = mod['type']
475 letter = mod['letter']
476 if mod_type is WordStateUndoType.GRAY_LETTER_ADDED:
477 self.letters_excluded.remove(letter)
478 elif mod_type is WordStateUndoType.YELLOW_LETTER_ADDED:
479 pos = mod['position']
480 self.letters_at_unknown_positions[letter].remove(pos)
481 elif mod_type is WordStateUndoType.GREEN_LETTER_ADDED:
482 pos = mod['position']
483 del self.letters_at_known_positions[pos]
484 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MIN_CHANGE:
485 old_min = mod['old_min']
486 self.letters_in_solution[letter][0] = old_min
487 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_MAX_CHANGE:
488 old_max = mod['old_max']
489 self.letters_in_solution[letter][1] = old_max
490 elif mod_type is WordStateUndoType.LETTER_IN_SOLUTION_ADDED_WITH_MIN_MAX:
491 del self.letters_in_solution[letter]
494 class AutoplayOracle(object):
495 """The class that knows the right solution and can give hints in
500 def __init__(self, solution: Word):
502 self.solution = solution.lower()
503 self.solution_letter_count = Counter(self.solution)
504 self.solution_letter_to_pos = defaultdict(set)
505 for n, letter in enumerate(self.solution):
506 self.solution_letter_to_pos[letter].add(n)
507 self.hint_quota_by_letter = Counter(self.solution)
509 def judge_guess(self, guess: Word) -> Dict[Position, Hint]:
510 """Returns a mapping from position -> Hint to indicate
511 whether each letter in the guess string is green, yellow
515 assert len(guess) == len(self.solution)
516 hint_by_pos: Dict[Position, Hint] = {}
517 letter_to_pos = defaultdict(set)
518 hint_quota_by_letter = self.hint_quota_by_letter.copy()
520 for position, letter in enumerate(guess):
521 letter_to_pos[letter].add(position)
523 if letter == self.solution[position]: # green
524 if hint_quota_by_letter[letter] == 0:
525 # We must tell them this letter is green in this
526 # position but we've exhausted our quota of hints
527 # for this letter. There must exist some yellow
528 # hint we previously gave that we need to turn to
530 for other in letter_to_pos[letter]:
531 if other != position:
534 == Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
536 hint_by_pos[other] = Hint.GRAY_WRONG
538 hint_by_pos[position] = Hint.GREEN_LETTER_IN_RIGHT_POSITION
540 elif self.solution_letter_count[letter] > 0: # yellow
541 if hint_quota_by_letter[letter] == 0:
542 # We're out of hints for this letter. All the
543 # other hints must be green or yellow. Tell them
544 # this one is gray (wrong).
545 hint_by_pos[position] = Hint.GRAY_WRONG
547 hint_by_pos[position] = Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG
550 hint_by_pos[position] = Hint.GRAY_WRONG
551 if hint_quota_by_letter[letter] > 0:
552 hint_quota_by_letter[letter] -= 1
556 class AutoPlayer(object):
557 """This is the class that knows how to guess given the current game
558 state along with several support methods.
564 self.solution_length = None
569 # Board state tracker and move undoer
570 self.word_state = None
572 # The position hash has known best guesses for some subset of
573 # remaining words. See --mode=PRECOMPUTE for how to populate.
574 self.position_hash = {}
575 filename = config.config['hash_file']
576 if filename is not None and file_utils.is_readable(filename):
577 logger.debug('Initializing position hash from %s...', filename)
578 with open(filename, 'r') as rf:
582 line = re.sub(r'#.*$', '', line)
585 (key, word) = line.split(':')
586 (count, fprint) = key.split('@')
587 count = count.strip()
589 fprint = fprint.strip()
591 self.position_hash[(count, fprint)] = word
592 logger.debug('...hash contains %s entries.', len(self.position_hash))
594 # All legal solutions pre-sorted by length.
595 self.all_possible_solutions_by_length = defaultdict(list)
596 filename = config.config['solutions_file']
597 if filename is not None and file_utils.is_readable(filename):
598 logger.debug('Initializing valid solution word list from %s...', filename)
599 with open(filename) as rf:
603 self.all_possible_solutions_by_length[len(word)].append(word)
605 logger.error('A valid --solutions_file is required.')
608 # All legal guesses pre-sorted by length.
609 self.all_possible_guesses_by_length = defaultdict(list)
610 filename = config.config['guesses_file']
611 if filename is not None and file_utils.is_readable(filename):
612 logger.debug('Initializing legal guess word list from %s...', filename)
613 with open(filename) as rf:
617 self.all_possible_guesses_by_length[len(word)].append(word)
619 logger.error('A valid --guesses_file is required.')
625 oracle: Optional[AutoplayOracle],
626 word_state: Optional[WordState],
628 """Here's a new word to guess. Reset state to play a new game. On
629 principle we don't give this class the solution. Instead, we
630 just give it length to indicate the number of characters in
631 the solution. Guesses will be turned into hints by the
632 oracle. The current game state is tracked by the
636 self.solution_length = length
638 self.word_state = word_state
640 def get_all_possible_solutions(self) -> List[Word]:
641 """Given the current known word state, compute the subset of all
642 possible solutions that are still possible. Note: this method
643 guarantees to return the possible solutions list in sorted
648 def is_possible_solution(solution: Word, word_state: WordState) -> bool:
649 """Note: very perf sensitive code; inner loop."""
651 letters_seen: Dict[Letter, int] = defaultdict(int)
652 for n, letter in enumerate(solution):
653 # The word can't contain letters we know aren't in the
655 if letter in word_state.letters_excluded:
658 # If we know a letter is in a position, solution words
659 # must have that letter in that position.
661 n in word_state.letters_at_known_positions
662 and letter != word_state.letters_at_known_positions[n]
666 # If we already tried this letter in this position and
667 # it wasn't green, this isn't a possible solution.
668 if n in word_state.letters_at_unknown_positions[letter]:
671 letters_seen[letter] += 1
673 # Finally, the word must include all letters presently
674 # known to be in the solution to be viable.
675 for letter, min_max in word_state.letters_in_solution.items():
676 num_seen = letters_seen[letter]
677 if num_seen < min_max[0]:
679 elif num_seen > min_max[1]:
683 possible_solutions = []
684 for word in self.all_possible_solutions_by_length[self.solution_length]:
685 if is_possible_solution(word, self.word_state):
686 possible_solutions.append(word)
688 # Note: because self.all_possible_solutions_by_length is sorted
689 # and we iterated it in order, possible_solutions is also sorted
691 # assert possible_solutions == sorted(possible_solutions)
692 return possible_solutions
694 def get_frequency_and_frequency_by_position_tables(
696 possible_solutions: List[Word],
697 ) -> Tuple[Dict[Letter, float], List[Dict[Letter, float]]]:
698 """This method is used by heuristic mode. It returns two tables:
700 1. The frequency count by letter for words in possible_solutions
701 to encourage guesses composed of common letters.
702 2. A letter-in-position bonus term to encourage guesses with
703 letters in positions that occur frequently in the solutions.
706 template = self.word_state.get_template()
707 pfreq: List[Dict[Letter, float]] = []
708 pop_letters: List[Dict[Letter, int]] = []
709 for n in range(len(template)):
710 pop_letters.append(defaultdict(int))
713 freq: Dict[Letter, float] = {}
714 for word in possible_solutions:
715 letter_filter = set()
716 for n, letter in enumerate(word):
718 # Only count frequency of letters still to be filled in.
719 if template[n] != '_':
722 # Do not give a full frequency bonus to repeated letters.
723 if letter not in letter_filter:
724 freq[letter] = freq.get(letter, 0) + 1
725 letter_filter.add(letter)
728 pop_letters[n][letter] += 1 # type: ignore
730 # Reward guesses that place the most popular letter in a position.
731 # Save the top 3 letters per position.
732 # don't give a bonus if letters spread all over the place?
733 # only give a bonus to a given letter in one position?
734 total_letters_in_position = len(possible_solutions)
735 for n in range(len(template)):
736 for letter, count in sorted(pop_letters[n].items(), key=lambda x: -x[1]):
739 normalized = count / total_letters_in_position
740 assert 0.0 < normalized <= 1.0
741 if normalized > 0.08:
742 pfreq[n][letter] = normalized
747 def dump_frequency_data(
749 possible_solutions: List[Word],
750 letter_frequency: Dict[Letter, float],
751 letter_position_frequency: List[Dict[Letter, float]],
753 """Just logs the frequency tables we computed above."""
755 logger.debug('Unknown letter(s) frequency table: ')
757 for letter, weight in sorted(letter_frequency.items(), key=lambda x: -x[1]):
758 out += f'{letter}:{weight}, '
763 logger.debug('Unknown letter-in-position bonus table: ')
765 for n in range(len(possible_solutions[0])):
766 pop = letter_position_frequency[n]
767 for letter, weight in sorted(pop.items(), key=lambda x: -x[1]):
768 out += f'pos{n}:{letter}@{weight:.5f}, '
773 def position_in_hash(self, num_potential_solutions: int, fprint: Fprint):
774 """Is a position in our hash table?"""
775 return (num_potential_solutions, fprint) in self.position_hash
777 def guess_word(self) -> Optional[Word]:
778 """Compute a guess word and return it. Returns None on error."""
780 template = self.word_state.get_template()
781 possible_solutions = self.get_all_possible_solutions()
782 num_possible_solutions = len(possible_solutions)
783 fprint = hashlib.md5(repr(possible_solutions).encode('ascii')).hexdigest()
785 n = num_possible_solutions
787 string_utils.make_contractions(
788 f'There {string_utils.is_are(n)} {n} word{string_utils.pluralize(n)} '
789 + f'({template} @ {fprint}).'
792 if num_possible_solutions < 30:
794 string_utils.make_contractions(
795 f'{string_utils.capitalize_first_letter(string_utils.it_they(n))} '
796 + f'{string_utils.is_are(n)}: {possible_solutions}'
800 'Letter count restrictions: %s', self.word_state.letters_in_solution
802 if num_possible_solutions == 0:
803 logger.error('No possible solutions?!')
804 print('No possible solutions?!', file=sys.stderr)
805 print(self.word_state)
806 print(self.word_state.letters_in_solution)
809 elif num_possible_solutions == 1:
810 return possible_solutions[0]
812 # Check the hash table for a precomputed best guess.
813 elif self.position_in_hash(num_possible_solutions, fprint):
814 guess = self.position_hash[(num_possible_solutions, fprint)]
815 logger.debug('hash hit: %s', guess)
818 # If there are just a few solutions possible, brute force the
819 # guess. This is expensive: for every possible solution it
820 # computes the entropy of every possible guess. Not fast for
821 # large numbers of solutions.
822 elif num_possible_solutions < 20:
824 'Only %d solutions; using brute force strategy.', num_possible_solutions
826 return self.brute_force_internal(
828 self.all_possible_guesses_by_length[len(possible_solutions[0])],
832 # A hybrid approach: narrow down the guess list using
833 # heuristic scoring and then compute the entropy of the topN
834 # guesses via brute force.
835 elif num_possible_solutions < 100:
837 'Only %d solutions; using hybrid strategy.', num_possible_solutions
839 return self.hybrid_search(possible_solutions)
841 # There are a lot of words left; score the guesses based on
842 # fast heuristics (i.e. letter frequency).
845 'There are %s solutions; using fast heuristics.', num_possible_solutions
847 return self.heuristics_search(possible_solutions)
849 def brute_force_internal(
851 possible_solutions: List[Word],
852 all_guesses: List[Word],
855 """Assume every possible solution is the answer, in turn. For each
856 one, try all_guesses and pay attention to the hint we would
857 get back. Compute the entropy of each guess and return the
858 guess with the highest entropy -- i.e. the guess that gives us
859 the most information on average; or, the guess whose results
860 would take the most bits to represent.
862 Note, this is expensive. O(num_possible_solutions * all_guesses)
865 num_possible_solutions = len(possible_solutions)
866 if num_possible_solutions == 1:
867 return possible_solutions[0]
868 possible_solutions_set = set(possible_solutions) # for O(1) lookups
870 # Buckets count distinct outcomes of a guess. e.g. if a guess
871 # yields the hint G_Y__ that outcome is assigned a bucket
872 # number and we will increment the count of how many times
873 # that outcome happened for this guess across all possible
875 bucket_population_by_guess: Dict[Word, Dict[Bucket, int]] = {}
876 for guess in all_guesses:
877 bucket_population_by_guess[guess] = defaultdict(int)
879 # Pretend that each possible solution is the real solution
880 # and make every possible guess for each.
881 for solution in possible_solutions:
882 oracle = AutoplayOracle(solution)
883 for guess in all_guesses:
884 hints = oracle.judge_guess(guess)
886 # Note: this is a really good way to make sure that
887 # make/unmake moves works:
889 # before = self.word_state.__repr__()
890 self.word_state.record_guess_and_hint(guess, hints)
891 # during = self.word_state.__repr__()
893 # Map the hint returned into a bucket number and
894 # keep track of population per bucket. Below:
896 # n is a position := {0, 1, 2, 3, 4}
897 # hint[n] := {1, 2, 3}
900 for n in range(len(guess)):
901 bucket += hints[n] * (3**n)
902 bucket_population_by_guess[guess][bucket] += 1
903 self.word_state.undo_guess()
905 # after = self.word_state.__repr__()
906 # if before != after:
907 # print(f'BEFORE: {before}')
908 # print(f' WORD: {colorize_guess(guess, hints)}')
909 # print(f' HINT: {hints}')
910 # print(f'DURING: {during}')
911 # print(f' AFTER: {after}')
914 # Compute the entropy of every guess across all possible
917 # https://markmliu.medium.com/what-in-the-wordle-5dc5ed94fe2
918 # https://machinelearningmastery.com/what-is-information-entropy/
919 # https://en.wikipedia.org/wiki/Entropy_(information_theory)
922 entropy: Dict[Word, float] = {}
923 for guess in all_guesses:
925 for bucket in bucket_population_by_guess[guess]:
927 # We counted how many times this outcome occurred.
928 # The probabilty of this outcome = count / total.
929 p = float(bucket_population_by_guess[guess][bucket])
930 p /= num_possible_solutions
931 entropy[guess] += p * math.log2(p)
932 entropy[guess] = -entropy[guess]
934 if best_entropy is None:
935 best_entropy = entropy[guess]
939 # We always choose the guess with the highest entropy
940 # because this guess gives us the most information in
941 # the average case, i.e. it takes the most bits to
942 # represent the average outcome of this guess.
944 # However, in practice, usually several guesses tie
945 # for best. Prefer guesses that are also potential
946 # solutions (not just legal guesses)
947 if entropy[guess] > best_entropy or (
948 entropy[guess] == best_entropy and guess in possible_solutions_set
950 best_entropy = entropy[guess]
953 # This is just logging the results. Display the guesses with
954 # the highest entropy but also display the entropy of every
955 # possible solution, too, even if they are not best.
956 possible_solutions_seen = 0
959 for n, (guess, guess_entropy) in enumerate(
960 sorted(entropy.items(), key=lambda x: -x[1])
962 if best_entropy is None:
963 best_entropy = guess_entropy
964 if guess in possible_solutions_set:
965 possible_solutions_seen += 1
967 f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits <--'
969 elif guess_entropy == best_entropy and best_count < 15:
970 logger.debug(f'{label}: #{n}: {guess} with {guess_entropy:.5f} bits')
972 logger.debug('%s: best guess is %s.', label, best_guess)
975 def hybrid_search(self, possible_solutions: List[Word]) -> Optional[Word]:
976 """Use heuristic scoring to reduce the number of guesses before
977 calling brute force search.
980 (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
983 self.dump_frequency_data(possible_solutions, freq, pfreq)
984 scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
989 for guess, score in sorted(scored_guesses.items(), key=lambda x: -x[1]):
990 logger.debug(f'hybrid: heuristic #{count} = {guess} with {score:.5f}')
991 topn_guesses.append(guess)
996 best_guess = self.brute_force_internal(
997 possible_solutions, topn_guesses, 'hybrid: brute_force'
1003 possible_solutions: List[Word],
1004 freq: Dict[Letter, float],
1005 pfreq: List[Dict[Letter, float]],
1006 ) -> Dict[Word, float]:
1007 """Given some letter frequency and letter+position bonus data, assign
1008 a score to every possible guess.
1011 num_possible_solutions = len(possible_solutions)
1012 assert num_possible_solutions > 1
1013 template = self.word_state.get_template()
1015 # Give every word a score composed of letter frequencies and letter
1016 # in position frequencies. This score attempts to approximate the
1017 # result of brute_force_search less expensively.
1018 word_scores: Dict[Word, float] = {}
1019 for guess in possible_solutions:
1023 letter_filter = set()
1024 for n, letter in enumerate(guess):
1026 # Ignore letters we already know.
1027 if template[n] != '_':
1030 # Is there a bonus for this letter in this position? (pfreq)
1033 pfreq_term += pop[letter] * num_possible_solutions / 4
1035 # Don't count freq bonus more than once per letter. (freq)
1036 if letter in letter_filter:
1038 freq_term += freq.get(letter, 0.0)
1039 letter_filter.add(letter)
1040 score = freq_term + pfreq_term
1041 word_scores[guess] = score
1044 def heuristics_search(self, possible_solutions: List[Word]) -> Word:
1045 """The dumbest but fastest search. Use fast heuristics to score each
1046 possible guess and then guess the one with the highest
1050 (freq, pfreq) = self.get_frequency_and_frequency_by_position_tables(
1053 self.dump_frequency_data(possible_solutions, freq, pfreq)
1054 scored_guesses = self.assign_scores(possible_solutions, freq, pfreq)
1056 for n, (guess, score) in enumerate(
1057 sorted(scored_guesses.items(), key=lambda x: -x[1])
1059 if best_guess is None:
1062 logger.debug(f'heuristic: #{n}: {guess} with {score:.5f}')
1063 assert best_guess is not None
1067 def colorize_guess(guess: Word, hints: Dict[Position, Hint]) -> str:
1068 """Use hints to make the letters green/yellow/gray."""
1070 for n, letter in enumerate(guess):
1072 if hint is Hint.GRAY_WRONG:
1073 ret += ansi.bg('mist gray')
1074 elif hint is Hint.YELLOW_LETTER_RIGHT_POSITION_WRONG:
1075 ret += ansi.bg('energy yellow')
1076 elif hint is Hint.GREEN_LETTER_IN_RIGHT_POSITION:
1077 ret += ansi.bg('forest green')
1078 ret += ansi.fg('black')
1084 def get_max_letter_population() -> Dict[Letter, int]:
1085 filename = config.config['solutions_file']
1086 max_letter_population_per_word: Dict[Letter, int] = defaultdict(int)
1087 if filename is not None and file_utils.is_readable(filename):
1089 'Figuring out all letters\' max frequency in the solution space...'
1091 with open(filename) as rf:
1095 letter_pop = Counter(word)
1096 for letter, pop in letter_pop.items():
1097 if pop > max_letter_population_per_word[letter]:
1098 max_letter_population_per_word[letter] = pop
1100 logger.error('A valid --solutions_file is required.')
1102 return max_letter_population_per_word
1107 oracle: AutoplayOracle,
1108 word_state: WordState,
1110 quiet: bool = False,
1112 """Make guesses one by one until arriving at a known solution."""
1114 logger.debug('Autoplayer mode...')
1115 player.new_word(len(solution), oracle, word_state)
1119 guess = player.guess_word()
1120 if guess is not None:
1121 hints = oracle.judge_guess(guess)
1122 word_state.record_guess_and_hint(guess, hints)
1123 guesses.append(guess)
1125 colorized_guess = colorize_guess(guess, hints)
1126 print(f'Guess #{len(guesses)}: {colorized_guess} => {word_state}')
1127 if guess == solution:
1130 logger.error(f'"{solution}" is not in my --solutions_file.')
1136 """Cheater! Given a board state, determine the best guess. Note that
1137 in this mode the solution is not known.
1140 logger.debug('Cheater mode...')
1142 template = config.config['template']
1145 # Extract known letter positions from the template.
1146 template = template.lower()
1147 letters_at_known_positions = {}
1148 for n, letter in enumerate(template):
1150 letters_at_known_positions[n] = letter
1152 # Initialize set of letters to be avoided.
1153 avoid = config.config['letters_to_avoid']
1156 avoid = avoid.lower()
1157 letters_to_avoid = set(list(avoid))
1159 # Initialize the set of letters we know are in the solution but
1161 in_word = config.config['letters_in_word']
1164 in_word = in_word.lower()
1166 # This is parsing out the --letters_in_word argument. The
1169 # <letter><1 or more zero-based positions we already tried it>
1171 # So, if we know an E is in the word (i.e. it's yellow) and
1172 # we tried it in the first and third letter already:
1176 # Note: 0 means "the first letter", i.e. position is zero based.
1178 # You can stack letters this way. e.g.:
1181 letters_in_word_at_unknown_position = defaultdict(set)
1183 for letter in in_word:
1184 if letter.isdigit():
1186 letters_in_word_at_unknown_position[last_letter].add(int(letter))
1187 elif letter.isalpha():
1188 last_letter = letter
1190 max_letter_pop_per_word = get_max_letter_population()
1191 word_state = WordState(
1193 max_letter_pop_per_word,
1194 letters_at_known_positions=letters_at_known_positions,
1195 letters_at_unknown_positions=letters_in_word_at_unknown_position,
1196 letters_excluded=letters_to_avoid,
1199 player = AutoPlayer()
1200 player.new_word(len(template), None, word_state)
1201 return player.guess_word()
1205 """Autoplay every possible solution and pay attention to statistics."""
1207 logger.debug('Selftest mode...')
1211 max_guesses_words = []
1213 hist = histogram.SimpleHistogram(
1214 [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 100)]
1216 top_guess_number = defaultdict(dict)
1219 player = AutoPlayer()
1220 with open(config.config['solutions_file'], 'r') as rf:
1221 contents = rf.readlines()
1223 max_letter_pop_per_word = get_max_letter_population()
1225 for word in contents:
1230 f'Found word "{word}" in solutions file that is not 5 letters in length. Skipping it.'
1233 oracle = AutoplayOracle(word)
1234 word_state = WordState(len(word), max_letter_pop_per_word)
1235 player.new_word(len(word), oracle, word_state)
1238 runtime = time.time() - start
1240 f'{total_words} / {len(contents)} ("{word}") = {total_words/len(contents)*100.0:.2f}% | {total_guesses/total_words:.3f} guesses/word | {runtime:.1f}s @ {runtime/total_words:.3f}s/word\r',
1243 if total_words % 100 == 0:
1244 print(f'\nAfter {total_words} words:')
1246 f'...I made {total_guesses} guesses; ({total_guesses/total_words:.3f}/word)'
1249 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1251 print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1254 guesses = autoplay(word, oracle, word_state, player, True)
1255 guess_count = len(guesses)
1258 hist.add_item(guess_count)
1259 total_guesses += guess_count
1260 for n, guess in enumerate(guesses):
1261 tops = top_guess_number[n]
1262 tops[guess] = tops.get(guess, 0) + 1
1263 if n != len(guesses) - 1:
1264 every_guess.add(guess)
1265 if max_guesses is None or guess_count > max_guesses:
1266 max_guesses = guess_count
1267 max_guesses_words = [word]
1268 elif guess_count == max_guesses:
1269 max_guesses_words.append(word)
1270 print("\nFinal Report:")
1271 print("-------------")
1272 print(f'On {total_words} words:')
1274 f'...I made {total_guesses} guesses; ({total_guesses / total_words:.3f} / word)'
1277 f'...Max guesses was {max_guesses} for {max_guesses_words}; I lost {num_losses} times.'
1279 print(f'...I made {len(every_guess)} total distinct "interior" guesses.')
1281 for n in range(0, 8):
1282 top_guesses = top_guess_number[n]
1285 print(f'Top guesses #{n+1}: ', end='')
1286 for guess, count in sorted(top_guesses.items(), key=lambda x: -x[1]):
1287 out += f'{guess}@{count}, '
1297 @par.parallelize(method=par.Method.PROCESS)
1299 solutions: List[Word],
1301 shared_cache_name: str,
1302 lock: multiprocessing.RLock,
1303 max_letter_pop_per_word: Dict[Letter, int],
1305 """Work on precomputing one shard of the solution space, in parallel."""
1307 logger.debug(f'Shard {shard_num} owns solutions {solutions[0]}..{solutions[-1]}.')
1308 player = AutoPlayer()
1309 length = len(solutions[0])
1310 shared_cache = SharedDict(shared_cache_name, 0, lock)
1311 begin = solutions[0]
1319 local_cache: Dict[Tuple[int, Fprint], Word] = {}
1321 for n, solution in enumerate(solutions):
1322 oracle = AutoplayOracle(solution)
1323 word_state = WordState(length, max_letter_pop_per_word)
1324 player.new_word(length, oracle, word_state)
1327 # Make guesses until the next guess is not in the hash or
1330 remaining_words = player.get_all_possible_solutions()
1331 num_remaining_words = len(remaining_words)
1332 if num_remaining_words <= 1:
1335 sig_remaining_words = hashlib.md5(
1336 remaining_words.__repr__().encode('ascii')
1338 key = (num_remaining_words, sig_remaining_words)
1340 if player.position_in_hash(num_remaining_words, sig_remaining_words):
1341 provenance = 'game_hash'
1342 guess = player.guess_word()
1343 elif key in local_cache:
1344 provenance = 'local_cache'
1345 guess = local_cache[key]
1346 elif key in shared_cache:
1347 provenance = 'shared_cache'
1348 guess = shared_cache[key]
1350 provenance = 'computed'
1351 guess = player.brute_force_internal(
1353 player.all_possible_guesses_by_length[length],
1356 local_cache[key] = guess
1357 shared_cache[key] = guess
1360 guesses.append(guess)
1361 hints = oracle.judge_guess(guess)
1362 word_state.record_guess_and_hint(guess, hints)
1364 f'shard{shard_num}: '
1365 + f'{passes}/{n}/{len(solutions)}/{solution}> '
1366 + f'{num_remaining_words} @ {sig_remaining_words}: {guesses} # {provenance}'
1369 # When we can make it through a pass over all solutions and
1370 # never miss the hash or shared dict, we're done.
1371 if num_computed == 0:
1373 f'{ansi.bg("green")}{ansi.fg("black")}'
1374 + f'shard{shard_num}: "{begin}".."{end}" is finished!'
1378 shared_cache.close()
1379 return f'(shard {shard_num} done)'
1383 """Precompute the best guess in every situation via the expensive
1384 brute force / entropy method. Break the solutions list into
1385 shards and execute each one in parallel. Persist the concatenated
1389 with open(config.config['solutions_file'], 'r') as rf:
1390 contents = rf.readlines()
1393 for word in contents:
1399 assert len(word) == length
1400 all_words.append(word)
1402 max_letter_pop_per_word = get_max_letter_population()
1404 logger.debug('Sharding words into groups of 10.')
1405 for subset in list_utils.shard(all_words, 10):
1406 shards.append([x for x in subset])
1408 logger.debug('Kicking off helper pool.')
1410 # Shared cache is a dict that is slow to read/write but is visible
1411 # to all shards so that they can benefit from results already
1412 # computed in one of the other workers. 10 pages (~40kb) of
1414 shared_cache = SharedDict("wordle_shared_dict", 4096 * 10)
1417 for n, shard in enumerate(shards):
1422 shared_cache.get_name(),
1424 max_letter_pop_per_word,
1427 smart_future.wait_all(results)
1429 with open(config.config['hash_file'], 'a') as wf:
1430 for key, value in shared_cache.items():
1431 print(f'{key[0]} @ {key[1]}: {value}', file=wf)
1433 shared_cache.close()
1434 shared_cache.cleanup()
1435 executors.DefaultExecutors().process_pool().shutdown()
1438 def get_legal_guesses() -> Set[Word]:
1439 legal_guesses = set()
1440 with open(config.config['guesses_file'], 'r') as rf:
1441 contents = rf.readlines()
1442 for line in contents:
1445 legal_guesses.add(line)
1446 return legal_guesses
1449 def get_solution() -> Word:
1450 if config.config['template'] is not None:
1451 solution = config.config['template']
1454 with open(config.config['solutions_file'], 'r') as rf:
1455 contents = rf.readlines()
1456 solution = random.choice(contents)
1457 solution = solution[:-1]
1458 solution = solution.lower()
1459 solution = solution.strip()
1463 def give_hint(num_hints: int, player: AutoPlayer, word_state: WordState):
1464 """Give a smart(?) hint to the guesser."""
1466 possible_solutions = player.get_all_possible_solutions()
1467 n = len(possible_solutions)
1470 f'There {string_utils.is_are(n)} {n} possible solution{string_utils.pluralize(n)}.'
1472 elif num_hints == 2:
1473 (freq, _) = player.get_frequency_and_frequency_by_position_tables(
1476 hint_letters = sorted(freq.items(), key=lambda x: -x[1])
1477 good_hints = set(string.ascii_lowercase)
1478 for letter in word_state.letters_at_unknown_positions.keys():
1479 if len(word_state.letters_at_unknown_positions[letter]) > 0:
1480 if letter in good_hints:
1481 good_hints.remove(letter)
1482 for letter in word_state.letters_at_known_positions.values():
1483 if letter in good_hints:
1484 good_hints.remove(letter)
1488 for letter, _ in hint_letters:
1489 if letter in good_hints:
1491 f'"{letter}" is popular in the possible solution{string_utils.pluralize(n)}.'
1496 elif num_hints == 3:
1500 for possibility in random.sample(possible_solutions, min(limit, n)):
1501 print(f'Maybe something like "{possibility}"?')
1502 elif num_hints >= 4:
1503 best_guess = player.guess_word()
1504 print(f'Try "{best_guess}"')
1508 """Allow users to play and color in the letter for them."""
1510 legal_guesses = get_legal_guesses()
1511 solution = get_solution()
1512 oracle = AutoplayOracle(solution)
1513 max_letter_pop_per_word = get_max_letter_population()
1514 word_state = WordState(len(solution), max_letter_pop_per_word)
1515 player = AutoPlayer()
1516 player.new_word(len(solution), oracle, word_state)
1519 prompt = 'Guess #_/6 (or "?" for a hint): '
1520 padding = ' ' * len(prompt)
1521 colorized_guess = "▢" * len(solution)
1526 # Print current state + template.
1527 print(padding + colorized_guess + " " + word_state.__repr__())
1528 if guess == solution:
1531 if num_guesses >= 6:
1532 print('Better luck next time.')
1533 print(padding + f'{solution}')
1536 # Get a guess / command.
1537 guess = input(prompt.replace('_', str(num_guesses + 1))).lower().strip()
1542 give_hint(num_hints, player, word_state)
1544 elif guess == '#brute':
1545 remaining_words = player.get_all_possible_solutions()
1546 brute = player.brute_force_internal(
1548 player.all_possible_guesses_by_length[len(solution)],
1554 elif len(guess) != len(solution) or guess not in legal_guesses:
1555 print(f'"{guess}" is not a legal guess, try again.')
1558 # If we get here it was a guess. Process it.
1560 hints = oracle.judge_guess(guess)
1561 colorized_guess = colorize_guess(guess, hints)
1562 word_state.record_guess_and_hint(guess, hints)
1566 # The bootstrap.initialize decorator takes care of parsing our commandline
1567 # flags and populating config. It can also do cool things like time and
1568 # profile the code run within it, audit imports, profile memory usage,
1569 # and break into pdb on unhandled exception.
1570 @bootstrap.initialize
1571 def main() -> Optional[int]:
1572 mode = config.config['mode'].upper().strip()
1573 if mode == 'AUTOPLAY':
1574 solution = config.config['template']
1576 solution = solution.lower()
1577 oracle = AutoplayOracle(solution)
1578 max_letter_pop_per_word = get_max_letter_population()
1579 word_state = WordState(len(solution), max_letter_pop_per_word)
1580 player = AutoPlayer()
1581 autoplay(solution, oracle, word_state, player)
1583 elif mode == 'CHEAT':
1585 elif mode == 'PLAY':
1588 elif mode == 'SELFTEST':
1591 elif mode == 'PRECOMPUTE':
1594 raise Exception('wtf?')
1597 if __name__ == '__main__':