X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=ml%2Fquick_label.py;h=feab67b2e88484132ac80506f5add82b395e35bf;hb=e46158e49121b8a955bb07b73f5bcf9928b79c90;hp=00acf05f4e389a36096b8cd13e6cbbee56985446;hpb=723f5fcb660eef79cc455cfb3f2eebfa667c90fa;p=python_utils.git diff --git a/ml/quick_label.py b/ml/quick_label.py index 00acf05..feab67b 100644 --- a/ml/quick_label.py +++ b/ml/quick_label.py @@ -2,7 +2,12 @@ # © Copyright 2021-2022, Scott Gasch -"""A helper to facilitate quick manual labeling of ML training data.""" +"""A helper to facilitate quick manual labeling of ML training data. + +To use, implement a subclass that implements the QuickLabelHelper +interface and pass it into the quick_label function. + +""" import logging import os @@ -70,7 +75,7 @@ class QuickLabelHelper: @abstractmethod def render_example(self, filename: str, features: str, lines: List[str]) -> None: - '''Render a raw file with its features for the user.''' + '''Render a raw file with its features for the user to judge.''' pass @abstractmethod @@ -91,7 +96,8 @@ class QuickLabelHelper: features: str, lines: List[str], ) -> Any: - '''Ask the current ML model about this example, if necessary.''' + '''Ask the current ML model about this example, if necessary/possible. + Returns None to indicate no model to consult.''' pass @abstractmethod @@ -157,6 +163,10 @@ def _maybe_write_skip_list(skip_list) -> None: def _filter_images( images: List[str], skip_list: Set[str], helper: QuickLabelHelper ) -> List[Tuple[str, str]]: + '''Discard examples that have particular characteristics. e.g. + those that are already labeled and whose current label agrees with + the ML model, etc...''' + filtered_images = [] label_label = helper.get_label_feature() for image in images: @@ -216,15 +226,18 @@ def _filter_images( def _make_prompt( helper: QuickLabelHelper, cursor: int, - filtered_images: List[Tuple[str, str]], - image: str, - features: str, - labeled_features: Dict[Tuple[str, str], str], + num_filtered_images: int, + current_image: str, + current_features: str, + labeled_features: Dict[Tuple[str, str], str], # Examples already labeled ) -> None: - label_label = helper.get_label_feature() + '''Tell an interactive user where they are in the set of examples that + may be labeled and the details of the current example.''' + + label_label = helper.get_label_feature() # the key: of a label in features filtered_lines = [] - label = labeled_features.get((image, features), None) - with open(features, 'r') as rf: + label = labeled_features.get((current_image, current_features), None) + with open(current_features, 'r') as rf: lines = rf.readlines() for line in lines: line = line[:-1] @@ -237,18 +250,18 @@ def _make_prompt( label = line # Prompt... - helper.render_example(image, features, filtered_lines) - print(f'{cursor}/{len(filtered_images)} ({cursor/len(filtered_images)*100.0:.1f}%) | ', end='') - print(f'{ansi.bold()}{image} / {features}{ansi.reset()}:') - print(f' ...{len(labeled_features)} currently unsaved labels ("W" to save).') + helper.render_example(current_image, current_features, filtered_lines) + print(f'{cursor}/{num_filtered_images} ({cursor/num_filtered_images*100.0:.1f}%) | ', end='') + print(f'{ansi.bold()}{current_image} / {current_features}{ansi.reset()}:') + print(f' ...{len(labeled_features)} currently unwritten label(s) ("W" to write).') if label: - if (image, features) in labeled_features: + if (current_image, current_features) in labeled_features: print(f' ...This example is labeled but not yet saved: {label}') else: print(f' ...This example is already labeled on disk: {label}') else: print(' ...This example is currently unlabeled') - guess = helper.ask_current_model_about_example(image, features, filtered_lines) + guess = helper.ask_current_model_about_example(current_image, current_features, filtered_lines) if guess: print(f' ...The ML Model says {guess}') print() @@ -283,6 +296,10 @@ def _write_labeled_features( def quick_label(helper: QuickLabelHelper) -> None: + '''Pass your QuickLabelHelper implementing class to this function and + it will allow users to label examples and persist them to disk. + + ''' skip_list = _maybe_read_skip_list() # Ask helper for an initial set of files. @@ -301,7 +318,6 @@ def quick_label(helper: QuickLabelHelper) -> None: logger.info('There are %d candidate images post filtering.', len(filtered_images)) # Allow the user to label the non-filtered images one by one. - labeled_features: Dict[Tuple[str, str], str] = {} cursor = 0 while True: @@ -313,7 +329,7 @@ def quick_label(helper: QuickLabelHelper) -> None: assert features and os.path.exists(features) # Render the features, image and prompt. - _make_prompt(helper, cursor, filtered_images, image, features, labeled_features) + _make_prompt(helper, cursor, len(filtered_images), image, features, labeled_features) try: # Did they want everything labelled the same? label_everything = helper.get_everything_label()