X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=ml_quick_label.py;h=5a112db3d22e7dfc5d839306f4b78ba0648ae7fe;hb=f4f303f79a478d37f9756cd4f26fab130fc62e2b;hp=1c359828003110d4358d45bd8e5a825f631569c2;hpb=497fb9e21f45ec08e1486abaee6dfa7b20b8a691;p=python_utils.git diff --git a/ml_quick_label.py b/ml_quick_label.py index 1c35982..5a112db 100644 --- a/ml_quick_label.py +++ b/ml_quick_label.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 import glob +import logging import os -from typing import Callable, List, NamedTuple, Set +from typing import Callable, List, NamedTuple, Optional, Set import argparse_utils import config import input_utils +logger = logging.getLogger(__name__) parser = config.add_commandline_args( f"ML Quick Labeler ({__file__})", "Args related to quick labeling of ML training data", @@ -17,12 +19,25 @@ parser.add_argument( default="./qlabel_skip_list.txt", metavar="FILENAME", type=argparse_utils.valid_filename, - help="Path to file in which to store already labeled data", + help="Path to file in which to store already labeled data.", +) +parser.add_argument( + "--ml_quick_label_use_skip_lists", + default=True, + action=argparse_utils.ActionNoYes, + help='Should we use a skip list file to speed up execution?', +) +parser.add_argument( + "--ml_quick_label_overwrite_labels", + default=False, + action=argparse_utils.ActionNoYes, + help='Enable overwriting existing labels; default is to not relabel.', ) class InputSpec(NamedTuple): - image_file_glob: str + image_file_glob: Optional[str] + image_file_prepopulated_list: Optional[List[str]] image_file_to_features_file: Callable[[str], str] label: str valid_keystrokes: List[str] @@ -32,60 +47,76 @@ class InputSpec(NamedTuple): def read_skip_list() -> Set[str]: ret: Set[str] = set() - quick_skip_file = config.config['ml_quick_label_skip_list_path'] - if not os.path.exists(quick_skip_file): - return ret - with open(quick_skip_file, 'r') as f: - lines = f.readlines() - for line in lines: - line = line[:-1] - line.strip() - ret.add(line) + if config.config['ml_quick_label_use_skip_lists']: + quick_skip_file = config.config['ml_quick_label_skip_list_path'] + if os.path.exists(quick_skip_file): + with open(quick_skip_file, 'r') as f: + lines = f.readlines() + for line in lines: + line = line[:-1] + line.strip() + ret.add(line) + logger.debug(f'Read {quick_skip_file} and found {len(ret)} entries.') return ret def write_skip_list(skip_list) -> None: - quick_skip_file = config.config['ml_quick_label_skip_list_path'] - with open(quick_skip_file, 'w') as f: - for filename in skip_list: - filename = filename.strip() - if len(filename) > 0: - f.write(f'{filename}\n') + if config.config['ml_quick_label_use_skip_lists']: + quick_skip_file = config.config['ml_quick_label_skip_list_path'] + with open(quick_skip_file, 'w') as f: + for filename in skip_list: + filename = filename.strip() + if len(filename) > 0: + f.write(f'{filename}\n') + logger.debug(f'Updated {quick_skip_file}') def label(in_spec: InputSpec) -> None: - images = glob.glob(in_spec.image_file_glob) + images = [] + if in_spec.image_file_glob is not None: + images += glob.glob(in_spec.image_file_glob) + elif in_spec.image_file_prepopulated_list is not None: + images += in_spec.image_file_prepopulated_list + else: + raise ValueError( + 'One of image_file_glob or image_file_prepopulated_list is required' + ) skip_list = read_skip_list() for image in images: if image in skip_list: + logger.debug(f'Skipping {image} because of the skip list') continue features = in_spec.image_file_to_features_file(image) if features is None or not os.path.exists(features): + logger.warning( + f'File {image} yielded file {features} which does not exist, SKIPPING.' + ) continue # Render features and image. + filtered_lines = [] with open(features, "r") as f: lines = f.readlines() - skip = False + saw_label = False for line in lines: line = line[:-1] - if in_spec.label in line: - skip = True - if skip: + if in_spec.label not in line: + filtered_lines.append(line) + else: + saw_label = True + + if not saw_label or config.config['ml_quick_label_overwrite_labels']: + logger.info(features) + os.system(f'xv {image} &') + keystroke = input_utils.single_keystroke_response( + in_spec.valid_keystrokes, + prompt=in_spec.prompt, + ) + os.system('killall xv') + label_value = in_spec.keystroke_to_label(keystroke) + filtered_lines.append(f"{in_spec.label}: {label_value}\n") + with open(features, 'w') as f: + f.writelines("%s\n" % line for line in filtered_lines) skip_list.add(image) - continue - - os.system(f'xv {image} &') - keystroke = input_utils.single_keystroke_response( - in_spec.valid_keystrokes, - prompt=in_spec.prompt, - ) - os.system('killall xv') - - label_value = in_spec.keystroke_to_label(keystroke) - with open(features, "a") as f: - f.write(f"{in_spec.label}: {label_value}\n") - skip_list.add(image) - write_skip_list(skip_list)