#!/usr/bin/env python3

"""Interactive helper for quickly hand-labeling ML training images.

For each candidate image, the matching features file is inspected; if it
does not already carry the requested label (or overwriting is enabled),
the image is displayed with ``xv``, a single keystroke is read from the
user, and the resulting label line is written into the features file.
An optional on-disk skip list records images that have already been
processed so later runs can bypass them.
"""

import glob
import logging
import os
import shlex
from typing import Callable, List, NamedTuple, Optional, Set, Tuple
import warnings

import argparse_utils
import config

logger = logging.getLogger(__name__)
parser = config.add_commandline_args(
    f"ML Quick Labeler ({__file__})",
    "Args related to quick labeling of ML training data",
)
parser.add_argument(
    "--ml_quick_label_skip_list_path",
    default="./qlabel_skip_list.txt",
    metavar="FILENAME",
    type=argparse_utils.valid_filename,
    help="Path to file in which to store already labeled data.",
)
parser.add_argument(
    "--ml_quick_label_use_skip_lists",
    default=True,
    action=argparse_utils.ActionNoYes,
    help='Should we use a skip list file to speed up execution?',
)
parser.add_argument(
    "--ml_quick_label_overwrite_labels",
    default=False,
    action=argparse_utils.ActionNoYes,
    help='Enable overwriting existing labels; default is to not relabel.',
)


class InputSpec(NamedTuple):
    """Describes one labeling job: which images to show and how to label them."""

    # Glob pattern used to discover images; takes precedence when not None.
    image_file_glob: Optional[str]
    # Explicit list of images, used only when image_file_glob is None.
    image_file_prepopulated_list: Optional[List[str]]
    # Maps an image path to the path of its features file.
    image_file_to_features_file: Callable[[str], str]
    # Label name; written to the features file as "<label>: <value>".
    label: str
    # Keystrokes accepted from the user at the prompt.
    valid_keystrokes: List[str]
    # Prompt text shown while waiting for a keystroke.
    prompt: str
    # Translates the pressed key into the label value to record.
    keystroke_to_label: Callable[[str], str]


def read_skip_list() -> Set[str]:
    """Load the set of already-processed image paths from the skip list file.

    Returns:
        The entries found in the file named by the
        ``ml_quick_label_skip_list_path`` config value, or an empty set
        when skip lists are disabled or the file does not exist.
    """
    ret: Set[str] = set()
    if not config.config['ml_quick_label_use_skip_lists']:
        return ret

    quick_skip_file = config.config['ml_quick_label_skip_list_path']
    if not os.path.exists(quick_skip_file):
        return ret

    with open(quick_skip_file, 'r') as f:
        for line in f:
            # Strip rather than slicing off the final character: the last
            # line may lack a trailing newline, and write_skip_list()
            # normalizes entries the same way.
            entry = line.strip()
            if entry:
                ret.add(entry)
    logger.debug(f'Read {quick_skip_file} and found {len(ret)} entries.')
    return ret


def write_skip_list(skip_list: Set[str]) -> None:
    """Persist the skip list, one entry per line, replacing the old file.

    Does nothing when skip lists are disabled in the config.

    Args:
        skip_list: image paths to record; blank entries are dropped and
            surrounding whitespace is removed.
    """
    if not config.config['ml_quick_label_use_skip_lists']:
        return

    quick_skip_file = config.config['ml_quick_label_skip_list_path']
    with open(quick_skip_file, 'w') as f:
        for filename in skip_list:
            filename = filename.strip()
            if len(filename) > 0:
                f.write(f'{filename}\n')
    logger.debug(f'Updated {quick_skip_file}')


def _split_label_lines(lines: List[str], label_name: str) -> Tuple[List[str], bool]:
    """Return (lines that do not mention label_name, whether any line did).

    Trailing newlines are removed from the returned lines.  A line is
    treated as a label line when label_name occurs anywhere in it.
    """
    kept: List[str] = []
    saw_label = False
    for raw in lines:
        # rstrip('\n') instead of [:-1] so a final line without a newline
        # terminator does not lose its last real character.
        text = raw.rstrip('\n')
        if label_name in text:
            saw_label = True
        else:
            kept.append(text)
    return kept, saw_label


def label(in_spec: InputSpec) -> None:
    """Interactively label every image described by in_spec.

    Images on the skip list, and images whose features file cannot be
    found, are passed over.  For the rest, any existing label line is
    detected; unless one exists and overwriting is disabled, the image is
    shown with ``xv``, a keystroke is read, and the features file is
    rewritten with the new ``<label>: <value>`` line appended.  Each
    processed image is added to the skip list, which is saved at the end.

    Args:
        in_spec: the labeling job description.

    Raises:
        ValueError: if in_spec provides neither a glob nor a prepopulated
            list of images.
    """
    import input_utils

    images: List[str] = []
    if in_spec.image_file_glob is not None:
        images += glob.glob(in_spec.image_file_glob)
    elif in_spec.image_file_prepopulated_list is not None:
        images += in_spec.image_file_prepopulated_list
    else:
        raise ValueError(
            'One of image_file_glob or image_file_prepopulated_list is required'
        )

    overwrite = config.config['ml_quick_label_overwrite_labels']
    skip_list = read_skip_list()
    for image in images:
        if image in skip_list:
            logger.debug(f'Skipping {image} because of the skip list')
            continue

        features = in_spec.image_file_to_features_file(image)
        if features is None or not os.path.exists(features):
            msg = f'File {image} yielded file {features} which does not exist, SKIPPING.'
            logger.warning(msg)
            warnings.warn(msg)
            continue

        # Render features and image.
        with open(features, "r") as f:
            lines = f.readlines()
        filtered_lines, saw_label = _split_label_lines(lines, in_spec.label)

        if not saw_label or overwrite:
            logger.info(features)
            # Quote the path so names containing spaces or shell
            # metacharacters reach xv as a single literal argument.
            os.system(f'xv {shlex.quote(image)} &')
            keystroke = input_utils.single_keystroke_response(
                in_spec.valid_keystrokes,
                prompt=in_spec.prompt,
            )
            # The viewer was backgrounded by the shell, so it is closed by
            # process name.
            os.system('killall xv')
            label_value = in_spec.keystroke_to_label(keystroke)
            # No trailing newline here: the write below terminates every
            # line, and adding one would leave a blank line after the label.
            filtered_lines.append(f"{in_spec.label}: {label_value}")
            with open(features, 'w') as f:
                f.writelines("%s\n" % line for line in filtered_lines)
        skip_list.add(image)

    write_skip_list(skip_list)