Teach Python logging formatter to use %f in the format string.
[python_utils.git] / ml_quick_label.py
index 1c359828003110d4358d45bd8e5a825f631569c2..5a112db3d22e7dfc5d839306f4b78ba0648ae7fe 100644 (file)
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
 import glob
+import logging
 import os
-from typing import Callable, List, NamedTuple, Set
+from typing import Callable, List, NamedTuple, Optional, Set
 
 import argparse_utils
 import config
 import input_utils
 
+logger = logging.getLogger(__name__)
 parser = config.add_commandline_args(
     f"ML Quick Labeler ({__file__})",
     "Args related to quick labeling of ML training data",
@@ -17,12 +19,25 @@ parser.add_argument(
     default="./qlabel_skip_list.txt",
     metavar="FILENAME",
     type=argparse_utils.valid_filename,
-    help="Path to file in which to store already labeled data",
+    help="Path to file in which to store already labeled data.",
+)
+parser.add_argument(
+    "--ml_quick_label_use_skip_lists",
+    default=True,
+    action=argparse_utils.ActionNoYes,
+    help='Should we use a skip list file to speed up execution?',
+)
+parser.add_argument(
+    "--ml_quick_label_overwrite_labels",
+    default=False,
+    action=argparse_utils.ActionNoYes,
+    help='Enable overwriting existing labels; default is to not relabel.',
 )
 
 
 class InputSpec(NamedTuple):
-    image_file_glob: str
+    image_file_glob: Optional[str]
+    image_file_prepopulated_list: Optional[List[str]]
     image_file_to_features_file: Callable[[str], str]
     label: str
     valid_keystrokes: List[str]
@@ -32,60 +47,76 @@ class InputSpec(NamedTuple):
 
 def read_skip_list() -> Set[str]:
     ret: Set[str] = set()
-    quick_skip_file = config.config['ml_quick_label_skip_list_path']
-    if not os.path.exists(quick_skip_file):
-        return ret
-    with open(quick_skip_file, 'r') as f:
-        lines = f.readlines()
-    for line in lines:
-        line = line[:-1]
-        line.strip()
-        ret.add(line)
+    if config.config['ml_quick_label_use_skip_lists']:
+        quick_skip_file = config.config['ml_quick_label_skip_list_path']
+        if os.path.exists(quick_skip_file):
+            with open(quick_skip_file, 'r') as f:
+                lines = f.readlines()
+            for line in lines:
+                line = line[:-1]
+                line.strip()
+                ret.add(line)
+        logger.debug(f'Read {quick_skip_file} and found {len(ret)} entries.')
     return ret
 
 
 def write_skip_list(skip_list) -> None:
-    quick_skip_file = config.config['ml_quick_label_skip_list_path']
-    with open(quick_skip_file, 'w') as f:
-        for filename in skip_list:
-            filename = filename.strip()
-            if len(filename) > 0:
-                f.write(f'{filename}\n')
+    if config.config['ml_quick_label_use_skip_lists']:
+        quick_skip_file = config.config['ml_quick_label_skip_list_path']
+        with open(quick_skip_file, 'w') as f:
+            for filename in skip_list:
+                filename = filename.strip()
+                if len(filename) > 0:
+                    f.write(f'{filename}\n')
+        logger.debug(f'Updated {quick_skip_file}')
 
 
 def label(in_spec: InputSpec) -> None:
-    images = glob.glob(in_spec.image_file_glob)
+    images = []
+    if in_spec.image_file_glob is not None:
+        images += glob.glob(in_spec.image_file_glob)
+    elif in_spec.image_file_prepopulated_list is not None:
+        images += in_spec.image_file_prepopulated_list
+    else:
+        raise ValueError(
+            'One of image_file_glob or image_file_prepopulated_list is required'
+        )
 
     skip_list = read_skip_list()
     for image in images:
         if image in skip_list:
+            logger.debug(f'Skipping {image} because of the skip list')
             continue
         features = in_spec.image_file_to_features_file(image)
         if features is None or not os.path.exists(features):
+            logger.warning(
+                f'File {image} yielded file {features} which does not exist, SKIPPING.'
+            )
             continue
 
         # Render features and image.
+        filtered_lines = []
         with open(features, "r") as f:
             lines = f.readlines()
-        skip = False
+        saw_label = False
         for line in lines:
             line = line[:-1]
-            if in_spec.label in line:
-                skip = True
-        if skip:
+            if in_spec.label not in line:
+                filtered_lines.append(line)
+            else:
+                saw_label = True
+
+        if not saw_label or config.config['ml_quick_label_overwrite_labels']:
+            logger.info(features)
+            os.system(f'xv {image} &')
+            keystroke = input_utils.single_keystroke_response(
+                in_spec.valid_keystrokes,
+                prompt=in_spec.prompt,
+            )
+            os.system('killall xv')
+            label_value = in_spec.keystroke_to_label(keystroke)
+            filtered_lines.append(f"{in_spec.label}: {label_value}\n")
+            with open(features, 'w') as f:
+                f.writelines("%s\n" % line for line in filtered_lines)
             skip_list.add(image)
-            continue
-
-        os.system(f'xv {image} &')
-        keystroke = input_utils.single_keystroke_response(
-            in_spec.valid_keystrokes,
-            prompt=in_spec.prompt,
-        )
-        os.system('killall xv')
-
-        label_value = in_spec.keystroke_to_label(keystroke)
-        with open(features, "a") as f:
-            f.write(f"{in_spec.label}: {label_value}\n")
-        skip_list.add(image)
-
     write_skip_list(skip_list)