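A bunch of changes... (in ml/model_trainer.py: drop an unused exception variable; report expected vs. seen feature counts in the missing-features warnings; build and print the model info summary before the persist-threshold check, using now_pacific() instead of now_pst().)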
[python_utils.git] / ml / model_trainer.py
index ab3059f855388d06b8077a359897bb07ef5b2bc9..acd721868a2a9e04de0da364b8d37dcc268b4fee 100644 (file)
@@ -218,7 +218,7 @@ class TrainingBlueprint(ABC):
                 line = line.strip()
                 try:
                     (key, value) = line.split(self.spec.key_value_delimiter)
-                except Exception as e:
+                except Exception:
                     logger.debug(f"WARNING: bad line in file {filename} '{line}', skipped")
                     continue
 
@@ -246,12 +246,12 @@ class TrainingBlueprint(ABC):
                     y.pop()
 
                 if self.spec.delete_bad_inputs:
-                    msg = f"WARNING: {filename}: missing features or label.  DELETING."
+                    msg = f"WARNING: {filename}: missing features or label; expected {self.spec.feature_count} but saw {len(x)}.  DELETING."
                     print(msg, file=sys.stderr)
                     logger.warning(msg)
                     os.remove(filename)
                 else:
-                    msg = f"WARNING: {filename}: missing features or label.  Skipped."
+                    msg = f"WARNING: {filename}: missing features or label; expected {self.spec.feature_count} but saw {len(x)}.  Skipping."
                     print(msg, file=sys.stderr)
                     logger.warning(msg)
         return (X, y)
@@ -356,6 +356,13 @@ class TrainingBlueprint(ABC):
             import input_utils
             import string_utils
 
+            now: datetime.datetime = datetime_utils.now_pacific()
+            info = f"""Timestamp: {datetime_utils.datetime_to_string(now)}
+Model params: {params}
+Training examples: {num_examples}
+Training set score: {training_score:.2f}%
+Testing set score: {test_score:.2f}%"""
+            print(f'\n{info}\n')
             if (
                     (self.spec.persist_percentage_threshold is not None and
                      test_score > self.spec.persist_percentage_threshold)
@@ -376,12 +383,6 @@ class TrainingBlueprint(ABC):
                 print(msg)
                 logger.info(msg)
                 model_info_filename = f"{self.spec.basename}_model_info.txt"
-                now: datetime.datetime = datetime_utils.now_pst()
-                info = f"""Timestamp: {datetime_utils.datetime_to_string(now)}
-Model params: {params}
-Training examples: {num_examples}
-Training set score: {training_score:.2f}%
-Testing set score: {test_score:.2f}%"""
                 with open(model_info_filename, "w") as f:
                     f.write(info)
                 msg = f"Wrote {model_info_filename}:"