from ansi import bold, reset
import argparse_utils
import config
-import datetime_utils
-import decorator_utils
-import input_utils
-import list_utils
+from decorator_utils import timed
import parallelize as par
-import smart_future
-import string_utils
-import text_utils
logger = logging.getLogger(__file__)
self.spec = None
def train(self, spec: InputSpec) -> OutputSpec:
+ import smart_future
+
random.seed()
self.spec = spec
best_test_score = None
best_training_score = None
best_params = None
- for model in smart_future.wait_many(models):
+ for model in smart_future.wait_any(models):
params = modelid_to_params[model.get_id()]
if isinstance(model, smart_future.SmartFuture):
model = model._resolve()
- training_score, test_score = self.evaluate_model(
- model,
- self.X_train_scaled,
- self.y_train,
- self.X_test_scaled,
- self.y_test,
- )
- score = (training_score + test_score * 20) / 21
- if not self.spec.quiet:
- print(
- f"{bold()}{params}{reset()}: "
- f"Training set score={training_score:.2f}%, "
- f"test set score={test_score:.2f}%",
- file=sys.stderr,
+ if model is not None:
+ training_score, test_score = self.evaluate_model(
+ model,
+ self.X_train_scaled,
+ self.y_train,
+ self.X_test_scaled,
+ self.y_test,
)
- if best_score is None or score > best_score:
- best_score = score
- best_test_score = test_score
- best_training_score = training_score
- best_model = model
- best_params = params
+ score = (training_score + test_score * 20) / 21
if not self.spec.quiet:
print(
- f"New best score {best_score:.2f}% with params {params}"
+ f"{bold()}{params}{reset()}: "
+ f"Training set score={training_score:.2f}%, "
+ f"test set score={test_score:.2f}%",
+ file=sys.stderr,
)
+ if best_score is None or score > best_score:
+ best_score = score
+ best_test_score = test_score
+ best_training_score = training_score
+ best_model = model
+ best_params = params
+ if not self.spec.quiet:
+ print(
+ f"New best score {best_score:.2f}% with params {params}"
+ )
if not self.spec.quiet:
msg = f"Done training; best test set score was: {best_test_score:.1f}%"
def make_progress_graph(self) -> None:
    """Render a textual progress indicator, unless quiet mode is on.

    Presumably draws completed-files vs. total-files progress via
    text_utils.progress_graph — TODO confirm against that helper's docs.
    The import is deferred into the method body so the (project-local)
    text_utils module is only loaded when a graph is actually drawn.
    """
    if self.spec.quiet:
        return
    from text_utils import progress_graph

    progress_graph(self.file_done_count, self.total_file_count)
- @decorator_utils.timed
+ @timed
def read_input_files(self):
+ import list_utils
+ import smart_future
+
# All features
X = []
file_list = list(files)
results.append(self.read_files_from_list(file_list, n))
- for result in smart_future.wait_many(results, callback=self.make_progress_graph):
+ for result in smart_future.wait_any(results, callback=self.make_progress_graph):
result = result._resolve()
for z in result[0]:
X.append(z)
scaler: Any,
model: Any) -> Tuple[Optional[str], Optional[str], Optional[str]]:
if not self.spec.dry_run:
+ import datetime_utils
+ import input_utils
+ import string_utils
+
if (
(self.spec.persist_percentage_threshold is not None and
test_score > self.spec.persist_percentage_threshold)