- cmd = f'{RSYNC} {bundle.code_file} {username}@{machine}:{bundle.code_file}'
- logger.info(f"Copying work to {worker} via {cmd}")
- run_silently(cmd)
+ try:
+ cmd = f'{RSYNC} {bundle.code_file} {username}@{machine}:{bundle.code_file}'
+ start_ts = time.time()
+ logger.info(f"{bundle}: Copying work to {worker} via {cmd}.")
+ run_silently(cmd)
+ xfer_latency = time.time() - start_ts
+ logger.info(f"{bundle}: Copying done to {worker} in {xfer_latency:.1f}s.")
+ except Exception as e:
+ logger.exception(e)
+ logger.error(
+ f'{bundle}: failed to send instructions to worker machine?!?'
+ )
+ assert bundle.worker is not None
+ self.status.record_release_worker(
+ bundle.worker,
+ bundle.uuid,
+ True,
+ )
+ self.release_worker(bundle.worker)
+ self.adjust_task_count(-1)
+ if is_original:
+ # Weird. We tried to copy the code to the worker and it failed...
+ # And we're the original bundle. We have to retry.
+ return self.emergency_retry_nasty_bundle(bundle)
+ else:
+ # This is actually expected; we're a backup.
+ # There's a race condition where someone else
+ # already finished the work and removed the source
+ # code file before we could copy it. No biggie.
+ return None