def compile(
    self,
    student: Program,
    trainset: List[Example],
    strategy: str = "p -> w -> p",
    valset_ratio = 0.1,
) -> Program:
    """Validate ``strategy``, prepare ``student`` and run the strategy steps.

    Args:
        student: Program to compile. It is passed through
            ``prepare_student`` first, and the value returned by that call
            is what gets handed to the strategy runner.
        trainset: Training examples. A shallow copy is taken, so the order
            of the caller's list is not changed.
        strategy: Sequence of ``'p'`` and ``'w'`` steps joined by
            ``self.STRAT_SEP``. The string is lower-cased before parsing.
        valset_ratio: Forwarded unchanged to ``self._run_strategies``
            (presumably the share of ``trainset`` held out for validation;
            confirm against ``_run_strategies``).

    Returns:
        The program returned by ``self._run_strategies``.

    Raises:
        ValueError: If any step of ``strategy`` is not ``'p'`` or ``'w'``.
    """
    # TODO: We could record acc on a different valset to pick the best
    # strategy within the provided strategy
    logger.info("[BetterTogether] Validating the strategy")
    steps = strategy.lower().split(self.STRAT_SEP)
    has_unknown_step = any(step not in ("p", "w") for step in steps)
    if has_unknown_step:
        message = (
            f"The strategy should be a sequence of 'p' and 'w' separated by '{self.STRAT_SEP}', but "
            f"found: {strategy}"
        )
        raise ValueError(message)

    logger.info("[BetterTogether] Preparing the student program...")
    # TODO: Prepare student returns student.reset_copy(), which is what gets
    # optimized. We should make this clear in the doc comments.
    prepared = prepare_student(student)
    set_missing_predictor_lms(prepared)

    # Work on a shallow copy of the trainset so that the order of the
    # examples in the caller's original trainset is left untouched.
    trainset_copy = trainset[:]

    logger.info("[BetterTogether] Compiling the student program...")
    compiled = self._run_strategies(steps, prepared, trainset_copy, valset_ratio)
    logger.info("[BetterTogether] BetterTogether has finished compiling the student program")
    return compiled