"""Data builder for pretraining dataset."""


def build_pretrain_dataset(input_paths, output_path):
    """Construct the pretraining dataset from raw inputs (placeholder).

    This is a stub: neither argument is inspected and no file is read or
    written. The call fails immediately so that callers cannot mistake the
    missing implementation for a successful, empty build.

    Args:
        input_paths: Raw inputs to build from. Presumably a collection of
            file paths -- TODO confirm once the builder is implemented.
        output_path: Presumably the destination of the built dataset --
            TODO confirm once the builder is implemented.

    Raises:
        NotImplementedError: Always, until the builder is implemented.
    """
    raise NotImplementedError("Pretrain dataset builder not implemented")