From 91fb34569ed17b143812b70526ac6ca013eccb1a Mon Sep 17 00:00:00 2001 From: blackboxprogramming <118287761+blackboxprogramming@users.noreply.github.com> Date: Fri, 8 Aug 2025 01:14:25 -0700 Subject: [PATCH] Add tokenizer/spm.yaml with SentencePiece configuration --- lucidia_llm/tokenizer/spm.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 lucidia_llm/tokenizer/spm.yaml diff --git a/lucidia_llm/tokenizer/spm.yaml b/lucidia_llm/tokenizer/spm.yaml new file mode 100644 index 0000000..72b560f --- /dev/null +++ b/lucidia_llm/tokenizer/spm.yaml @@ -0,0 +1,3 @@ +# SentencePiece model configuration for Lucidia +model_type: bpe +vocab_size: 50000