// Sampling & initLlama parameter builders — extracted for max-lines compliance.
import type { ModelConfig } from './types';
import type { OAIMessage } from './chatThunksHelpers';

/**
 * Map a ModelConfig + path into the param object expected by initLlama.
 *
 * Only three fields get a default when unset (null or undefined) in `cfg`:
 * `n_ctx` (2048), `use_mlock` (false) and `use_mmap` (true). Every other
 * field is copied through unchanged, so an unset config value is present
 * in the result as `undefined`.
 *
 * @param modelPath - Value placed in the `model` field.
 * @param cfg - Model configuration to read the settings from.
 * @returns A new object holding the init parameters.
 */
export function buildInitParams(modelPath: string, cfg: ModelConfig) {
  return {
    model: modelPath,
    n_ctx: cfg.n_ctx ?? 2048,
    n_batch: cfg.n_batch,
    n_ubatch: cfg.n_ubatch,
    n_threads: cfg.n_threads,
    n_gpu_layers: cfg.n_gpu_layers,
    flash_attn: cfg.flash_attn,
    cache_type_k: cfg.cache_type_k,
    cache_type_v: cfg.cache_type_v,
    use_mlock: cfg.use_mlock ?? false,
    use_mmap: cfg.use_mmap ?? true,
    rope_freq_base: cfg.rope_freq_base,
    rope_freq_scale: cfg.rope_freq_scale,
    ctx_shift: cfg.ctx_shift,
    kv_unified: cfg.kv_unified,
    n_cpu_moe: cfg.n_cpu_moe,
    cpu_mask: cfg.cpu_mask,
    n_parallel: cfg.n_parallel,
  };
}

/**
 * Build the full completion() params object (sampling + stop + messages).
 *
 * Fallback chains use `??`, so explicit falsy values such as `0` are kept:
 * - `n_predict`: `cfg.n_predict`, then `cfg.max_new_tokens`, then `fallbackMaxTokens`.
 * - `temperature`: `cfg.temperature`, then `fallbackTemp`.
 * - `penalty_repeat`: `cfg.penalty_repeat`, then `cfg.repetition_penalty`.
 *
 * All remaining sampling fields are copied from `cfg` as-is (possibly
 * `undefined`), and `reasoning_format` is always `'auto'`.
 *
 * NOTE(review): the return annotation had no type arguments in the source;
 * `Record<string, unknown>` is assumed here — confirm against callers.
 *
 * @param oaiMessages - Messages placed in the `messages` field (same array reference).
 * @param activeStops - Stop strings placed in the `stop` field (same array reference).
 * @param cfg - Model configuration to read the sampling settings from.
 * @param fallbackMaxTokens - Used for `n_predict` when the config sets no token limit.
 * @param fallbackTemp - Used for `temperature` when the config sets none.
 * @returns A new object holding the completion parameters.
 */
export function buildSamplingParams(
  oaiMessages: OAIMessage[],
  activeStops: string[],
  cfg: ModelConfig,
  fallbackMaxTokens: number,
  fallbackTemp: number,
): Record<string, unknown> {
  return {
    messages: oaiMessages,
    n_predict: cfg.n_predict ?? cfg.max_new_tokens ?? fallbackMaxTokens,
    temperature: cfg.temperature ?? fallbackTemp,
    top_k: cfg.top_k,
    top_p: cfg.top_p,
    min_p: cfg.min_p,
    seed: cfg.seed,
    typical_p: cfg.typical_p,
    top_n_sigma: cfg.top_n_sigma,
    mirostat: cfg.mirostat,
    mirostat_tau: cfg.mirostat_tau,
    mirostat_eta: cfg.mirostat_eta,
    xtc_probability: cfg.xtc_probability,
    xtc_threshold: cfg.xtc_threshold,
    penalty_repeat: cfg.penalty_repeat ?? cfg.repetition_penalty,
    penalty_last_n: cfg.penalty_last_n,
    penalty_freq: cfg.penalty_freq,
    penalty_present: cfg.penalty_present,
    dry_multiplier: cfg.dry_multiplier,
    dry_base: cfg.dry_base,
    dry_allowed_length: cfg.dry_allowed_length,
    dry_penalty_last_n: cfg.dry_penalty_last_n,
    ignore_eos: cfg.ignore_eos,
    n_probs: cfg.n_probs,
    stop: activeStops,
    reasoning_format: 'auto',
  };
}