// Sampling & initLlama parameter builders — extracted for max-lines compliance.
|
|
import type { ModelConfig } from './types';
|
|
import type { OAIMessage } from './chatThunksHelpers';
|
|
|
|
/**
 * Assemble the parameter object expected by `initLlama` for one model file.
 *
 * `model` is taken from `modelPath`; every other field is read from `cfg`
 * under the same key. Three fields get a default when the config value is
 * `null` or `undefined`: `n_ctx` (2048), `use_mlock` (`false`) and
 * `use_mmap` (`true`). All remaining fields are forwarded as-is, so an
 * unset option still appears on the result with the value `undefined`.
 *
 * @param modelPath - Value placed under the `model` key.
 * @param cfg - Model configuration to read the options from.
 * @returns A fresh object literal; `cfg` is not modified.
 */
export function buildInitParams(modelPath: string, cfg: ModelConfig) {
  // Options that are forwarded without any defaulting.
  const {
    n_batch,
    n_ubatch,
    n_threads,
    n_gpu_layers,
    flash_attn,
    cache_type_k,
    cache_type_v,
    rope_freq_base,
    rope_freq_scale,
    ctx_shift,
    kv_unified,
    n_cpu_moe,
    cpu_mask,
    n_parallel,
  } = cfg;

  // `??` (not a destructuring default) so that an explicit null is defaulted too.
  const contextSize = cfg.n_ctx ?? 2048;
  const lockInMemory = cfg.use_mlock ?? false;
  const memoryMap = cfg.use_mmap ?? true;

  return {
    model: modelPath,
    n_ctx: contextSize,
    n_batch,
    n_ubatch,
    n_threads,
    n_gpu_layers,
    flash_attn,
    cache_type_k,
    cache_type_v,
    use_mlock: lockInMemory,
    use_mmap: memoryMap,
    rope_freq_base,
    rope_freq_scale,
    ctx_shift,
    kv_unified,
    n_cpu_moe,
    cpu_mask,
    n_parallel,
  };
}
|
|
|
|
/**
 * Assemble the full `completion()` params object: messages, sampling
 * settings and stop sequences.
 *
 * @param oaiMessages - Placed under `messages` unchanged (same array reference).
 * @param activeStops - Placed under `stop` unchanged (same array reference).
 * @param cfg - Source of the sampling settings. Fields without a fallback are
 *   forwarded as-is, so unset ones appear on the result as `undefined`.
 * @param fallbackMaxTokens - Used for `n_predict` when both `cfg.n_predict`
 *   and `cfg.max_new_tokens` are null/undefined.
 * @param fallbackTemp - Used for `temperature` when `cfg.temperature` is
 *   null/undefined.
 * @returns A fresh object; `reasoning_format` is always `'auto'`.
 */
export function buildSamplingParams(
  oaiMessages: OAIMessage[],
  activeStops: string[],
  cfg: ModelConfig,
  fallbackMaxTokens: number,
  fallbackTemp: number,
): Record<string, unknown> {
  // Fields that fall back to an argument or to a second config key.
  const tokenBudget = cfg.n_predict ?? cfg.max_new_tokens ?? fallbackMaxTokens;
  const temp = cfg.temperature ?? fallbackTemp;
  const repeatPenalty = cfg.penalty_repeat ?? cfg.repetition_penalty;

  const core = {
    messages: oaiMessages,
    n_predict: tokenBudget,
    temperature: temp,
  };

  const sampler = {
    top_k: cfg.top_k,
    top_p: cfg.top_p,
    min_p: cfg.min_p,
    seed: cfg.seed,
    typical_p: cfg.typical_p,
    top_n_sigma: cfg.top_n_sigma,
  };

  const mirostat = {
    mirostat: cfg.mirostat,
    mirostat_tau: cfg.mirostat_tau,
    mirostat_eta: cfg.mirostat_eta,
  };

  const xtc = {
    xtc_probability: cfg.xtc_probability,
    xtc_threshold: cfg.xtc_threshold,
  };

  const penalties = {
    penalty_repeat: repeatPenalty,
    penalty_last_n: cfg.penalty_last_n,
    penalty_freq: cfg.penalty_freq,
    penalty_present: cfg.penalty_present,
  };

  const dry = {
    dry_multiplier: cfg.dry_multiplier,
    dry_base: cfg.dry_base,
    dry_allowed_length: cfg.dry_allowed_length,
    dry_penalty_last_n: cfg.dry_penalty_last_n,
  };

  const output = {
    ignore_eos: cfg.ignore_eos,
    n_probs: cfg.n_probs,
    stop: activeStops,
    reasoning_format: 'auto',
  };

  // Spread order fixes the key order of the resulting object.
  return {
    ...core,
    ...sampler,
    ...mirostat,
    ...xtc,
    ...penalties,
    ...dry,
    ...output,
  };
}
|