From 2fe84a56a2f984ed6f6e7a65ff5996e1b89367b2 Mon Sep 17 00:00:00 2001
From: sa_ddam213
Date: Fri, 8 Sep 2023 15:22:55 +1200
Subject: [PATCH] Remove obsolete model parameters, more comments

---
 LLamaStack.Core/Config/IInferenceConfig.cs  |  77 ++++++++++++++
 LLamaStack.Core/Config/ISessionConfig.cs    |  38 +++++++
 LLamaStack.Core/Config/InferenceConfig.cs   |  68 +++++++++++++
 LLamaStack.Core/Config/LLamaStackConfig.cs  |  19 ++++
 LLamaStack.Core/Config/ModelConfig.cs       | 107 +++++++++++++++++++-
 LLamaStack.Core/Config/SessionConfig.cs     |  40 ++++++++
 LLamaStack.WPF/Models/ModelConfiguration.cs |  17 +---
 LLamaStack.WPF/Views/ModelEditorView.xaml   |  10 --
 8 files changed, 347 insertions(+), 29 deletions(-)

diff --git a/LLamaStack.Core/Config/IInferenceConfig.cs b/LLamaStack.Core/Config/IInferenceConfig.cs
index 7ace379..8c27597 100644
--- a/LLamaStack.Core/Config/IInferenceConfig.cs
+++ b/LLamaStack.Core/Config/IInferenceConfig.cs
@@ -5,21 +5,98 @@ namespace LLamaStack.Core.Config
 {
     public interface IInferenceConfig
     {
+        /// <summary>
+        /// Gets or sets the penalty applied to token frequency, affecting token selection during language model inference.
+        /// </summary>
         float FrequencyPenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of objects that provide bias information for specific tokens in the language model's vocabulary.
+        /// </summary>
         List<LogitBiasModel> LogitBias { get; set; }
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to generate during inference, limiting the length of the generated text.
+        /// </summary>
         int MaxTokens { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of sampling strategy to use during language model inference (e.g., greedy, top-k, top-p).
+        /// </summary>
         SamplerType SamplerType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the Mirostat learning rate (eta), used to adjust the strength of the Mirostat bias.
+        /// </summary>
         float MirostatEta { get; set; }
+
+        /// <summary>
+        /// Gets or sets the Mirostat target entropy (tau), controlling the balance between coherence and diversity in the Mirostat sampling process.
+        /// </summary>
         float MirostatTau { get; set; }
+
+        /// <summary>
+        /// Determines whether to apply a penalty for generating newline characters ("\n") in the generated text.
+        /// </summary>
         bool PenalizeNL { get; set; }
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token presence in the generated text.
+        /// </summary>
         float PresencePenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets the number of most recent tokens considered when applying the repeat penalty.
+        /// </summary>
         int RepeatLastTokensCount { get; set; }
+
+        /// <summary>
+        /// Gets or sets the penalty applied for repeating tokens in the generated text.
+        /// </summary>
         float RepeatPenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets the temperature parameter for temperature-based sampling. Higher values make output more random, while lower values make it more deterministic.
+        /// </summary>
         float Temperature { get; set; }
+
+        /// <summary>
+        /// Gets or sets the parameter (z) used in the Tail-Free Sampling (TFS) strategy.
+        /// </summary>
         float TfsZ { get; set; }
+
+        /// <summary>
+        /// Gets or sets the number of tokens to keep from the input text when generating output.
+        /// </summary>
         int TokensKeep { get; set; }
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to consider during top-k sampling.
+        /// </summary>
         int TopK { get; set; }
+
+        /// <summary>
+        /// Gets or sets the cumulative probability threshold for top-p sampling.
+        /// </summary>
         float TopP { get; set; }
+
+        /// <summary>
+        /// Gets or sets the probability threshold (p) for locally typical sampling during language model inference.
+        /// </summary>
         float TypicalP { get; set; }
     }
 }
\ No newline at end of file
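For context on what the penalty knobs above control, here is a minimal sketch of how llama.cpp-style samplers conventionally apply the repeat, frequency, and presence penalties to raw logits. `PenaltySketch` and `ApplyPenalties` are hypothetical names for illustration only; this is not LLamaStack's actual sampling code.

```csharp
using System.Collections.Generic;
using System.Linq;

public static class PenaltySketch
{
    // Illustrative only: applies llama.cpp-style penalties to raw logits.
    // 'lastTokens' stands in for the trailing RepeatLastTokensCount tokens of the context.
    public static void ApplyPenalties(
        float[] logits,
        IReadOnlyList<int> lastTokens,
        float repeatPenalty,
        float frequencyPenalty,
        float presencePenalty)
    {
        var counts = lastTokens.GroupBy(t => t).ToDictionary(g => g.Key, g => g.Count());
        foreach (var entry in counts)
        {
            int token = entry.Key;
            int count = entry.Value;

            // RepeatPenalty divides positive logits and multiplies negative ones,
            // making recently seen tokens less likely to be sampled again.
            logits[token] = logits[token] > 0
                ? logits[token] / repeatPenalty
                : logits[token] * repeatPenalty;

            // FrequencyPenalty scales with how often the token occurred;
            // PresencePenalty is a flat cost for having occurred at all.
            logits[token] -= count * frequencyPenalty + presencePenalty;
        }
    }
}
```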
diff --git a/LLamaStack.Core/Config/ISessionConfig.cs b/LLamaStack.Core/Config/ISessionConfig.cs
index 33ffe50..eaff24b 100644
--- a/LLamaStack.Core/Config/ISessionConfig.cs
+++ b/LLamaStack.Core/Config/ISessionConfig.cs
@@ -2,16 +2,54 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Interface for session configurations
+    /// </summary>
     public interface ISessionConfig
     {
+        /// <summary>
+        /// Gets or sets the name of the model to open the session on.
+        /// </summary>
         string Model { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of executor to use for inference.
+        /// </summary>
         ExecutorType ExecutorType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the initial prompt to start the session with.
+        /// </summary>
         string Prompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input prefix for Instruct executors.
+        /// </summary>
         string InputPrefix { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input suffix for Instruct executors.
+        /// </summary>
         string InputSuffix { get; set; }
+
+        /// <summary>
+        /// Gets or sets one or more anti-prompt words as a comma-separated string (combined with AntiPrompts).
+        /// </summary>
         string AntiPrompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of anti-prompt words (combined with AntiPrompt).
+        /// </summary>
         public List<string> AntiPrompts { get; set; }
+
+        /// <summary>
+        /// Gets or sets words to remove from the output as a comma-separated string (combined with OutputFilters).
+        /// </summary>
         string OutputFilter { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of words to remove from the output (combined with OutputFilter).
+        /// </summary>
         public List<string> OutputFilters { get; set; }
     }
 }
\ No newline at end of file
diff --git a/LLamaStack.Core/Config/InferenceConfig.cs b/LLamaStack.Core/Config/InferenceConfig.cs
index 0aa04bc..9258b8a 100644
--- a/LLamaStack.Core/Config/InferenceConfig.cs
+++ b/LLamaStack.Core/Config/InferenceConfig.cs
@@ -3,23 +3,91 @@ namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Concrete implementation of IInferenceConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IInferenceConfig" />
     public class InferenceConfig : IInferenceConfig
     {
+        /// <summary>
+        /// Gets or sets the number of tokens to keep from the input text when generating output.
+        /// </summary>
         public int TokensKeep { get; set; } = 0;
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to generate during inference, limiting the length of the generated text.
+        /// </summary>
         public int MaxTokens { get; set; } = -1;
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to consider during top-k sampling.
+        /// </summary>
         public int TopK { get; set; } = 40;
+
+        /// <summary>
+        /// Gets or sets the cumulative probability threshold for top-p sampling.
+        /// </summary>
         public float TopP { get; set; } = 0.95f;
+
+        /// <summary>
+        /// Gets or sets the parameter (z) used in the Tail-Free Sampling (TFS) strategy.
+        /// </summary>
         public float TfsZ { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Gets or sets the probability threshold (p) for locally typical sampling during language model inference.
+        /// </summary>
         public float TypicalP { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Gets or sets the temperature parameter for temperature-based sampling. Higher values make output more random, while lower values make it more deterministic.
+        /// </summary>
         public float Temperature { get; set; } = 0.8f;
+
+        /// <summary>
+        /// Gets or sets the penalty applied for repeating tokens in the generated text.
+        /// </summary>
         public float RepeatPenalty { get; set; } = 1.1f;
+
+        /// <summary>
+        /// Gets or sets the number of most recent tokens considered when applying the repeat penalty.
+        /// </summary>
         public int RepeatLastTokensCount { get; set; } = 64;
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token frequency, affecting token selection during language model inference.
+        /// </summary>
         public float FrequencyPenalty { get; set; } = .0f;
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token presence in the generated text.
+        /// </summary>
         public float PresencePenalty { get; set; } = .0f;
+
+        /// <summary>
+        /// Gets or sets the Mirostat target entropy (tau), controlling the balance between coherence and diversity in the Mirostat sampling process.
+        /// </summary>
         public float MirostatTau { get; set; } = 5.0f;
+
+        /// <summary>
+        /// Gets or sets the Mirostat learning rate (eta), used to adjust the strength of the Mirostat bias.
+        /// </summary>
         public float MirostatEta { get; set; } = 0.1f;
+
+        /// <summary>
+        /// Determines whether to apply a penalty for generating newline characters ("\n") in the generated text.
+        /// </summary>
         public bool PenalizeNL { get; set; } = true;
+
+        /// <summary>
+        /// Gets or sets the type of sampling strategy to use during language model inference (e.g., greedy, top-k, top-p).
+        /// </summary>
         public SamplerType SamplerType { get; set; } = SamplerType.Default;
+
+        /// <summary>
+        /// Gets or sets a list of objects that provide bias information for specific tokens in the language model's vocabulary.
+        /// </summary>
         public List<LogitBiasModel> LogitBias { get; set; } = new List<LogitBiasModel>();
     }
 }
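A usage sketch for the defaults above, switching over to Mirostat sampling. The `SamplerType.Mirostat2` member name is an assumption, since the enum's members are not part of this diff.

```csharp
using LLamaStack.Core.Config;

// A minimal sketch: tighten the default sampling, then opt into Mirostat.
var inference = new InferenceConfig
{
    MaxTokens = 512,                     // cap response length (-1 = no limit)
    Temperature = 0.7f,
    TopK = 40,
    TopP = 0.9f,
    RepeatPenalty = 1.15f,
    RepeatLastTokensCount = 128,         // penalty window over recent tokens

    SamplerType = SamplerType.Mirostat2, // assumed enum member name
    MirostatTau = 5.0f,                  // target entropy
    MirostatEta = 0.1f                   // learning rate
};
```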
diff --git a/LLamaStack.Core/Config/LLamaStackConfig.cs b/LLamaStack.Core/Config/LLamaStackConfig.cs
index 61aeb0b..6b98d4f 100644
--- a/LLamaStack.Core/Config/LLamaStackConfig.cs
+++ b/LLamaStack.Core/Config/LLamaStackConfig.cs
@@ -2,12 +2,31 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// LLamaStack appsettings.json config element
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IConfigSection" />
     public class LLamaStackConfig : IConfigSection
     {
+        /// <summary>
+        /// Gets or sets the model load type.
+        /// </summary>
         public ModelLoadType ModelLoadType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the model state path.
+        /// </summary>
         public string ModelStatePath { get; set; }
+
+        /// <summary>
+        /// Gets or sets the models.
+        /// </summary>
         public List<ModelConfig> Models { get; set; }

+        /// <summary>
+        /// Performs any initialization; called directly after deserialization.
+        /// </summary>
         public void Initialize()
         {
             if (string.IsNullOrEmpty(ModelStatePath))
diff --git a/LLamaStack.Core/Config/ModelConfig.cs b/LLamaStack.Core/Config/ModelConfig.cs
index f63bab0..c09e6c3 100644
--- a/LLamaStack.Core/Config/ModelConfig.cs
+++ b/LLamaStack.Core/Config/ModelConfig.cs
@@ -1,32 +1,133 @@
-namespace LLamaStack.Core.Config
+
+namespace LLamaStack.Core.Config
 {
+
+    /// <summary>
+    /// Concrete implementation of IModelConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IModelConfig" />
     public class ModelConfig : IModelConfig
     {
+
+        /// <summary>
+        /// Gets or sets the maximum context instances.
+        /// </summary>
         public int MaxInstances { get; set; } = -1;
+
+        /// <summary>
+        /// Gets or sets the model name.
+        /// </summary>
         public string Name { get; set; } = "unknown";
+
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
         public int ContextSize { get; set; } = 512;
+
+        /// <summary>
+        /// The GPU that is used for scratch and small tensors
+        /// </summary>
         public int MainGpu { get; set; } = 0;
+
+        /// <summary>
+        /// If true, reduces VRAM usage at the cost of performance
+        /// </summary>
         public bool LowVram { get; set; } = false;
+
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
         public int GpuLayerCount { get; set; } = 20;
+
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
         public int Seed { get; set; } = 1686349486;
+
+        /// <summary>
+        /// Use f16 instead of f32 for the memory KV cache (memory_f16)
+        /// </summary>
         public bool UseFp16Memory { get; set; } = true;
+
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
         public bool UseMemorymap { get; set; } = true;
+
+        /// <summary>
+        /// Use mlock to keep the model in memory (use_mlock)
+        /// </summary>
         public bool UseMemoryLock { get; set; } = false;
+
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
         public bool Perplexity { get; set; } = false;
+
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
         public string ModelPath { get; set; }
+
+        /// <summary>
+        /// LoRA adapter path (lora_adapter)
+        /// </summary>
         public string LoraAdapter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Base model path for the LoRA adapter (lora_base)
+        /// </summary>
         public string LoraBase { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Number of threads (-1 = autodetect) (n_threads)
+        /// </summary>
         public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
+
+        /// <summary>
+        /// Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
+        /// </summary>
         public int BatchSize { get; set; } = 512;
+
+        /// <summary>
+        /// Whether to convert EOS to a newline during inference.
+        /// </summary>
         public bool ConvertEosToNewLine { get; set; } = false;
+
+        /// <summary>
+        /// Whether to use embedding mode (embedding). Note that if this is set to true,
+        /// the LLamaModel won't produce text responses anymore.
+        /// </summary>
         public bool EmbeddingMode { get; set; } = false;
+
+        /// <summary>
+        /// How split tensors should be distributed across GPUs
+        /// </summary>
         public float[] TensorSplits { get; set; } = new float[] { 0 };
-        public int GroupedQueryAttention { get; set; } = 1;
-        public float RmsNormEpsilon { get; set; } = 5e-6f;
+
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
         public float RopeFrequencyBase { get; set; } = 10000.0f;
+
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
         public float RopeFrequencyScale { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Model alias
+        /// </summary>
         public string ModelAlias { get; set; }
+
+        /// <summary>
+        /// Use experimental mul_mat_q kernels
+        /// </summary>
         public bool MulMatQ { get; set; }
+
+        /// <summary>
+        /// The encoding to use for models
+        /// </summary>
         public string Encoding { get; set; } = "UTF-8";
     }
 }
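These two classes map onto the `LLamaStack` section of appsettings.json. A hand-built equivalent for illustration; all paths and the model name are placeholders.

```csharp
using System.Collections.Generic;
using LLamaStack.Core.Config;

// Sketch of a hand-built configuration (normally deserialized from appsettings.json).
var config = new LLamaStackConfig
{
    ModelStatePath = @"D:\LLamaStack\States",               // placeholder path
    Models = new List<ModelConfig>
    {
        new ModelConfig
        {
            Name = "example-13b",                           // placeholder name
            ModelPath = @"D:\Models\example-13b.q4_0.bin",  // placeholder path
            ContextSize = 2048,
            GpuLayerCount = 20,
            BatchSize = 512,   // must be >= 32 to use BLAS
            Threads = -1,      // autodetect
            MaxInstances = 4
        }
    }
};

// Initialize() runs directly after deserialization; per the hunk above it
// backfills ModelStatePath when the configured value is empty.
config.Initialize();
```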
diff --git a/LLamaStack.Core/Config/SessionConfig.cs b/LLamaStack.Core/Config/SessionConfig.cs
index fe5ca7e..d45304e 100644
--- a/LLamaStack.Core/Config/SessionConfig.cs
+++ b/LLamaStack.Core/Config/SessionConfig.cs
@@ -2,16 +2,56 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Concrete implementation of ISessionConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.ISessionConfig" />
     public class SessionConfig : ISessionConfig
     {
+
+        /// <summary>
+        /// Gets or sets the name of the model to open the session on.
+        /// </summary>
         public string Model { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of executor to use for inference.
+        /// </summary>
         public ExecutorType ExecutorType { get; set; } = ExecutorType.Instruct;
+
+        /// <summary>
+        /// Gets or sets the initial prompt to start the session with.
+        /// </summary>
         public string Prompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input prefix for Instruct executors.
+        /// </summary>
         public string InputPrefix { get; set; } = "\n\n### Instruction:\n\n";
+
+        /// <summary>
+        /// Gets or sets the input suffix for Instruct executors.
+        /// </summary>
         public string InputSuffix { get; set; } = "\n\n### Response:\n\n";
+
+        /// <summary>
+        /// Gets or sets one or more anti-prompt words as a comma-separated string (combined with AntiPrompts).
+        /// </summary>
         public string AntiPrompt { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Gets or sets a list of anti-prompt words (combined with AntiPrompt).
+        /// </summary>
         public List<string> AntiPrompts { get; set; } = new List<string>();
+
+        /// <summary>
+        /// Gets or sets words to remove from the output as a comma-separated string (combined with OutputFilters).
+        /// </summary>
         public string OutputFilter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Gets or sets a list of words to remove from the output (combined with OutputFilter).
+        /// </summary>
         public List<string> OutputFilters { get; set; } = new List<string>();
     }
 }
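A sketch of a session configured for the Instruct executor. The model name is a placeholder, and the CSV/list merge behaviour is taken from the property comments above rather than from code in this diff; note the prefix/suffix defaults already wrap input in an Alpaca-style template.

```csharp
using System.Collections.Generic;
using LLamaStack.Core.Config;

// Illustrative session setup for an Instruct executor.
var session = new SessionConfig
{
    Model = "example-13b",                 // placeholder model name
    ExecutorType = ExecutorType.Instruct,
    Prompt = "Below is an instruction that describes a task.",

    // Stop words can be supplied as CSV and/or as a list;
    // per the comments above, the two are combined.
    AntiPrompt = "### Instruction:,User:",
    AntiPrompts = new List<string> { "</s>" },

    // Strip the template echo from the output.
    OutputFilter = "### Response:"
};
```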
diff --git a/LLamaStack.WPF/Models/ModelConfiguration.cs b/LLamaStack.WPF/Models/ModelConfiguration.cs
index fe0ef6c..63a2b5a 100644
--- a/LLamaStack.WPF/Models/ModelConfiguration.cs
+++ b/LLamaStack.WPF/Models/ModelConfiguration.cs
@@ -12,8 +12,6 @@ public class ModelConfiguration : IModelConfig, INotifyPropertyChanged
     private bool _mulMatQ;
     private float _ropeFrequencyScale;
     private float _ropeFrequencyBase;
-    private float _rmsNormEpsilon;
-    private int _groupedQueryAttention;
     private float[] _tensorSplits;
     private bool _embeddingMode;
     private bool _convertEosToNewLine;
@@ -135,16 +133,7 @@ public float[] TensorSplits
         get { return _tensorSplits; }
         set { _tensorSplits = value; NotifyPropertyChanged(); }
     }
-    public int GroupedQueryAttention
-    {
-        get { return _groupedQueryAttention; }
-        set { _groupedQueryAttention = value; NotifyPropertyChanged(); }
-    }
-    public float RmsNormEpsilon
-    {
-        get { return _rmsNormEpsilon; }
-        set { _rmsNormEpsilon = value; NotifyPropertyChanged(); }
-    }
+
     public float RopeFrequencyBase
     {
         get { return _ropeFrequencyBase; }
@@ -176,7 +165,6 @@ public static ModelConfiguration From(ModelConfig config)
             EmbeddingMode = config.EmbeddingMode,
             Encoding = config.Encoding,
             GpuLayerCount = config.GpuLayerCount,
-            GroupedQueryAttention = config.GroupedQueryAttention,
             LoraAdapter = config.LoraAdapter,
             LoraBase = config.LoraBase,
             LowVram = config.LowVram,
@@ -187,7 +175,6 @@ public static ModelConfiguration From(ModelConfig config)
             MulMatQ = config.MulMatQ,
             Name = config.Name,
             Perplexity = config.Perplexity,
-            RmsNormEpsilon = config.RmsNormEpsilon,
             RopeFrequencyBase = config.RopeFrequencyBase,
             RopeFrequencyScale = config.RopeFrequencyScale,
             Seed = config.Seed,
@@ -209,7 +196,6 @@ public static ModelConfig To(ModelConfiguration config)
             EmbeddingMode = config.EmbeddingMode,
             Encoding = config.Encoding,
             GpuLayerCount = config.GpuLayerCount,
-            GroupedQueryAttention = config.GroupedQueryAttention,
             LoraAdapter = config.LoraAdapter,
             LoraBase = config.LoraBase,
             LowVram = config.LowVram,
@@ -220,7 +206,6 @@ public static ModelConfig To(ModelConfiguration config)
             MulMatQ = config.MulMatQ,
             Name = config.Name,
             Perplexity = config.Perplexity,
-            RmsNormEpsilon = config.RmsNormEpsilon,
             RopeFrequencyBase = config.RopeFrequencyBase,
             RopeFrequencyScale = config.RopeFrequencyScale,
             Seed = config.Seed,
diff --git a/LLamaStack.WPF/Views/ModelEditorView.xaml b/LLamaStack.WPF/Views/ModelEditorView.xaml
index 976791f..5ef6e5d 100644
--- a/LLamaStack.WPF/Views/ModelEditorView.xaml
+++ b/LLamaStack.WPF/Views/ModelEditorView.xaml
@@ -99,16 +99,6 @@
-
-
-
-
-
-
-
-
-
-
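For context on the WPF removals: `ModelConfiguration` follows the standard backing-field plus `NotifyPropertyChanged()` pattern visible in the hunks above, so each obsolete parameter also drops a private field, a property, and a XAML editor row. A condensed sketch of that pattern (not the full class):

```csharp
using System.ComponentModel;
using System.Runtime.CompilerServices;

public class BindableSetting : INotifyPropertyChanged
{
    private float _ropeFrequencyBase;

    // Each editable setting is a property over a private backing field that
    // raises PropertyChanged, so WPF bindings in ModelEditorView.xaml update.
    public float RopeFrequencyBase
    {
        get { return _ropeFrequencyBase; }
        set { _ropeFrequencyBase = value; NotifyPropertyChanged(); }
    }

    public event PropertyChangedEventHandler PropertyChanged;

    private void NotifyPropertyChanged([CallerMemberName] string propertyName = "")
    {
        PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
    }
}
```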