From 2fe84a56a2f984ed6f6e7a65ff5996e1b89367b2 Mon Sep 17 00:00:00 2001
From: sa_ddam213
Date: Fri, 8 Sep 2023 15:22:55 +1200
Subject: [PATCH] Remove obsolete model parameters, more comments

---
 LLamaStack.Core/Config/IInferenceConfig.cs  |  77 ++++++++++++++
 LLamaStack.Core/Config/ISessionConfig.cs    |  38 +++++++
 LLamaStack.Core/Config/InferenceConfig.cs   |  68 +++++++++++++
 LLamaStack.Core/Config/LLamaStackConfig.cs  |  19 ++++
 LLamaStack.Core/Config/ModelConfig.cs       | 107 +++++++++++++++++++-
 LLamaStack.Core/Config/SessionConfig.cs     |  40 ++++++++
 LLamaStack.WPF/Models/ModelConfiguration.cs |  17 +---
 LLamaStack.WPF/Views/ModelEditorView.xaml   |  10 --
 8 files changed, 347 insertions(+), 29 deletions(-)

diff --git a/LLamaStack.Core/Config/IInferenceConfig.cs b/LLamaStack.Core/Config/IInferenceConfig.cs
index 7ace379..8c27597 100644
--- a/LLamaStack.Core/Config/IInferenceConfig.cs
+++ b/LLamaStack.Core/Config/IInferenceConfig.cs
@@ -5,21 +5,98 @@ namespace LLamaStack.Core.Config
 {
     public interface IInferenceConfig
     {
+        /// <summary>
+        /// Gets or sets the penalty applied to token frequency, affecting token selection during language model inference.
+        /// </summary>
         float FrequencyPenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of objects that provide bias information for specific tokens in the language model's vocabulary.
+        /// </summary>
         List<LogitBiasModel> LogitBias { get; set; }
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to generate during inference, limiting the length of the generated text.
+        /// </summary>
         int MaxTokens { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of sampling strategy to use during language model inference (e.g., greedy, top-k, top-p).
+        /// </summary>
         SamplerType SamplerType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the Mirostat learning rate (eta), used to adjust the strength of the Mirostat bias.
+        /// </summary>
         float MirostatEta { get; set; }
+
+        /// <summary>
+        /// Gets or sets the Mirostat target entropy (tau), controlling the balance between coherence and diversity in the Mirostat sampling process.
+        /// </summary>
         float MirostatTau { get; set; }
+
+        /// <summary>
+        /// Determines whether to apply a penalty for generating newline characters ("\n") in the generated text.
+        /// </summary>
         bool PenalizeNL { get; set; }
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token presence in the generated text.
+        /// </summary>
         float PresencePenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets the number of most recent tokens considered when applying the repeat penalty.
+        /// </summary>
         int RepeatLastTokensCount { get; set; }
+
+        /// <summary>
+        /// Gets or sets the penalty applied for repeating tokens in the generated text.
+        /// </summary>
         float RepeatPenalty { get; set; }
+
+        /// <summary>
+        /// Gets or sets the temperature parameter for temperature-based sampling. Higher values make output more random, while lower values make it more deterministic.
+        /// </summary>
         float Temperature { get; set; }
+
+        /// <summary>
+        /// Gets or sets the parameter (z) used in the Tail-Free Sampling (TFS) strategy.
+        /// </summary>
         float TfsZ { get; set; }
+
+        /// <summary>
+        /// Gets or sets the number of tokens to keep from the input text when generating output.
+        /// </summary>
         int TokensKeep { get; set; }
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to consider during top-k sampling.
+        /// </summary>
         int TopK { get; set; }
+
+        /// <summary>
+        /// Gets or sets the cumulative probability threshold for top-p sampling.
+        /// </summary>
         float TopP { get; set; }
+
+        /// <summary>
+        /// Gets or sets the probability threshold (p) for locally typical sampling during language model inference.
+        /// </summary>
         float TypicalP { get; set; }
     }
 }
\ No newline at end of file
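For context on what the penalty knobs above control, here is a minimal sketch of how llama.cpp-style samplers conventionally apply the repeat, frequency, and presence penalties to raw logits. `PenaltySketch` and `ApplyPenalties` are hypothetical names for illustration only; this is not LLamaStack's actual sampling code.

```csharp
using System.Collections.Generic;
using System.Linq;

public static class PenaltySketch
{
    // Illustrative only: applies llama.cpp-style penalties to raw logits.
    // 'lastTokens' stands in for the trailing RepeatLastTokensCount tokens of the context.
    public static void ApplyPenalties(
        float[] logits,
        IReadOnlyList<int> lastTokens,
        float repeatPenalty,
        float frequencyPenalty,
        float presencePenalty)
    {
        var counts = lastTokens.GroupBy(t => t).ToDictionary(g => g.Key, g => g.Count());
        foreach (var entry in counts)
        {
            int token = entry.Key;
            int count = entry.Value;

            // RepeatPenalty divides positive logits and multiplies negative ones,
            // making recently seen tokens less likely to be sampled again.
            logits[token] = logits[token] > 0
                ? logits[token] / repeatPenalty
                : logits[token] * repeatPenalty;

            // FrequencyPenalty scales with how often the token occurred;
            // PresencePenalty is a flat cost for having occurred at all.
            logits[token] -= count * frequencyPenalty + presencePenalty;
        }
    }
}
```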
diff --git a/LLamaStack.Core/Config/ISessionConfig.cs b/LLamaStack.Core/Config/ISessionConfig.cs
index 33ffe50..eaff24b 100644
--- a/LLamaStack.Core/Config/ISessionConfig.cs
+++ b/LLamaStack.Core/Config/ISessionConfig.cs
@@ -2,16 +2,54 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Interface for session configurations
+    /// </summary>
     public interface ISessionConfig
     {
+        /// <summary>
+        /// Gets or sets the name of the model to open the session on.
+        /// </summary>
         string Model { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of executor to use for inference.
+        /// </summary>
         ExecutorType ExecutorType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the initial prompt to start the session with.
+        /// </summary>
         string Prompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input prefix for Instruct executors.
+        /// </summary>
         string InputPrefix { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input suffix for Instruct executors.
+        /// </summary>
         string InputSuffix { get; set; }
+
+        /// <summary>
+        /// Gets or sets one or more anti-prompt words as a comma-separated string (combined with AntiPrompts).
+        /// </summary>
         string AntiPrompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of anti-prompt words (combined with AntiPrompt).
+        /// </summary>
         public List<string> AntiPrompts { get; set; }
+
+        /// <summary>
+        /// Gets or sets words to remove from the output as a comma-separated string (combined with OutputFilters).
+        /// </summary>
         string OutputFilter { get; set; }
+
+        /// <summary>
+        /// Gets or sets a list of words to remove from the output (combined with OutputFilter).
+        /// </summary>
         public List<string> OutputFilters { get; set; }
     }
 }
\ No newline at end of file
diff --git a/LLamaStack.Core/Config/InferenceConfig.cs b/LLamaStack.Core/Config/InferenceConfig.cs
index 0aa04bc..9258b8a 100644
--- a/LLamaStack.Core/Config/InferenceConfig.cs
+++ b/LLamaStack.Core/Config/InferenceConfig.cs
@@ -3,23 +3,91 @@ namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Concrete implementation of IInferenceConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IInferenceConfig" />
     public class InferenceConfig : IInferenceConfig
     {
+        /// <summary>
+        /// Gets or sets the number of tokens to keep from the input text when generating output.
+        /// </summary>
         public int TokensKeep { get; set; } = 0;
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to generate during inference, limiting the length of the generated text.
+        /// </summary>
         public int MaxTokens { get; set; } = -1;
+
+        /// <summary>
+        /// Gets or sets the maximum number of tokens to consider during top-k sampling.
+        /// </summary>
         public int TopK { get; set; } = 40;
+
+        /// <summary>
+        /// Gets or sets the cumulative probability threshold for top-p sampling.
+        /// </summary>
         public float TopP { get; set; } = 0.95f;
+
+        /// <summary>
+        /// Gets or sets the parameter (z) used in the Tail-Free Sampling (TFS) strategy.
+        /// </summary>
         public float TfsZ { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Gets or sets the probability threshold (p) for locally typical sampling during language model inference.
+        /// </summary>
         public float TypicalP { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Gets or sets the temperature parameter for temperature-based sampling. Higher values make output more random, while lower values make it more deterministic.
+        /// </summary>
         public float Temperature { get; set; } = 0.8f;
+
+        /// <summary>
+        /// Gets or sets the penalty applied for repeating tokens in the generated text.
+        /// </summary>
         public float RepeatPenalty { get; set; } = 1.1f;
+
+        /// <summary>
+        /// Gets or sets the number of most recent tokens considered when applying the repeat penalty.
+        /// </summary>
         public int RepeatLastTokensCount { get; set; } = 64;
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token frequency, affecting token selection during language model inference.
+        /// </summary>
         public float FrequencyPenalty { get; set; } = .0f;
+
+        /// <summary>
+        /// Gets or sets the penalty applied to token presence in the generated text.
+        /// </summary>
         public float PresencePenalty { get; set; } = .0f;
+
+        /// <summary>
+        /// Gets or sets the Mirostat target entropy (tau), controlling the balance between coherence and diversity in the Mirostat sampling process.
+        /// </summary>
         public float MirostatTau { get; set; } = 5.0f;
+
+        /// <summary>
+        /// Gets or sets the Mirostat learning rate (eta), used to adjust the strength of the Mirostat bias.
+        /// </summary>
         public float MirostatEta { get; set; } = 0.1f;
+
+        /// <summary>
+        /// Determines whether to apply a penalty for generating newline characters ("\n") in the generated text.
+        /// </summary>
         public bool PenalizeNL { get; set; } = true;
+
+        /// <summary>
+        /// Gets or sets the type of sampling strategy to use during language model inference (e.g., greedy, top-k, top-p).
+        /// </summary>
         public SamplerType SamplerType { get; set; } = SamplerType.Default;
+
+        /// <summary>
+        /// Gets or sets a list of objects that provide bias information for specific tokens in the language model's vocabulary.
+        /// </summary>
         public List<LogitBiasModel> LogitBias { get; set; } = new List<LogitBiasModel>();
     }
 }
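A usage sketch for the defaults above, switching over to Mirostat sampling. The `SamplerType.Mirostat2` member name is an assumption, since the enum's members are not part of this diff.

```csharp
using LLamaStack.Core.Config;

// A minimal sketch: tighten the default sampling, then opt into Mirostat.
var inference = new InferenceConfig
{
    MaxTokens = 512,                     // cap response length (-1 = no limit)
    Temperature = 0.7f,
    TopK = 40,
    TopP = 0.9f,
    RepeatPenalty = 1.15f,
    RepeatLastTokensCount = 128,         // penalty window over recent tokens

    SamplerType = SamplerType.Mirostat2, // assumed enum member name
    MirostatTau = 5.0f,                  // target entropy
    MirostatEta = 0.1f                   // learning rate
};
```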
diff --git a/LLamaStack.Core/Config/LLamaStackConfig.cs b/LLamaStack.Core/Config/LLamaStackConfig.cs
index 61aeb0b..6b98d4f 100644
--- a/LLamaStack.Core/Config/LLamaStackConfig.cs
+++ b/LLamaStack.Core/Config/LLamaStackConfig.cs
@@ -2,12 +2,31 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// LLamaStack appsettings.json config element
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IConfigSection" />
     public class LLamaStackConfig : IConfigSection
     {
+        /// <summary>
+        /// Gets or sets the model load type.
+        /// </summary>
         public ModelLoadType ModelLoadType { get; set; }
+
+        /// <summary>
+        /// Gets or sets the model state path.
+        /// </summary>
         public string ModelStatePath { get; set; }
+
+        /// <summary>
+        /// Gets or sets the models.
+        /// </summary>
         public List<ModelConfig> Models { get; set; }

+        /// <summary>
+        /// Performs any initialization; called directly after deserialization.
+        /// </summary>
         public void Initialize()
         {
             if (string.IsNullOrEmpty(ModelStatePath))
diff --git a/LLamaStack.Core/Config/ModelConfig.cs b/LLamaStack.Core/Config/ModelConfig.cs
index f63bab0..c09e6c3 100644
--- a/LLamaStack.Core/Config/ModelConfig.cs
+++ b/LLamaStack.Core/Config/ModelConfig.cs
@@ -1,32 +1,133 @@
-namespace LLamaStack.Core.Config
+
+namespace LLamaStack.Core.Config
 {
+
+    /// <summary>
+    /// Concrete implementation of IModelConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.IModelConfig" />
     public class ModelConfig : IModelConfig
     {
+
+        /// <summary>
+        /// Gets or sets the maximum context instances.
+        /// </summary>
         public int MaxInstances { get; set; } = -1;
+
+        /// <summary>
+        /// Gets or sets the model name.
+        /// </summary>
         public string Name { get; set; } = "unknown";
+
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
         public int ContextSize { get; set; } = 512;
+
+        /// <summary>
+        /// The GPU that is used for scratch and small tensors
+        /// </summary>
         public int MainGpu { get; set; } = 0;
+
+        /// <summary>
+        /// If true, reduces VRAM usage at the cost of performance
+        /// </summary>
         public bool LowVram { get; set; } = false;
+
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
         public int GpuLayerCount { get; set; } = 20;
+
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
         public int Seed { get; set; } = 1686349486;
+
+        /// <summary>
+        /// Use f16 instead of f32 for the memory KV cache (memory_f16)
+        /// </summary>
         public bool UseFp16Memory { get; set; } = true;
+
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
         public bool UseMemorymap { get; set; } = true;
+
+        /// <summary>
+        /// Use mlock to keep the model in memory (use_mlock)
+        /// </summary>
         public bool UseMemoryLock { get; set; } = false;
+
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
         public bool Perplexity { get; set; } = false;
+
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
         public string ModelPath { get; set; }
+
+        /// <summary>
+        /// LoRA adapter path (lora_adapter)
+        /// </summary>
         public string LoraAdapter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Base model path for the LoRA adapter (lora_base)
+        /// </summary>
         public string LoraBase { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Number of threads (-1 = autodetect) (n_threads)
+        /// </summary>
         public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
+
+        /// <summary>
+        /// Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
+        /// </summary>
         public int BatchSize { get; set; } = 512;
+
+        /// <summary>
+        /// Whether to convert EOS to a newline during inference.
+        /// </summary>
         public bool ConvertEosToNewLine { get; set; } = false;
+
+        /// <summary>
+        /// Whether to use embedding mode (embedding). Note that if this is set to true,
+        /// the LLamaModel won't produce text responses anymore.
+        /// </summary>
         public bool EmbeddingMode { get; set; } = false;
+
+        /// <summary>
+        /// How split tensors should be distributed across GPUs
+        /// </summary>
         public float[] TensorSplits { get; set; } = new float[] { 0 };
-        public int GroupedQueryAttention { get; set; } = 1;
-        public float RmsNormEpsilon { get; set; } = 5e-6f;
+
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
         public float RopeFrequencyBase { get; set; } = 10000.0f;
+
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
         public float RopeFrequencyScale { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Model alias
+        /// </summary>
         public string ModelAlias { get; set; }
+
+        /// <summary>
+        /// Use experimental mul_mat_q kernels
+        /// </summary>
         public bool MulMatQ { get; set; }
+
+        /// <summary>
+        /// The encoding to use for models
+        /// </summary>
         public string Encoding { get; set; } = "UTF-8";
     }
 }
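These two classes map onto the `LLamaStack` section of appsettings.json. A hand-built equivalent for illustration; all paths and the model name are placeholders.

```csharp
using System.Collections.Generic;
using LLamaStack.Core.Config;

// Sketch of a hand-built configuration (normally deserialized from appsettings.json).
var config = new LLamaStackConfig
{
    ModelStatePath = @"D:\LLamaStack\States",               // placeholder path
    Models = new List<ModelConfig>
    {
        new ModelConfig
        {
            Name = "example-13b",                           // placeholder name
            ModelPath = @"D:\Models\example-13b.q4_0.bin",  // placeholder path
            ContextSize = 2048,
            GpuLayerCount = 20,
            BatchSize = 512,   // must be >= 32 to use BLAS
            Threads = -1,      // autodetect
            MaxInstances = 4
        }
    }
};

// Initialize() runs directly after deserialization; per the hunk above it
// backfills ModelStatePath when the configured value is empty.
config.Initialize();
```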
diff --git a/LLamaStack.Core/Config/SessionConfig.cs b/LLamaStack.Core/Config/SessionConfig.cs
index fe5ca7e..d45304e 100644
--- a/LLamaStack.Core/Config/SessionConfig.cs
+++ b/LLamaStack.Core/Config/SessionConfig.cs
@@ -2,16 +2,56 @@
 namespace LLamaStack.Core.Config
 {
+    /// <summary>
+    /// Concrete implementation of ISessionConfig
+    /// </summary>
+    /// <seealso cref="LLamaStack.Core.Config.ISessionConfig" />
     public class SessionConfig : ISessionConfig
     {
+
+        /// <summary>
+        /// Gets or sets the name of the model to open the session on.
+        /// </summary>
         public string Model { get; set; }
+
+        /// <summary>
+        /// Gets or sets the type of executor to use for inference.
+        /// </summary>
         public ExecutorType ExecutorType { get; set; } = ExecutorType.Instruct;
+
+        /// <summary>
+        /// Gets or sets the initial prompt to start the session with.
+        /// </summary>
         public string Prompt { get; set; }
+
+        /// <summary>
+        /// Gets or sets the input prefix for Instruct executors.
+        /// </summary>
         public string InputPrefix { get; set; } = "\n\n### Instruction:\n\n";
+
+        /// <summary>
+        /// Gets or sets the input suffix for Instruct executors.
+        /// </summary>
         public string InputSuffix { get; set; } = "\n\n### Response:\n\n";
+
+        /// <summary>
+        /// Gets or sets one or more anti-prompt words as a comma-separated string (combined with AntiPrompts).
+        /// </summary>
         public string AntiPrompt { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Gets or sets a list of anti-prompt words (combined with AntiPrompt).
+        /// </summary>
         public List<string> AntiPrompts { get; set; } = new List<string>();
+
+        /// <summary>
+        /// Gets or sets words to remove from the output as a comma-separated string (combined with OutputFilters).
+        /// </summary>
         public string OutputFilter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Gets or sets a list of words to remove from the output (combined with OutputFilter).
+        /// </summary>
         public List<string> OutputFilters { get; set; } = new List<string>();
     }
 }
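A sketch of a session configured for the Instruct executor. The model name is a placeholder, and the CSV/list merge behaviour is taken from the property comments above rather than from code in this diff; note the prefix/suffix defaults already wrap input in an Alpaca-style template.

```csharp
using System.Collections.Generic;
using LLamaStack.Core.Config;

// Illustrative session setup for an Instruct executor.
var session = new SessionConfig
{
    Model = "example-13b",                 // placeholder model name
    ExecutorType = ExecutorType.Instruct,
    Prompt = "Below is an instruction that describes a task.",

    // Stop words can be supplied as CSV and/or as a list;
    // per the comments above, the two are combined.
    AntiPrompt = "### Instruction:,User:",
    AntiPrompts = new List<string> { "</s>" },

    // Strip the template echo from the output.
    OutputFilter = "### Response:"
};
```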
diff --git a/LLamaStack.WPF/Models/ModelConfiguration.cs b/LLamaStack.WPF/Models/ModelConfiguration.cs
index fe0ef6c..63a2b5a 100644
--- a/LLamaStack.WPF/Models/ModelConfiguration.cs
+++ b/LLamaStack.WPF/Models/ModelConfiguration.cs
@@ -12,8 +12,6 @@ public class ModelConfiguration : IModelConfig, INotifyPropertyChanged
     private bool _mulMatQ;
     private float _ropeFrequencyScale;
     private float _ropeFrequencyBase;
-    private float _rmsNormEpsilon;
-    private int _groupedQueryAttention;
     private float[] _tensorSplits;
     private bool _embeddingMode;
     private bool _convertEosToNewLine;
@@ -135,16 +133,7 @@ public float[] TensorSplits
         get { return _tensorSplits; }
         set { _tensorSplits = value; NotifyPropertyChanged(); }
     }
-    public int GroupedQueryAttention
-    {
-        get { return _groupedQueryAttention; }
-        set { _groupedQueryAttention = value; NotifyPropertyChanged(); }
-    }
-    public float RmsNormEpsilon
-    {
-        get { return _rmsNormEpsilon; }
-        set { _rmsNormEpsilon = value; NotifyPropertyChanged(); }
-    }
+
     public float RopeFrequencyBase
     {
         get { return _ropeFrequencyBase; }
@@ -176,7 +165,6 @@ public static ModelConfiguration From(ModelConfig config)
             EmbeddingMode = config.EmbeddingMode,
             Encoding = config.Encoding,
             GpuLayerCount = config.GpuLayerCount,
-            GroupedQueryAttention = config.GroupedQueryAttention,
             LoraAdapter = config.LoraAdapter,
             LoraBase = config.LoraBase,
             LowVram = config.LowVram,
@@ -187,7 +175,6 @@ public static ModelConfiguration From(ModelConfig config)
             MulMatQ = config.MulMatQ,
             Name = config.Name,
             Perplexity = config.Perplexity,
-            RmsNormEpsilon = config.RmsNormEpsilon,
             RopeFrequencyBase = config.RopeFrequencyBase,
             RopeFrequencyScale = config.RopeFrequencyScale,
             Seed = config.Seed,
@@ -209,7 +196,6 @@ public static ModelConfig To(ModelConfiguration config)
             EmbeddingMode = config.EmbeddingMode,
             Encoding = config.Encoding,
             GpuLayerCount = config.GpuLayerCount,
-            GroupedQueryAttention = config.GroupedQueryAttention,
             LoraAdapter = config.LoraAdapter,
             LoraBase = config.LoraBase,
             LowVram = config.LowVram,
@@ -220,7 +206,6 @@ public static ModelConfig To(ModelConfiguration config)
             MulMatQ = config.MulMatQ,
             Name = config.Name,
             Perplexity = config.Perplexity,
-            RmsNormEpsilon = config.RmsNormEpsilon,
             RopeFrequencyBase = config.RopeFrequencyBase,
             RopeFrequencyScale = config.RopeFrequencyScale,
             Seed = config.Seed,
diff --git a/LLamaStack.WPF/Views/ModelEditorView.xaml b/LLamaStack.WPF/Views/ModelEditorView.xaml
index 976791f..5ef6e5d 100644
--- a/LLamaStack.WPF/Views/ModelEditorView.xaml
+++ b/LLamaStack.WPF/Views/ModelEditorView.xaml
@@ -99,16 +99,6 @@
-
-
-
-
-
-
-
-
-
-
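For context on the WPF removals: `ModelConfiguration` follows the standard backing-field plus `NotifyPropertyChanged()` pattern visible in the hunks above, so each obsolete parameter also drops a private field, a property, and a XAML editor row. A condensed sketch of that pattern (not the full class):

```csharp
using System.ComponentModel;
using System.Runtime.CompilerServices;

public class BindableSetting : INotifyPropertyChanged
{
    private float _ropeFrequencyBase;

    // Each editable setting is a property over a private backing field that
    // raises PropertyChanged, so WPF bindings in ModelEditorView.xaml update.
    public float RopeFrequencyBase
    {
        get { return _ropeFrequencyBase; }
        set { _ropeFrequencyBase = value; NotifyPropertyChanged(); }
    }

    public event PropertyChangedEventHandler PropertyChanged;

    private void NotifyPropertyChanged([CallerMemberName] string propertyName = "")
    {
        PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
    }
}
```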