diff --git a/core/common/constant.py b/core/common/constant.py
index aa111ddb..d8723ae4 100644
--- a/core/common/constant.py
+++ b/core/common/constant.py
@@ -26,6 +26,7 @@ class DatasetFormat(Enum):
     CSV = "csv"
     TXT = "txt"
     JSON = "json"
+    JSONL = "jsonl"
 
 
 class ParadigmType(Enum):
diff --git a/core/testcasecontroller/algorithm/paradigm/singletask_learning/singletask_learning.py b/core/testcasecontroller/algorithm/paradigm/singletask_learning/singletask_learning.py
index 19972538..3194b7cd 100644
--- a/core/testcasecontroller/algorithm/paradigm/singletask_learning/singletask_learning.py
+++ b/core/testcasecontroller/algorithm/paradigm/singletask_learning/singletask_learning.py
@@ -84,5 +84,8 @@ def _inference(self, job, trained_model):
         inference_output_dir = os.path.join(self.workspace, "output/inference/")
         os.environ["RESULT_SAVED_URL"] = inference_output_dir
         job.load(trained_model)
-        infer_res = job.predict(inference_dataset.x)
+        if hasattr(inference_dataset, 'need_other_info'):
+            infer_res = job.predict(inference_dataset)
+        else:
+            infer_res = job.predict(inference_dataset.x)
         return infer_res
diff --git a/core/testenvmanager/dataset/dataset.py b/core/testenvmanager/dataset/dataset.py
index e07f5601..2edc960f 100644
--- a/core/testenvmanager/dataset/dataset.py
+++ b/core/testenvmanager/dataset/dataset.py
@@ -16,10 +16,16 @@
 
 import os
 import tempfile
-
 import pandas as pd
-from sedna.datasources import CSVDataParse, TxtDataParse, JSONDataParse
-
+# pylint: disable=no-name-in-module
+# pylint: disable=too-many-instance-attributes
+from sedna.datasources import (
+    CSVDataParse,
+    TxtDataParse,
+    JSONDataParse,
+    JsonlDataParse,
+    JSONMetaDataParse,
+)
 from core.common import utils
 from core.common.constant import DatasetFormat
 
@@ -38,12 +44,28 @@ class Dataset:
 
     def __init__(self, config):
         self.train_url: str = ""
         self.test_url: str = ""
+        self.train_index: str = ""
+        self.test_index: str = ""
+        self.train_data: str = ""
+        self.test_data: str = ""
+        self.train_data_info: str = ""
+        self.test_data_info: str = ""
         self.label: str = ""
         self._parse_config(config)
 
     def _check_fields(self):
-        self._check_dataset_url(self.train_url)
-        self._check_dataset_url(self.test_url)
+        if self.train_index:
+            self._check_dataset_url(self.train_index)
+        if self.test_index:
+            self._check_dataset_url(self.test_index)
+        if self.train_data:
+            self._check_dataset_url(self.train_data)
+        if self.test_data:
+            self._check_dataset_url(self.test_data)
+        if self.train_data_info:
+            self._check_dataset_url(self.train_data_info)
+        if self.test_data_info:
+            self._check_dataset_url(self.test_data_info)
 
     def _parse_config(self, config):
         for attr, value in config.items():
@@ -108,6 +130,20 @@ def _process_index_file(self, file_url):
 
         return None
 
+    def _process_data_file(self, file_url):
+        file_format = utils.get_file_format(file_url)
+        if file_format == DatasetFormat.JSONL.value:
+            return file_url
+
+        return None
+
+    def _process_data_info_file(self, file_url):
+        file_format = utils.get_file_format(file_url)
+        if file_format == DatasetFormat.JSON.value:
+            return file_url
+
+        return None
+
     def process_dataset(self):
         """
         process dataset:
         in the index file(e.g.: txt index file).
""" + if self.train_index: + self.train_url = self._process_index_file(self.train_index) + elif self.train_data: + self.train_url = self._process_data_file(self.train_data) + elif self.train_data_info: + self.train_url = self._process_data_info_file(self.train_data_info) + # raise NotImplementedError('to be done') + else: + raise NotImplementedError('not one of train_index/train_data/train_data_info') + + if self.test_index: + self.test_url = self._process_index_file(self.test_index) + elif self.test_data: + self.test_url = self._process_data_file(self.test_data) + elif self.test_data_info: + self.test_url = self._process_data_info_file(self.test_data_info) + # raise NotImplementedError('to be done') + else: + raise NotImplementedError('not one of test_index/test_data/test_data_info') - self.train_url = self._process_index_file(self.train_url) - self.test_url = self._process_index_file(self.test_url) # pylint: disable=too-many-arguments def split_dataset( @@ -514,6 +567,11 @@ def load_data( e.g.: TxtDataParse, CSVDataParse. """ + if file.split('/')[-1] == "metadata.json": + data = JSONMetaDataParse(data_type=data_type, func=feature_process) + data.parse(file) + return data + data_format = utils.get_file_format(file) data = None @@ -523,11 +581,14 @@ def load_data( if data_format == DatasetFormat.TXT.value: data = TxtDataParse(data_type=data_type, func=feature_process) - # print(file) data.parse(file, use_raw=use_raw) if data_format == DatasetFormat.JSON.value: data = JSONDataParse(data_type=data_type, func=feature_process) data.parse(file) + if data_format == DatasetFormat.JSONL.value: + data = JsonlDataParse(data_type=data_type, func=feature_process) + data.parse(file) + return data diff --git a/examples/government/singletask_learning_bench/README.md b/examples/government/singletask_learning_bench/README.md new file mode 100644 index 00000000..22dfbfed --- /dev/null +++ b/examples/government/singletask_learning_bench/README.md @@ -0,0 +1,104 @@ +# Government BenchMark + +## Introduction + +This is the work for Domain-specific Large Model Benchmark: + +Constructs a suite for the government sector, including test datasets, evaluation metrics, testing environments, and usage guidelines. + +This Benchmark consists of two parts: subjective evaluation data and objective evaluation data. + +## Design + +### Metadata Format + +| Name | Field Name | Option | Description | +| --- | --- | --- | --- | +| Data Name | dataset | Required | Name of the dataset | +| Data Description | description | Optional | Dataset description, such as usage scope, sample size, etc. | +| First-level Dimension | level_1_dim | Required | Should fill in "Single Modal" or "Multi-Modal" | +| Second-level Dimension | level_2_dim | Required | For "Single Modal", fill in "Text", "Image", or "Audio". For "Multi-Modal", fill in "Text-Image", "Text-Audio", "Image-Audio", or "Text-Image-Audio" | +| Third-level Dimension | level_3_dim | Optional | Should be filled if all samples in the dataset have the same third-level dimension. If filled, content should be based on the standards shown in the normative reference document | +| Fourth-level Dimension | level_4_dim | Optional | Should be filled if all samples in the dataset have the same third-level dimension. 
+
+### Data Format
+
+| Name | Option | Information |
+| --- | --- | --- |
+| prompt | Optional | the background of the LLM testing |
+| query | Required | the testing question |
+| response | Required | the answer to the question |
+| explanation | Optional | the explanation of the answer |
+| judge_prompt | Optional | the prompt of the judge model |
+| level_1_dim | Optional | single-modal or multi-modal |
+| level_2_dim | Optional | single-modal: text, image, video; multi-modal: text-image, text-video, text-image-video |
+| level_3_dim | Required | details |
+| level_4_dim | Required | details |
+
+data example:
+
+```json
+{
+    "prompt": "Please think step by step and answer the question.",
+    "question": "Which one is the correct answer of xxx? A. xxx B. xxx C. xxx D. xxx",
+    "response": "C",
+    "explanation": "xxx",
+    "level_1_dim": "single-modal",
+    "level_2_dim": "text",
+    "level_3_dim": "knowledge Q&A",
+    "level_4_dim": "medical knowledge"
+}
+```
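+
+Each line of `data.jsonl` holds one JSON object in this format. As an illustration only (the benchmark itself parses data through sedna's data parsers; `read_jsonl` is a hypothetical helper), such a file could be read into query/response pairs like this:
+
+```python
+import json
+
+def read_jsonl(path):
+    # one JSON object per line; skip blank lines
+    samples = []
+    with open(path, "r", encoding="utf-8") as f:
+        for line in f:
+            if line.strip():
+                samples.append(json.loads(line))
+    return samples
+
+# e.g. collect the test questions and reference answers:
+# samples = read_jsonl("dataset/government/objective/test_data/data.jsonl")
+# x = [s["query"] for s in samples]
+# y = [s["response"] for s in samples]
+```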
+
+## Changes to Core Code
+
+![](./imgs/structure.png)
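+
+The diagram above summarizes the core-code changes: `testenv.yaml` may now point either at a raw `data.jsonl` (`train_data`/`test_data`) or at a `metadata.json` (`train_data_info`/`test_data_info`). A file named `metadata.json` is routed to `JSONMetaDataParse`, and a parser exposing a `need_other_info` attribute receives the whole dataset object in `predict` instead of only `dataset.x`. Below is a rough, illustrative sketch of such a parser; the real implementation lives in the modified sedna package and may differ, and `MetaDataParseSketch` is a hypothetical name:
+
+```python
+import json
+
+class MetaDataParseSketch:
+    """Hypothetical stand-in for sedna's JSONMetaDataParse."""
+    need_other_info = True  # signals the paradigm to pass the whole object to predict
+
+    def __init__(self, data_type, func=None):
+        self.data_type = data_type
+        self.x, self.y = [], []
+
+    def parse(self, metadata_file):
+        with open(metadata_file, "r", encoding="utf-8") as f:
+            meta = json.load(f)
+        self.dataset_name = meta["dataset"]
+        self.description = meta.get("description", "")
+        self.level_1_dim = meta.get("level_1_dim", "")
+        self.level_2_dim = meta.get("level_2_dim", "")
+        # the actual parser also loads the data.jsonl next to metadata.json into self.x / self.y
+```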
+
+## Prepare Datasets
+
+You can download the dataset from [Kaggle](https://www.kaggle.com/datasets/kubeedgeianvs/the-government-affairs-dataset-govaff/data?select=government_benchmark).
+
+```
+dataset/government
+├── objective
+│   ├── test_data
+│   │   ├── data.jsonl
+│   │   └── metadata.json
+│   └── train_data
+└── subjective
+    ├── test_data
+    │   ├── data_full.jsonl
+    │   ├── data.jsonl
+    │   └── metadata.json
+    └── train_data
+```
+
+## Prepare Environment
+
+You should modify your sedna package as in this commit: [my sedna repo commit](https://github.com/IcyFeather233/sedna/commit/e13b82363c03dc771fca4922a24798554ca32a9f)
+
+Or you can replace the files in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with the contents of `examples/resources/sedna-llm.zip`.
+
+## Run Ianvs
+
+### Objective
+
+`ianvs -f examples/government/singletask_learning_bench/objective/benchmarkingjob.yaml`
+
+### Subjective
+
+`ianvs -f examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml`
\ No newline at end of file
diff --git a/examples/government/singletask_learning_bench/imgs/structure.png b/examples/government/singletask_learning_bench/imgs/structure.png
new file mode 100644
index 00000000..22c1695e
Binary files /dev/null and b/examples/government/singletask_learning_bench/imgs/structure.png differ
diff --git a/examples/government/singletask_learning_bench/objective/benchmarkingjob.yaml b/examples/government/singletask_learning_bench/objective/benchmarkingjob.yaml
new file mode 100644
index 00000000..38c8f2c5
--- /dev/null
+++ b/examples/government/singletask_learning_bench/objective/benchmarkingjob.yaml
@@ -0,0 +1,72 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "/home/icyfeather/project/ianvs/workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/government/singletask_learning_bench/objective/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "politic_bench_singletask_learning"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml;
+        url: "./examples/government/singletask_learning_bench/objective/testalgorithms/gen/gen_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [ { "acc": "descend" } ]
+
+  # visualization configuration
+  visualization:
+    # mode of visualization in the leaderboard; string type;
+    # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+    # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+    mode: "selected_only"
+    # method of visualization for selected dataitems; string type;
+    # currently the options of value are as follows:
+    #   1> "print_table": print selected dataitems;
+    method: "print_table"
+
+  # selected dataitem configuration
+  # The user can add the dataitems he/she is interested in, in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+  # so that the selected columns will be shown.
+  selected_dataitem:
+    # currently the options of value are as follows:
+    #   1> "all": select all paradigms in the leaderboard;
+    #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+    paradigms: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all modules in the leaderboard;
+    #   2> modules in the leaderboard, e.g., "basemodel"
+    modules: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all hyperparameters in the leaderboard;
+    #   2> hyperparameters in the leaderboard, e.g., "momentum"
+    hyperparameters: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all metrics in the leaderboard;
+    #   2> metrics in the leaderboard, e.g., "f1_score"
+    metrics: [ "acc" ]
+
+  # mode of saving selected and all dataitems in workspace; string type;
+  # currently the options of value are as follows:
+  #   1> "selected_and_all": save selected and all dataitems;
+  #   2> "selected_only": save selected dataitems;
+  save_mode: "selected_and_all"
"./examples/government/singletask_learning_bench/objective/testalgorithms/gen/basemodel.py" \ No newline at end of file diff --git a/examples/government/singletask_learning_bench/objective/testenv/acc.py b/examples/government/singletask_learning_bench/objective/testenv/acc.py new file mode 100644 index 00000000..a4041f48 --- /dev/null +++ b/examples/government/singletask_learning_bench/objective/testenv/acc.py @@ -0,0 +1,39 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sedna.common.class_factory import ClassType, ClassFactory + +__all__ = ["acc"] + +def get_last_letter(input_string): + if not input_string or not any(char.isalpha() for char in input_string): + return None + + for char in reversed(input_string): + if 'A' <= char <= 'D': + return char + + return None + + +@ClassFactory.register(ClassType.GENERAL, alias="acc") +def acc(y_true, y_pred): + y_pred = [get_last_letter(pred) for pred in y_pred] + y_true = [get_last_letter(pred) for pred in y_true] + + same_elements = [y_pred[i] == y_true[i] for i in range(len(y_pred))] + + acc = sum(same_elements) / len(same_elements) + + return acc diff --git a/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml b/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml new file mode 100644 index 00000000..e3a13834 --- /dev/null +++ b/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml @@ -0,0 +1,14 @@ +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_data: "/home/icyfeather/Projects/ianvs/dataset/government/objective/train_data/data.jsonl" + # the url address of test dataset index; string type; + test_data_info: "/home/icyfeather/Projects/ianvs/dataset/government/objective/test_data/metadata.json" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "acc" + # the url address of python file + url: "./examples/government/singletask_learning_bench/objective/testenv/acc.py" diff --git a/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml b/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml new file mode 100644 index 00000000..26008c3c --- /dev/null +++ b/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml @@ -0,0 +1,72 @@ +benchmarkingjob: + # job name of bechmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + workspace: "/home/icyfeather/project/ianvs/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/government/singletask_learning_bench/subjective/testenv/testenv.yaml" + + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. 
+            indices = random.sample([i for i, l in enumerate(data.x) if l != line], 3)
+            history = []
+            for idx in indices:
+                history.append({"role": "user", "content": data.x[idx]})
+                history.append({"role": "assistant", "content": data.y[idx]})
+            history.append({"role": "user", "content": line})
+            response = self._infer(history)
+            answer_list.append(response)
+        return answer_list
+
+    def load(self, model_url=None):
+        LOGGER.info("BaseModel load")
+
+    def evaluate(self, data, model_path, **kwargs):
+        LOGGER.info("BaseModel evaluate")
+
+    def _infer(self, messages):
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)
+
+        generated_ids = self.model.generate(
+            model_inputs.input_ids,
+            max_new_tokens=512,
+            temperature=0.1,
+            top_p=0.9
+        )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+
+        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return response
diff --git a/examples/government/singletask_learning_bench/objective/testalgorithms/gen/gen_algorithm.yaml b/examples/government/singletask_learning_bench/objective/testalgorithms/gen/gen_algorithm.yaml
new file mode 100644
index 00000000..3167cbe8
--- /dev/null
+++ b/examples/government/singletask_learning_bench/objective/testalgorithms/gen/gen_algorithm.yaml
@@ -0,0 +1,18 @@
+algorithm:
+  # paradigm name; string type;
+  # currently the options of value are as follows:
+  #   1> "singletasklearning"
+  #   2> "incrementallearning"
+  paradigm_type: "singletasklearning"
+
+  # algorithm module configuration in the paradigm; list type;
+  modules:
+    # kind of algorithm module; string type;
+    # currently the options of value are as follows:
+    #   1> "basemodel"
+    - type: "basemodel"
+      # name of python module; string type;
+      # example: basemodel.py has a BaseModel class whose alias is "gen" for this benchmarking;
+      name: "gen"
+      # the url address of python module; string type;
+      url: "./examples/government/singletask_learning_bench/objective/testalgorithms/gen/basemodel.py"
\ No newline at end of file
diff --git a/examples/government/singletask_learning_bench/objective/testenv/acc.py b/examples/government/singletask_learning_bench/objective/testenv/acc.py
new file mode 100644
index 00000000..a4041f48
--- /dev/null
+++ b/examples/government/singletask_learning_bench/objective/testenv/acc.py
@@ -0,0 +1,39 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sedna.common.class_factory import ClassType, ClassFactory
+
+__all__ = ["acc"]
+
+def get_last_letter(input_string):
+    # return the last uppercase choice letter (A-D) in the string, or None
+    if not input_string or not any(char.isalpha() for char in input_string):
+        return None
+
+    for char in reversed(input_string):
+        if 'A' <= char <= 'D':
+            return char
+
+    return None
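+
+# examples (illustrative):
+#   get_last_letter("The answer is C.") -> 'C'
+#   get_last_letter("answer: none of the above") -> None (counts as wrong)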
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="acc")
+def acc(y_true, y_pred):
+    y_pred = [get_last_letter(pred) for pred in y_pred]
+    y_true = [get_last_letter(true) for true in y_true]
+
+    same_elements = [y_pred[i] == y_true[i] for i in range(len(y_pred))]
+
+    acc = sum(same_elements) / len(same_elements)
+
+    return acc
diff --git a/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml b/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml
new file mode 100644
index 00000000..e3a13834
--- /dev/null
+++ b/examples/government/singletask_learning_bench/objective/testenv/testenv.yaml
@@ -0,0 +1,14 @@
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of the train dataset file; string type;
+    train_data: "/home/icyfeather/Projects/ianvs/dataset/government/objective/train_data/data.jsonl"
+    # the url address of the test dataset metadata file; string type;
+    test_data_info: "/home/icyfeather/Projects/ianvs/dataset/government/objective/test_data/metadata.json"
+
+  # metrics configuration for test case's evaluation; list type;
+  metrics:
+    # metric name; string type;
+    - name: "acc"
+      # the url address of python file
+      url: "./examples/government/singletask_learning_bench/objective/testenv/acc.py"
diff --git a/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml b/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml
new file mode 100644
index 00000000..26008c3c
--- /dev/null
+++ b/examples/government/singletask_learning_bench/subjective/benchmarkingjob.yaml
@@ -0,0 +1,72 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "/home/icyfeather/project/ianvs/workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/government/singletask_learning_bench/subjective/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "politic_bench_singletask_learning"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml;
+        url: "./examples/government/singletask_learning_bench/subjective/testalgorithms/gen/gen_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [ { "llm_judgement": "descend" } ]
+
+  # visualization configuration
+  visualization:
+    # mode of visualization in the leaderboard; string type;
+    # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+    # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+    mode: "selected_only"
+    # method of visualization for selected dataitems; string type;
+    # currently the options of value are as follows:
+    #   1> "print_table": print selected dataitems;
+    method: "print_table"
+
+  # selected dataitem configuration
+  # The user can add the dataitems he/she is interested in, in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+  # so that the selected columns will be shown.
+  selected_dataitem:
+    # currently the options of value are as follows:
+    #   1> "all": select all paradigms in the leaderboard;
+    #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+    paradigms: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all modules in the leaderboard;
+    #   2> modules in the leaderboard, e.g., "basemodel"
+    modules: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all hyperparameters in the leaderboard;
+    #   2> hyperparameters in the leaderboard, e.g., "momentum"
+    hyperparameters: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all metrics in the leaderboard;
+    #   2> metrics in the leaderboard, e.g., "f1_score"
+    metrics: [ "llm_judgement" ]
+
+  # mode of saving selected and all dataitems in workspace; string type;
+  # currently the options of value are as follows:
+  #   1> "selected_and_all": save selected and all dataitems;
+  #   2> "selected_only": save selected dataitems;
+  save_mode: "selected_and_all"
diff --git a/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/basemodel.py b/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/basemodel.py
new file mode 100644
index 00000000..ee7f2585
--- /dev/null
+++ b/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/basemodel.py
@@ -0,0 +1,131 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division
+
+import os
+import logging
+
+from tqdm import tqdm
+from sedna.common.class_factory import ClassType, ClassFactory
+from core.common.log import LOGGER
+from openai import OpenAI
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+device = "cuda"  # the device to load the model onto
+
+logging.disable(logging.WARNING)
+
+__all__ = ["BaseModel"]
+
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="gen")
+class BaseModel:
+
+    def __init__(self, **kwargs):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "/home/icyfeather/models/Qwen2-0.5B-Instruct",
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")
+
+    def train(self, train_data, valid_data=None, **kwargs):
+        LOGGER.info("BaseModel train")
+
+    def save(self, model_path):
+        LOGGER.info("BaseModel save")
+
+    def predict(self, data, input_shape=None, **kwargs):
+        LOGGER.info("BaseModel predict")
+        LOGGER.info(f"Dataset: {data.dataset_name}")
+        LOGGER.info(f"Description: {data.description}")
+        LOGGER.info(f"Data Level 1 Dim: {data.level_1_dim}")
+        LOGGER.info(f"Data Level 2 Dim: {data.level_2_dim}")
+
+        answer_list = []
+        for line in tqdm(data.x, desc="Processing", unit="question"):
+            history = []
+            history.append({"role": "user", "content": line})
+            response = self._infer(history)
+            answer_list.append(response)
+
+        judgement_list = []
+
+        # evaluate by llm: each sample's judge prompt is concatenated with the model's answer
+        for index in tqdm(range(len(answer_list)), desc="Evaluating", ascii=False, ncols=75):
+            prompt = data.judge_prompts[index] + answer_list[index]
+            judgement = self._openai_generate(prompt)
+            judgement_list.append(judgement)
+
+        return judgement_list
+
+    def load(self, model_url=None):
+        LOGGER.info("BaseModel load")
+
+    def evaluate(self, data, model_path, **kwargs):
+        LOGGER.info("BaseModel evaluate")
+
+    def _infer(self, messages):
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)
+
+        generated_ids = self.model.generate(
+            model_inputs.input_ids,
+            max_new_tokens=512,
+            temperature=0.1,
+            top_p=0.9
+        )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+
+        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return response
+
+    def _openai_generate(self, user_question, system=None):
+        key = os.getenv("DEEPSEEK_API_KEY")
+        if not key:
+            raise ValueError("You should set DEEPSEEK_API_KEY in your env.")
+        client = OpenAI(api_key=key, base_url="https://api.deepseek.com")
+
+        messages = []
+        if system:
+            messages.append({"role": "system", "content": system})
+        messages.append({"role": "user", "content": user_question})
+
+        response = client.chat.completions.create(
+            model="deepseek-chat",
+            messages=messages,
+            stream=False
+        )
+
+        res = response.choices[0].message.content
+
+        return res
\ No newline at end of file
diff --git a/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/gen_algorithm.yaml b/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/gen_algorithm.yaml
new file mode 100644
index 00000000..f20e9047
--- /dev/null
+++ b/examples/government/singletask_learning_bench/subjective/testalgorithms/gen/gen_algorithm.yaml
@@ -0,0 +1,18 @@
+algorithm:
+  # paradigm name; string type;
+  # currently the options of value are as follows:
+  #   1> "singletasklearning"
+  #   2> "incrementallearning"
+  paradigm_type: "singletasklearning"
+
+  # algorithm module configuration in the paradigm; list type;
+  modules:
+    # kind of algorithm module; string type;
+    # currently the options of value are as follows:
+    #   1> "basemodel"
+    - type: "basemodel"
+      # name of python module; string type;
+      # example: basemodel.py has a BaseModel class whose alias is "gen" for this benchmarking;
+      name: "gen"
+      # the url address of python module; string type;
+      url: "./examples/government/singletask_learning_bench/subjective/testalgorithms/gen/basemodel.py"
\ No newline at end of file
diff --git a/examples/government/singletask_learning_bench/subjective/testenv/llm_judgement.py b/examples/government/singletask_learning_bench/subjective/testenv/llm_judgement.py
new file mode 100644
index 00000000..97cbc72a
--- /dev/null
+++ b/examples/government/singletask_learning_bench/subjective/testenv/llm_judgement.py
@@ -0,0 +1,42 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from sedna.common.class_factory import ClassType, ClassFactory
+from core.common.log import LOGGER
+
+__all__ = ["llm_judgement"]
+
+def extract_comprehensive_score(input_str):
+    # extract the overall points from the judge model's output
+    match = re.search(r"'Overall Points': (\d+)", input_str)
+    if match:
+        return int(match.group(1))
+    else:
+        return None
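+
+# e.g. extract_comprehensive_score("{'Relevance': 90, 'Overall Points': 85}") -> 85
+# (returns None when the judge output has no 'Overall Points' field)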
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="llm_judgement")
+def llm_judgement(y_true, y_pred):
+    y_pred = [extract_comprehensive_score(pred) for pred in y_pred]
+
+    valid_scores = [score for score in y_pred if score is not None]
+
+    LOGGER.info(f"Extracted {len(valid_scores)} valid scores from {len(y_pred)} predictions")
+
+    if valid_scores:
+        average_score = sum(valid_scores) / len(valid_scores)
+        return average_score
+    else:
+        return -1
diff --git a/examples/government/singletask_learning_bench/subjective/testenv/testenv.yaml b/examples/government/singletask_learning_bench/subjective/testenv/testenv.yaml
new file mode 100644
index 00000000..f197b2fb
--- /dev/null
+++ b/examples/government/singletask_learning_bench/subjective/testenv/testenv.yaml
@@ -0,0 +1,14 @@
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of the train dataset file; string type;
+    train_data: "/home/icyfeather/Projects/ianvs/dataset/government/subjective/train_data/data.jsonl"
+    # the url address of the test dataset metadata file; string type;
+    test_data_info: "/home/icyfeather/Projects/ianvs/dataset/government/subjective/test_data/metadata.json"
+
+  # metrics configuration for test case's evaluation; list type;
+  metrics:
+    # metric name; string type;
+    - name: "llm_judgement"
+      # the url address of python file
+      url: "./examples/government/singletask_learning_bench/subjective/testenv/llm_judgement.py"
diff --git a/examples/llm_simple_qa/README.md b/examples/llm_simple_qa/README.md
new file mode 100644
index 00000000..dbaf845a
--- /dev/null
+++ b/examples/llm_simple_qa/README.md
@@ -0,0 +1,84 @@
+# README
+
+## Simple QA
+
+### Prepare Data
+
+The data structure of the simple-qa example is:
+
+```
+.
+├── test_data
+│   └── data.jsonl
+└── train_data
+    └── data.jsonl
+```
+
+`train_data/data.jsonl` is empty, and `test_data/data.jsonl` is as follows:
+
+```
+{
+    "question": "If Xiao Ming has 5 apples, and he gives 3 to Xiao Hua, how many apples does Xiao Ming have left?\nA. 2\nB. 3\nC. 4\nD. 5",
+    "answer": "A"
+}
+{
+    "question": "Which of the following numbers is the smallest prime number?\nA. 0\nB. 1\nC. 2\nD. 4",
+    "answer": "C"
+}
+{
+    "question": "A rectangle has a length of 10 centimeters and a width of 5 centimeters, what is its perimeter in centimeters?\nA. 20 centimeters\nB. 30 centimeters\nC. 40 centimeters\nD. 50 centimeters",
+    "answer": "B"
+}
+{
+    "question": "Which of the following fractions is closest to 1?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6",
+    "answer": "D"
+}
+{
+    "question": "If a number plus 10 equals 30, what is the number?\nA. 20\nB. 21\nC. 22\nD. 23",
+    "answer": "A"
+}
+{
+    "question": "Which of the following expressions has the largest result?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2",
+    "answer": "C"
+}
+{
+    "question": "A class has 24 students, and if each student brings 2 books, how many books are there in total?\nA. 48\nB. 36\nC. 24\nD. 12",
+    "answer": "A"
+}
+{
+    "question": "Which of the following is the correct multiplication rhyme?\nA. Three threes are seven\nB. Four fours are sixteen\nC. Five fives are twenty-five\nD. Six sixes are thirty-six",
+    "answer": "B"
+}
+{
+    "question": "If one number is three times another number, and this number is 15, what is the other number?\nA. 5\nB. 10\nC. 15\nD. 45",
+    "answer": "A"
+}
+{
+    "question": "Which of the following shapes has the longest perimeter?\nA. Square\nB. Rectangle\nC. Circle\nD. Triangle",
+    "answer": "C"
+}
+```
+
+### Prepare Environment
+
+You need to install the modified sedna package, which adds `JsonlDataParse` to `sedna.datasources`.
+
+Replace the files in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with the contents of `examples/resources/sedna-with-jsonl.zip`.
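+
+For reference, here is a minimal sketch of what a `JsonlDataParse`-style parser does (illustrative only; the actual class ships with the modified sedna package and may differ, and `JsonlDataParseSketch` is a hypothetical name):
+
+```python
+import json
+
+class JsonlDataParseSketch:
+    """Hypothetical stand-in for sedna's JsonlDataParse."""
+
+    def __init__(self, data_type, func=None):
+        self.data_type = data_type
+        self.x, self.y = [], []
+
+    def parse(self, file):
+        # read one JSON object per line and split into inputs/labels
+        with open(file, "r", encoding="utf-8") as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                sample = json.loads(line)
+                self.x.append(sample["question"])
+                self.y.append(sample["answer"])
+```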
+
+### Run Ianvs
+
+Run the following command:
+
+`ianvs -f examples/llm_simple_qa/benchmarkingjob.yaml`
+
+## OpenCompass Evaluation
+
+### Prepare Environment
+
+`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl`
+
+### Run Evaluation
+
+`python run_op.py examples/llm_simple_qa/testalgorithms/gen/op_eval.py`
+
diff --git a/examples/llm_simple_qa/benchmarkingjob.yaml b/examples/llm_simple_qa/benchmarkingjob.yaml
new file mode 100644
index 00000000..78961e52
--- /dev/null
+++ b/examples/llm_simple_qa/benchmarkingjob.yaml
@@ -0,0 +1,72 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "/home/icyfeather/project/ianvs/workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/llm_simple_qa/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "simple_qa_singletask_learning"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml;
+        url: "./examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [ { "acc": "descend" } ]
+
+  # visualization configuration
+  visualization:
+    # mode of visualization in the leaderboard; string type;
+    # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+    # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+    mode: "selected_only"
+    # method of visualization for selected dataitems; string type;
+    # currently the options of value are as follows:
+    #   1> "print_table": print selected dataitems;
+    method: "print_table"
+
+  # selected dataitem configuration
+  # The user can add the dataitems he/she is interested in, in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+  # so that the selected columns will be shown.
+  selected_dataitem:
+    # currently the options of value are as follows:
+    #   1> "all": select all paradigms in the leaderboard;
+    #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+    paradigms: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all modules in the leaderboard;
+    #   2> modules in the leaderboard, e.g., "basemodel"
+    modules: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all hyperparameters in the leaderboard;
+    #   2> hyperparameters in the leaderboard, e.g., "momentum"
+    hyperparameters: [ "all" ]
+    # currently the options of value are as follows:
+    #   1> "all": select all metrics in the leaderboard;
+    #   2> metrics in the leaderboard, e.g., "f1_score"
+    metrics: [ "acc" ]
+
+  # mode of saving selected and all dataitems in workspace; string type;
+  # currently the options of value are as follows:
+  #   1> "selected_and_all": save selected and all dataitems;
+  #   2> "selected_only": save selected dataitems;
+  save_mode: "selected_and_all"
diff --git a/examples/llm_simple_qa/testalgorithms/gen/basemodel.py b/examples/llm_simple_qa/testalgorithms/gen/basemodel.py
new file mode 100644
index 00000000..fdeedc98
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/basemodel.py
@@ -0,0 +1,98 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import logging
+
+from sedna.common.class_factory import ClassType, ClassFactory
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+device = "cuda"  # the device to load the model onto
+
+logging.disable(logging.WARNING)
+
+__all__ = ["BaseModel"]
+
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="gen")
+class BaseModel:
+
+    def __init__(self, **kwargs):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "/home/icyfeather/models/Qwen2-0.5B-Instruct",
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")
+
+    def train(self, train_data, valid_data=None, **kwargs):
+        print("BaseModel doesn't need to train")
+
+    def save(self, model_path):
+        print("BaseModel doesn't need to save")
+
+    def predict(self, data, input_shape=None, **kwargs):
+        print("BaseModel predict")
+        answer_list = []
+        for line in data:
+            response = self._infer(line)
+            answer_list.append(response)
+        return answer_list
+
+    def load(self, model_url=None):
+        print("BaseModel load")
+
+    def evaluate(self, data, model_path, **kwargs):
+        print("BaseModel evaluate")
+
+    def _infer(self, prompt, system=None):
+        if system:
+            messages = [
+                {"role": "system", "content": system},
+                {"role": "user", "content": prompt}
+            ]
+        else:
+            messages = [
+                {"role": "user", "content": prompt}
+            ]
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)
+
+        generated_ids = self.model.generate(
+            model_inputs.input_ids,
+            max_new_tokens=512
+        )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+
+        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        return response
diff --git a/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml b/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml
new file mode 100644
index 00000000..6536ceb9
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml
@@ -0,0 +1,18 @@
+algorithm:
+  # paradigm name; string type;
+  # currently the options of value are as follows:
+  #   1> "singletasklearning"
+  #   2> "incrementallearning"
+  paradigm_type: "singletasklearning"
+
+  # algorithm module configuration in the paradigm; list type;
+  modules:
+    # kind of algorithm module; string type;
+    # currently the options of value are as follows:
+    #   1> "basemodel"
+    - type: "basemodel"
+      # name of python module; string type;
+      # example: basemodel.py has a BaseModel class whose alias is "gen" for this benchmarking;
+      name: "gen"
+      # the url address of python module; string type;
+      url: "./examples/llm_simple_qa/testalgorithms/gen/basemodel.py"
\ No newline at end of file
diff --git a/examples/llm_simple_qa/testalgorithms/gen/op_eval.py b/examples/llm_simple_qa/testalgorithms/gen/op_eval.py
new file mode 100644
index 00000000..dc6d9c04
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/op_eval.py
@@ -0,0 +1,21 @@
+from mmengine.config import read_base
+from opencompass.models import HuggingFacewithChatTemplate
+# import sys
+# sys.path.append('/home/icyfeather/project/ianvs')
+
+with read_base():
+    from core.op_extra.datasets.cmmlu.cmmlu_gen import cmmlu_datasets
+
+datasets = [*cmmlu_datasets]
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='qwen1.5-1.8b-chat-hf',
+        path='/home/icyfeather/models/Qwen1.5-1.8B-Chat',
+        max_out_len=1024,
+        batch_size=2,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['<|im_end|>', '<|im_start|>'],
+    )
+]
diff --git a/examples/llm_simple_qa/testenv/acc.py b/examples/llm_simple_qa/testenv/acc.py
new file mode 100644
index 00000000..beccdadf
--- /dev/null
+++ b/examples/llm_simple_qa/testenv/acc.py
@@ -0,0 +1,40 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sedna.common.class_factory import ClassType, ClassFactory
+
+__all__ = ["acc"]
+
+def get_last_letter(input_string):
+    # return the last uppercase choice letter (A-D) in the string, or None
+    if not input_string or not any(char.isalpha() for char in input_string):
+        return None
+
+    for char in reversed(input_string):
+        if 'A' <= char <= 'D':
+            return char
+
+    return None
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="acc")
+def acc(y_true, y_pred):
+    y_pred = [get_last_letter(pred) for pred in y_pred]
+
+    same_elements = [y_pred[i] == y_true[i] for i in range(len(y_pred))]
+
+    acc = sum(same_elements) / len(same_elements)
+
+    return acc
diff --git a/examples/llm_simple_qa/testenv/testenv.yaml b/examples/llm_simple_qa/testenv/testenv.yaml
new file mode 100644
index 00000000..0bc7239f
--- /dev/null
+++ b/examples/llm_simple_qa/testenv/testenv.yaml
@@ -0,0 +1,14 @@
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of the train dataset file; string type;
+    train_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/train_data/data.jsonl"
+    # the url address of the test dataset file; string type;
+    test_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/test_data/data.jsonl"
+
+  # metrics configuration for test case's evaluation; list type;
+  metrics:
+    # metric name; string type;
+    - name: "acc"
+      # the url address of python file
+      url: "./examples/llm_simple_qa/testenv/acc.py"
diff --git a/examples/resources/sedna-llm.zip b/examples/resources/sedna-llm.zip
new file mode 100644
index 00000000..8ea3c0d3
Binary files /dev/null and b/examples/resources/sedna-llm.zip differ