NVIDIA · holgerroth · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/...training/08.1_fed_bert/code/src/nlp_fl.py → ...ated_LLM_training/08.1_fed_bert/client.py b/...training/08.1_fed_bert/code/src/nlp_fl.py → ...ated_LLM_training/08.1_fed_bert/client.py
diff --git a/...ed_federated_learning/chapter-8_federated_LLM_training/08.1_fed_bert/code/prepare_data.sh b/...ed_federated_learning/chapter-8_federated_LLM_training/08.1_fed_bert/code/prepare_data.sh
diff --git a/...g/08.1_fed_bert/code/src/data_sequence.py → ...M_training/08.1_fed_bert/data_sequence.py b/...g/08.1_fed_bert/code/src/data_sequence.py → ...M_training/08.1_fed_bert/data_sequence.py
diff --git a/...ng/08.1_fed_bert/code/utils/data_split.py → ..._LLM_training/08.1_fed_bert/data_split.py b/...ng/08.1_fed_bert/code/utils/data_split.py → ..._LLM_training/08.1_fed_bert/data_split.py
diff --git a/...ted_learning/chapter-8_federated_LLM_training/08.1_fed_bert/federated_nlp_with_bert.ipynb b/...ted_learning/chapter-8_federated_LLM_training/08.1_fed_bert/federated_nlp_with_bert.ipynb
@@ -23,17 +23,21 @@
    "metadata": {},
    "source": [
     "## Setup\n",
-    "Install required packages for training"
+    "Install required packages for training\n",
+    "\n",
+    "> **Note**: This example requires PyTorch 2.2.0 or later for compatibility with recent versions of the Transformers library (4.40.0 or later; see `requirements.txt`)."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "ee1d1f87-502a-4c30-aa40-f55ae65a1da7",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "%pip install -r code/requirements.txt"
+    "%pip install -r requirements.txt"
    ]
   },
   {
@@ -79,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! code/prepare_data.sh /tmp/nvflare/dataset/nlp_ner"
+    "! bash prepare_data.sh /tmp/nvflare/dataset/nlp_ner"
    ]
   },
   {
@@ -119,12 +123,12 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "f48f1d5f-e656-4f71-b925-94035c60ace0",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "%cd code\n",
-    "! python nlp_fl_job.py --model_name Bert\n",
-    "%cd .."
+    "! python job.py --model_name Bert"
    ]
   },
   {
@@ -169,9 +173,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%cd code\n",
-    "! sh test_global_model.sh /tmp/nvflare/dataset/nlp_ner\n",
-    "%cd .."
+    "! sh test_global_model.sh /tmp/nvflare/dataset/nlp_ner"
    ]
   },
   {
@@ -226,7 +228,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,

diff --git a/...training/08.1_fed_bert/code/nlp_fl_job.py → ...derated_LLM_training/08.1_fed_bert/job.py b/...training/08.1_fed_bert/code/nlp_fl_job.py → ...derated_LLM_training/08.1_fed_bert/job.py
@@ -14,7 +14,7 @@
 
 import argparse
 
-from src.nlp_models import BertModel, GPTModel
+from nlp_models import BertModel, GPTModel
 
 from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector
 from nvflare.app_common.workflows.fedavg import FedAvg
@@ -57,7 +57,7 @@ def main():
     # Local training parameters
     num_rounds = 5
     dataset_path = f"/tmp/nvflare/dataset/nlp_ner/{num_clients}_split"
-    train_script = "src/nlp_fl.py"
+    train_script = "client.py"
     train_args = f"--dataset_path {dataset_path} --model_name {train_model_name}"
 
     # Define the controller workflow and send to server

diff --git a/...ning/08.1_fed_bert/code/ner_model_test.py → ..._training/08.1_fed_bert/ner_model_test.py b/...ning/08.1_fed_bert/code/ner_model_test.py → ..._training/08.1_fed_bert/ner_model_test.py
@@ -18,8 +18,8 @@
 import pandas as pd
 import torch
 from seqeval.metrics import classification_report
-from src.data_sequence import DataSequence
-from src.nlp_models import BertModel, GPTModel
+from data_sequence import DataSequence
+from nlp_models import BertModel, GPTModel
 from torch.utils.data import DataLoader
 
 os.environ["TOKENIZERS_PARALLELISM"] = "False"

diff --git a/...ning/08.1_fed_bert/code/src/nlp_models.py → ..._LLM_training/08.1_fed_bert/nlp_models.py b/...ning/08.1_fed_bert/code/src/nlp_models.py → ..._LLM_training/08.1_fed_bert/nlp_models.py
diff --git a/...dvanced_federated_learning/chapter-8_federated_LLM_training/08.1_fed_bert/prepare_data.sh b/...dvanced_federated_learning/chapter-8_federated_LLM_training/08.1_fed_bert/prepare_data.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Usage: bash prepare_data.sh <dataset_root>
+# Runs data_split.py on <dataset_root> with 4 clients, random seed 0 and the 'site-' name prefix.
+# data_split.py is referenced by a relative path, so run this from the directory that contains it.
+DATASET_ROOT="${1:?usage: prepare_data.sh <dataset_root>}"
+echo "4-client"
+python3 data_split.py --data_path "${DATASET_ROOT}" --num_clients 4 --random_seed 0 --site_name_prefix 'site-'
diff --git a/...ining/08.1_fed_bert/code/requirements.txt → ...M_training/08.1_fed_bert/requirements.txt b/...ining/08.1_fed_bert/code/requirements.txt → ...M_training/08.1_fed_bert/requirements.txt
@@ -1,6 +1,6 @@
-torch
+torch>=2.2.0
 torchvision
 tensorboard
-transformers
+transformers>=4.40.0
 pandas
 seqeval
diff --git a/...g/08.1_fed_bert/code/test_global_model.sh → ...aining/08.1_fed_bert/test_global_model.sh b/...g/08.1_fed_bert/code/test_global_model.sh → ...aining/08.1_fed_bert/test_global_model.sh
diff --git a/...4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/LLM_SFT.ipynb b/...4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/LLM_SFT.ipynb
@@ -168,7 +168,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft --train_mode SFT "
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft --train_mode SFT "
    ]
   },
   {

diff --git a/...aining/08.2_llm_sft/src/hf_sft_peft_fl.py → ...rated_LLM_training/08.2_llm_sft/client.py b/...aining/08.2_llm_sft/src/hf_sft_peft_fl.py → ...rated_LLM_training/08.2_llm_sft/client.py
diff --git a/...training/08.2_llm_sft/src/hf_sft_model.py → ...LLM_training/08.2_llm_sft/hf_sft_model.py b/...training/08.2_llm_sft/src/hf_sft_model.py → ...LLM_training/08.2_llm_sft/hf_sft_model.py
diff --git a/...ated_LLM_training/08.2_llm_sft/sft_job.py → ...ederated_LLM_training/08.2_llm_sft/job.py b/...ated_LLM_training/08.2_llm_sft/sft_job.py → ...ederated_LLM_training/08.2_llm_sft/job.py
@@ -26,7 +26,7 @@
 
 def main():
     args = define_parser()
-    train_script = "src/hf_sft_peft_fl.py"
+    train_script = "client.py"
     client_ids = args.client_ids
     num_clients = len(client_ids)
 
@@ -74,9 +74,9 @@ def main():
 
     # Define the model persistor and send to server
     # First send the model to the server
-    job.to("src/hf_sft_model.py", "server")
+    job.to("hf_sft_model.py", "server")
     # Then send the model persistor to the server
-    model_args = {"path": "src.hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
+    model_args = {"path": "hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
     job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")
 
     # Add model selection widget and send to server
@@ -126,7 +126,7 @@ def define_parser():
         nargs="+",
         type=str,
         default="",
-        help="Clinet IDs, used to get the data path for each client",
+        help="Client IDs, used to get the data path for each client",
     )
     parser.add_argument(
         "--num_rounds",

diff --git a/...advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/LLM_PEFT.ipynb b/...advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/LLM_PEFT.ipynb
@@ -99,7 +99,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! python peft_job.py --client_ids dolly oasst1 --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/all_fl_peft --job_dir /tmp/nvflare/workspace/jobs/llm_fl_peft --train_mode PEFT --threads 2 "
+    "! python job.py --client_ids dolly oasst1 --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/all_fl_peft --job_dir /tmp/nvflare/workspace/jobs/llm_fl_peft --train_mode PEFT --threads 2 "
    ]
   },
   {

diff --git a/...ining/08.3_llm_peft/src/hf_sft_peft_fl.py → ...ated_LLM_training/08.3_llm_peft/client.py b/...ining/08.3_llm_peft/src/hf_sft_peft_fl.py → ...ated_LLM_training/08.3_llm_peft/client.py
diff --git a/...aining/08.3_llm_peft/src/hf_peft_model.py → ...M_training/08.3_llm_peft/hf_peft_model.py b/...aining/08.3_llm_peft/src/hf_peft_model.py → ...M_training/08.3_llm_peft/hf_peft_model.py
diff --git a/...ed_LLM_training/08.3_llm_peft/peft_job.py → ...derated_LLM_training/08.3_llm_peft/job.py b/...ed_LLM_training/08.3_llm_peft/peft_job.py → ...derated_LLM_training/08.3_llm_peft/job.py
@@ -26,7 +26,7 @@
 
 def main():
     args = define_parser()
-    train_script = "src/hf_sft_peft_fl.py"
+    train_script = "client.py"
     client_ids = args.client_ids
     num_clients = len(client_ids)
 
@@ -74,9 +74,9 @@ def main():
 
     # Define the model persistor and send to server
     # First send the model to the server
-    job.to("src/hf_peft_model.py", "server")
+    job.to("hf_peft_model.py", "server")
     # Then send the model persistor to the server
-    model_args = {"path": "src.hf_peft_model.CausalLMPEFTModel", "args": {"model_name_or_path": model_name_or_path}}
+    model_args = {"path": "hf_peft_model.CausalLMPEFTModel", "args": {"model_name_or_path": model_name_or_path}}
     job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")
 
     # Add model selection widget and send to server
@@ -126,7 +126,7 @@ def define_parser():
         nargs="+",
         type=str,
         default="",
-        help="Clinet IDs, used to get the data path for each client",
+        help="Client IDs, used to get the data path for each client",
     )
     parser.add_argument(
         "--num_rounds",

diff --git a/...ed_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/LLM_quantization.ipynb b/...ed_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/LLM_quantization.ipynb
@@ -47,10 +47,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_16 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_16 --train_mode SFT --quantize_mode float16\n",
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_8 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_8 --train_mode SFT --quantize_mode blockwise8\n",
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_fp4 --train_mode SFT --quantize_mode float4\n",
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_nf4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_nf4 --train_mode SFT --quantize_mode normfloat4"
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_16 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_16 --train_mode SFT --quantize_mode float16\n",
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_8 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_8 --train_mode SFT --quantize_mode blockwise8\n",
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_fp4 --train_mode SFT --quantize_mode float4\n",
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_nf4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_nf4 --train_mode SFT --quantize_mode normfloat4"
    ]
   },
   {
@@ -81,8 +81,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor --train_mode SFT  --message_mode tensor\n",
-    "! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor_fp4 --train_mode SFT  --message_mode tensor --quantize_mode float4"
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor --train_mode SFT  --message_mode tensor\n",
+    "! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor_fp4 --train_mode SFT  --message_mode tensor --quantize_mode float4"
    ]
   },
   {

diff --git a/....4_llm_quantization/src/hf_sft_peft_fl.py → ..._training/08.4_llm_quantization/client.py b/....4_llm_quantization/src/hf_sft_peft_fl.py → ..._training/08.4_llm_quantization/client.py
diff --git a/...08.4_llm_quantization/src/hf_sft_model.py → ...ing/08.4_llm_quantization/hf_sft_model.py b/...08.4_llm_quantization/src/hf_sft_model.py → ...ing/08.4_llm_quantization/hf_sft_model.py
diff --git a/...training/08.4_llm_quantization/sft_job.py → ...LLM_training/08.4_llm_quantization/job.py b/...training/08.4_llm_quantization/sft_job.py → ...LLM_training/08.4_llm_quantization/job.py
@@ -26,7 +26,7 @@
 
 def main():
     args = define_parser()
-    train_script = "src/hf_sft_peft_fl.py"
+    train_script = "client.py"
     client_ids = args.client_ids
     num_clients = len(client_ids)
 
@@ -74,9 +74,9 @@ def main():
 
     # Define the model persistor and send to server
     # First send the model to the server
-    job.to("src/hf_sft_model.py", "server")
+    job.to("hf_sft_model.py", "server")
     # Then send the model persistor to the server
-    model_args = {"path": "src.hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
+    model_args = {"path": "hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
     job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")
 
     # Add model selection widget and send to server

diff --git a/...ederated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/container_stream.sh b/...ederated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/container_stream.sh
@@ -1,2 +1,2 @@
 bash utils/log_memory.sh >>/tmp/nvflare/logs/container.txt &
-python streaming_job.py --retriever_mode container
+python job.py --retriever_mode container
diff --git a/...ced_federated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/file_stream.sh b/...ced_federated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/file_stream.sh
@@ -1,2 +1,2 @@
 bash utils/log_memory.sh >>/tmp/nvflare/logs/file.txt &
-python streaming_job.py --retriever_mode file
+python job.py --retriever_mode file
diff --git a/...ining/08.5_llm_streaming/streaming_job.py → ...ed_LLM_training/08.5_llm_streaming/job.py b/...ining/08.5_llm_streaming/streaming_job.py → ...ed_LLM_training/08.5_llm_streaming/job.py
diff --git a/...ated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/regular_transmission.sh b/...ated_learning/chapter-8_federated_LLM_training/08.5_llm_streaming/regular_transmission.sh
@@ -1,3 +1,3 @@
 mkdir /tmp/nvflare/logs/
 bash utils/log_memory.sh >>/tmp/nvflare/logs/regular.txt &
-python streaming_job.py
+python job.py
diff --git a/...llm_streaming/src/streaming_controller.py → ...8.5_llm_streaming/streaming_controller.py b/...llm_streaming/src/streaming_controller.py → ...8.5_llm_streaming/streaming_controller.py
diff --git a/...5_llm_streaming/src/streaming_executor.py → .../08.5_llm_streaming/streaming_executor.py b/...5_llm_streaming/src/streaming_executor.py → .../08.5_llm_streaming/streaming_executor.py