Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,21 @@
"metadata": {},
"source": [
"## Setup\n",
"Install required packages for training"
"Install required packages for training\n",
"\n",
"> **Note**: This example requires PyTorch 2.2.0 or later for compatibility with the latest Transformers library."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee1d1f87-502a-4c30-aa40-f55ae65a1da7",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%pip install -r code/requirements.txt"
"%pip install -r requirements.txt"
]
},
{
Expand Down Expand Up @@ -79,7 +83,7 @@
"metadata": {},
"outputs": [],
"source": [
"! code/prepare_data.sh /tmp/nvflare/dataset/nlp_ner"
"! prepare_data.sh /tmp/nvflare/dataset/nlp_ner"
]
},
{
Expand Down Expand Up @@ -119,12 +123,12 @@
"cell_type": "code",
"execution_count": null,
"id": "f48f1d5f-e656-4f71-b925-94035c60ace0",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%cd code\n",
"! python nlp_fl_job.py --model_name Bert\n",
"%cd .."
"! python job.py --model_name Bert"
]
},
{
Expand Down Expand Up @@ -169,9 +173,7 @@
"metadata": {},
"outputs": [],
"source": [
"%cd code\n",
"! sh test_global_model.sh /tmp/nvflare/dataset/nlp_ner\n",
"%cd .."
"! sh test_global_model.sh /tmp/nvflare/dataset/nlp_ner"
]
},
{
Expand Down Expand Up @@ -226,7 +228,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import argparse

from src.nlp_models import BertModel, GPTModel
from nlp_models import BertModel, GPTModel

from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector
from nvflare.app_common.workflows.fedavg import FedAvg
Expand Down Expand Up @@ -57,7 +57,7 @@ def main():
# Local training parameters
num_rounds = 5
dataset_path = f"/tmp/nvflare/dataset/nlp_ner/{num_clients}_split"
train_script = "src/nlp_fl.py"
train_script = "client.py"
train_args = f"--dataset_path {dataset_path} --model_name {train_model_name}"

# Define the controller workflow and send to server
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
import pandas as pd
import torch
from seqeval.metrics import classification_report
from src.data_sequence import DataSequence
from src.nlp_models import BertModel, GPTModel
from data_sequence import DataSequence
from nlp_models import BertModel, GPTModel
from torch.utils.data import DataLoader

os.environ["TOKENIZERS_PARALLELISM"] = "False"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
DATASET_ROOT=${1}
echo "4-client"
python3 data_split.py --data_path ${DATASET_ROOT} --num_clients 4 --random_seed 0 --site_name_prefix 'site-'
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
torch
torch>=2.2.0
torchvision
tensorboard
transformers
transformers>=4.40.0
pandas
seqeval
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
"metadata": {},
"outputs": [],
"source": [
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft --train_mode SFT "
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft --train_mode SFT "
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

def main():
args = define_parser()
train_script = "src/hf_sft_peft_fl.py"
train_script = "client.py"
client_ids = args.client_ids
num_clients = len(client_ids)

Expand Down Expand Up @@ -74,9 +74,9 @@ def main():

# Define the model persistor and send to server
# First send the model to the server
job.to("src/hf_sft_model.py", "server")
job.to("hf_sft_model.py", "server")
# Then send the model persistor to the server
model_args = {"path": "src.hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
model_args = {"path": "hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")

# Add model selection widget and send to server
Expand Down Expand Up @@ -126,7 +126,7 @@ def define_parser():
nargs="+",
type=str,
default="",
help="Clinet IDs, used to get the data path for each client",
help="Client IDs, used to get the data path for each client",
)
parser.add_argument(
"--num_rounds",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
"metadata": {},
"outputs": [],
"source": [
"! python peft_job.py --client_ids dolly oasst1 --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/all_fl_peft --job_dir /tmp/nvflare/workspace/jobs/llm_fl_peft --train_mode PEFT --threads 2 "
"! python job.py --client_ids dolly oasst1 --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/all_fl_peft --job_dir /tmp/nvflare/workspace/jobs/llm_fl_peft --train_mode PEFT --threads 2 "
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

def main():
args = define_parser()
train_script = "src/hf_sft_peft_fl.py"
train_script = "client.py"
client_ids = args.client_ids
num_clients = len(client_ids)

Expand Down Expand Up @@ -74,9 +74,9 @@ def main():

# Define the model persistor and send to server
# First send the model to the server
job.to("src/hf_peft_model.py", "server")
job.to("hf_peft_model.py", "server")
# Then send the model persistor to the server
model_args = {"path": "src.hf_peft_model.CausalLMPEFTModel", "args": {"model_name_or_path": model_name_or_path}}
model_args = {"path": "hf_peft_model.CausalLMPEFTModel", "args": {"model_name_or_path": model_name_or_path}}
job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")

# Add model selection widget and send to server
Expand Down Expand Up @@ -126,7 +126,7 @@ def define_parser():
nargs="+",
type=str,
default="",
help="Clinet IDs, used to get the data path for each client",
help="Client IDs, used to get the data path for each client",
)
parser.add_argument(
"--num_rounds",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@
"metadata": {},
"outputs": [],
"source": [
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_16 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_16 --train_mode SFT --quantize_mode float16\n",
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_8 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_8 --train_mode SFT --quantize_mode blockwise8\n",
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_fp4 --train_mode SFT --quantize_mode float4\n",
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_nf4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_nf4 --train_mode SFT --quantize_mode normfloat4"
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_16 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_16 --train_mode SFT --quantize_mode float16\n",
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_8 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_8 --train_mode SFT --quantize_mode blockwise8\n",
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_fp4 --train_mode SFT --quantize_mode float4\n",
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_nf4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_nf4 --train_mode SFT --quantize_mode normfloat4"
]
},
{
Expand Down Expand Up @@ -81,8 +81,8 @@
"metadata": {},
"outputs": [],
"source": [
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor --train_mode SFT --message_mode tensor\n",
"! python sft_job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor_fp4 --train_mode SFT --message_mode tensor --quantize_mode float4"
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor --train_mode SFT --message_mode tensor\n",
"! python job.py --client_ids dolly --data_path /tmp/nvflare/dataset/llm/ --workspace_dir /tmp/nvflare/workspace/llm/dolly_fl_sft_tensor_fp4 --job_dir /tmp/nvflare/workspace/jobs/llm_hf_sft_tensor_fp4 --train_mode SFT --message_mode tensor --quantize_mode float4"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

def main():
args = define_parser()
train_script = "src/hf_sft_peft_fl.py"
train_script = "client.py"
client_ids = args.client_ids
num_clients = len(client_ids)

Expand Down Expand Up @@ -74,9 +74,9 @@ def main():

# Define the model persistor and send to server
# First send the model to the server
job.to("src/hf_sft_model.py", "server")
job.to("hf_sft_model.py", "server")
# Then send the model persistor to the server
model_args = {"path": "src.hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
model_args = {"path": "hf_sft_model.CausalLMModel", "args": {"model_name_or_path": model_name_or_path}}
job.to(PTFileModelPersistor(model=model_args), "server", id="persistor")

# Add model selection widget and send to server
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
bash utils/log_memory.sh >>/tmp/nvflare/logs/container.txt &
python streaming_job.py --retriever_mode container
python job.py --retriever_mode container
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
bash utils/log_memory.sh >>/tmp/nvflare/logs/file.txt &
python streaming_job.py --retriever_mode file
python job.py --retriever_mode file
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
mkdir /tmp/nvflare/logs/
bash utils/log_memory.sh >>/tmp/nvflare/logs/regular.txt &
python streaming_job.py
python job.py
Loading