Commit 4577c11

matt-boris and bhearsum authored
feat: reland new GPU worker images and dockerization (#1154)
* feat: reland new GPU worker images and dockerization (#1131). This is a backout of #1120, except for the bits that were re-landed in #1123. It effectively gets us back to where we were re: GPU workers and dockerization prior to #1120.
* bump to cuda v12.9.0

Co-authored-by: Ben Hearsum (he/him) <ben@mozilla.com>
1 parent 032ea1d commit 4577c11

File tree

19 files changed: +366 −201 lines


docs/training/task-cluster.md

Lines changed: 4 additions & 12 deletions
@@ -139,24 +139,16 @@ To start an interactive task, follow these steps:

 5. Reduce the maxRunTime to a best guess at how long you'll need the task and worker running for. (We pay for every minute a worker runs - so they should not be kept running, eg: overnight.)
-6. Adjust the payload to simply run bash and sleep (instead of a full pipeline step). For docker-worker tasks use something like:
+6. Adjust the payload to simply run bash and sleep (instead of a full pipeline step):
    ```
    command:
    - bash
    - '-c'
    - 'sleep 7200'
    ```
-
-   For generic-worker tasks (those needing a GPU), use:
-   ```
-   command:
-   - - bash
-     - '-c'
-     - 'sleep 7200'
-   ```
-
-   (docker-worker tasks have an `image` section in the payload)
-
 7. Click "Create Task"
-
-   After a few minutes you should be able to get a shell (a link will show up in the tab when it's ready).
+
+   After a few minutes you should be able to get a shell (a link will show up in the tab when it's ready). This shell should drop you inside a docker container as root, running the same image as the task you started this process with. Most tasks drop privileges to the `worker` user before doing any work, so you may want to run `su - worker` before doing anything of note.
+
+   When you are done with the worker you can use "Cancel" from the three dots menu to immediately shut it down. (This should happen within a few minutes of closing your last shell to the worker, but it's good practice to do it yourself to minimize costs.)
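With generic-worker gone from the GPU pools, both interactive payload shapes in the docs collapse into the docker-worker one. A minimal interactive payload might look like this sketch (the `image` value is a placeholder, not a real image from this repository):

```yaml
# Hypothetical interactive-task payload - the image value is illustrative.
payload:
  maxRunTime: 7200        # keep this low; workers bill for every minute they run
  image: example/translations-train:latest    # docker-worker payloads carry an `image` section
  command:
    - bash
    - '-c'
    - 'sleep 7200'
```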

pipeline/bicleaner/bicleaner.sh

Lines changed: 0 additions & 4 deletions
@@ -16,10 +16,6 @@ python3 -c "from pipeline.common.marian import assert_gpus_available; assert_gpu
 test -v SRC
 test -v TRG
 test -v CUDA_DIR
-test -v CUDNN_DIR
-
-# cuda and cudnn libs
-export LD_LIBRARY_PATH=${CUDA_DIR}/lib64:${CUDNN_DIR}:${LD_LIBRARY_PATH:+LD_LIBRARY_PATH:}

 corpus_prefix=$1
 output_prefix=$2
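The deleted export relied on two bash idioms: `test -v NAME` as a guard for required environment variables, and `${VAR:+word}` conditional expansion to avoid a dangling separator when the variable is empty. (As written, the removed line appears to expand to the literal string `LD_LIBRARY_PATH:` rather than its value; the corrected form of the pattern is shown below.) A standalone sketch, not the pipeline script:

```shell
#!/usr/bin/env bash
set -eu

# `test -v NAME` succeeds only if NAME is set - the script's guard for required env vars.
CUDA_DIR=/opt/cuda
test -v CUDA_DIR

# `${VAR:+${VAR}:}` expands to "value:" when VAR is non-empty and to nothing
# otherwise, so the final path list never gains a stray ":".
unset -v LD_LIBRARY_PATH
NEW_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_DIR}/lib64"
echo "$NEW_PATH"        # -> /opt/cuda/lib64

LD_LIBRARY_PATH=/usr/lib
NEW_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_DIR}/lib64"
echo "$NEW_PATH"        # -> /usr/lib:/opt/cuda/lib64
```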
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 ctranslate2==4.3.1
 sentencepiece==0.2.0
 gpustat==1.1.1
+requests==2.32.3

pipeline/translate/requirements/translate-ctranslate2.txt

Lines changed: 242 additions & 120 deletions
Large diffs are not rendered by default.

taskcluster/config.yml

Lines changed: 14 additions & 14 deletions
@@ -97,39 +97,39 @@ workers:
     worker-type: 'b-linux-large-gcp-1tb-64-512-std-d2g'
   b-linux-v100-gpu:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g'
   b-linux-v100-gpu-4:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4'
   b-linux-v100-gpu-4-300gb:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4-300gb'
   b-linux-v100-gpu-4-300gb-standard:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4-300gb-standard'
   b-linux-v100-gpu-4-1tb:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4-1tb'
   b-linux-v100-gpu-4-2tb:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4-2tb'
   b-linux-v100-gpu-4-1tb-standard:
     provisioner: '{trust-domain}-{level}'
-    implementation: generic-worker
+    implementation: docker-worker
     os: linux
-    worker-type: '{alias}'
+    worker-type: 'b-linux-v100-gpu-d2g-4-1tb-standard'
   images:
     provisioner: '{trust-domain}-{level}'
     implementation: docker-worker
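The schema difference behind this swap shows up in the docs change earlier in the commit: docker-worker payloads name an image and take a flat command list, while generic-worker payloads take a list of command lists and have no `image` key. A hedged sketch of the two payload shapes (values illustrative):

```yaml
# docker-worker payload shape (what these pools now expect) - values illustrative.
payload:
  image: example/translations-train:latest   # docker-worker requires an image
  command: [bash, '-c', 'nvidia-smi']

# generic-worker payload shape (what these pools used before) - note the
# nested command list and the absence of an `image` key.
# payload:
#   command:
#     - [bash, '-c', 'nvidia-smi']
```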

taskcluster/kinds/backtranslations-mono-trg-translate/kind.yml

Lines changed: 6 additions & 2 deletions
@@ -101,11 +101,14 @@ tasks:
         from-parameters: training_config.marian-args.decoding-backward
     worker-type: b-largegpu
     worker:
+      docker-image: {"in-tree": "train"}
       max-run-time: 2592000
+      volumes:
+        - /builds/worker/artifacts
       artifacts:
         - name: public/build
-          path: artifacts
-          type: directory
+          path: /builds/worker/artifacts
+          type: volume
       env:
         CUDA_DIR: fetches/cuda-toolkit
         CUDNN_DIR: fetches/cuda-toolkit
@@ -135,6 +138,7 @@ tasks:
         pip3 install -U pip==25.0.1 &&
         pip3 install -r $VCS_PATH/pipeline/translate/requirements/translate-ctranslate2.txt &&
         export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
+        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$MOZ_FETCHES_DIR/cuda-toolkit/lib64" &&
         python3 $VCS_PATH/pipeline/translate/translate.py
           --input "$MOZ_FETCHES_DIR/file.{{this_chunk}}.zst"
           --models_glob "$MOZ_FETCHES_DIR/*.npz" "$MOZ_FETCHES_DIR/model*/*.npz"

taskcluster/kinds/backtranslations-train-backwards-model/kind.yml

Lines changed: 7 additions & 2 deletions
@@ -74,6 +74,7 @@ tasks:
       cron: b-largegpu
       default: b-largegpu-largedisk
     worker:
+      docker-image: {"in-tree": "train"}
       max-run-time: 2592000
       # train_taskcluster.py exits with 17 if a request to Taskcluster fails
       # 75 - EX_TEMPFAIL, used for when the GPUs aren't available on the machine.
@@ -86,10 +87,12 @@ tasks:

         # Weight & Biases publication token is stored in that secret
         TASKCLUSTER_SECRET: project/translations/level-1/weights-and-biases
+      volumes:
+        - /builds/worker/artifacts
       artifacts:
         - name: public/build
-          path: artifacts
-          type: directory
+          path: /builds/worker/artifacts
+          type: volume

       # Taskcluster proxy is required to read secrets
       taskcluster-proxy: true
@@ -118,6 +121,7 @@ tasks:
         export MARIAN=$MOZ_FETCHES_DIR &&
         export MOZ_FETCHES_DIR=$MOZ_FETCHES_DIR &&
         export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
+        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$MOZ_FETCHES_DIR/cuda-toolkit/lib64" &&
         $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py
           backward
           train
@@ -143,6 +147,7 @@ tasks:
     fetches:
       toolchain:
         - marian
+        - cuda-toolkit
       corpus-merge-parallel:
         - artifact: corpus.{src_locale}.zst
           extract: false
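The comments in this hunk reference retryable exit codes: 17 when a Taskcluster request fails inside train_taskcluster.py, and 75 (EX_TEMPFAIL) when the GPUs aren't available on the machine. A minimal sketch of how a wrapper might classify such codes for retry (illustrative only, not the worker's actual retry logic):

```shell
#!/usr/bin/env bash
# Illustrative retry classifier - not the actual worker implementation.
# 17 = Taskcluster request failure, 75 = EX_TEMPFAIL (GPUs unavailable).
is_retryable() {
  case "$1" in
    17|75) return 0 ;;
    *)     return 1 ;;
  esac
}

if is_retryable 75; then echo retry; else echo fail; fi   # -> retry
```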

taskcluster/kinds/corpus-clean-parallel-bicleaner-ai/kind.yml

Lines changed: 12 additions & 6 deletions
@@ -67,17 +67,18 @@ tasks:

     worker-type: b-largegpu-largedisk
     worker:
+      docker-image: {"in-tree": "train"}
+      volumes:
+        - /builds/worker/artifacts
       artifacts:
         - name: public/build
-          path: artifacts
-          type: directory
+          path: /builds/worker/artifacts
+          type: volume
       # 7 days. yes, it can take a while to clean a huge dataset
       max-run-time: 604800
       env:
         SRC: "{src_locale}"
         TRG: "{trg_locale}"
-        CUDA_DIR: fetches/cuda-toolkit
-        CUDNN_DIR: fetches/cuda-toolkit
         # 128 happens when cloning this repository fails
         # 75 is the unix code EX_TEMPFAIL, which indicates a temporary failure.
         # This is used when the GPUs can't be accessed. Bicleaner reverts to CPU
@@ -104,7 +105,10 @@ tasks:
         pip install $MOZ_FETCHES_DIR/kenlm-0.0.0-cp310-cp310-linux_x86_64.whl &&
         pip install -r {bicleaner_reqs} &&
         export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
-        export PATH=$PATH:~/.local/bin &&
+        export CUDA_DIR="$MOZ_FETCHES_DIR/cuda-toolkit" &&
+        export PATH="$PATH:~/.local/bin:$CUDA_DIR/bin" &&
+        export XLA_FLAGS="--xla_gpu_cuda_data_dir=$CUDA_DIR" &&
+        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$MOZ_FETCHES_DIR/cuda-toolkit/lib64:$MOZ_FETCHES_DIR/cudnn/lib" &&
         $VCS_PATH/pipeline/bicleaner/bicleaner.sh
           $MOZ_FETCHES_DIR/{dataset_sanitized}
           $TASK_WORKDIR/artifacts/{dataset_sanitized}
@@ -115,12 +119,14 @@ tasks:
       "{provider}": corpus-clean-parallel-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale}
       corpus-clean-parallel-fetch-bicleaner-model: corpus-clean-parallel-fetch-bicleaner-model-{src_locale}-{trg_locale}
     fetches:
+      fetch:
+        - cudnn
       toolchain:
         - artifact: cyhunspell
           extract: false
         - artifact: kenlm
           extract: false
-        - cuda-toolkit-11
+        - cuda-toolkit
       "{provider}":
         - artifact: "{dataset_sanitized}.{src_locale}.zst"
           extract: false
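The inline exports above replace what bicleaner.sh used to set itself: the task now points bicleaner and TensorFlow/XLA at the fetched CUDA toolkit and cuDNN directly. As a standalone sketch (the fetch location is illustrative; the real tasks use the worker-provided `$MOZ_FETCHES_DIR`):

```shell
#!/usr/bin/env bash
set -eu

# Illustrative fetch location; real tasks get $MOZ_FETCHES_DIR from the worker.
MOZ_FETCHES_DIR=/builds/worker/fetches

# Expose the fetched CUDA toolkit's binaries and libraries, and tell XLA
# where the CUDA data directory lives.
export CUDA_DIR="$MOZ_FETCHES_DIR/cuda-toolkit"
export PATH="$PATH:$HOME/.local/bin:$CUDA_DIR/bin"
export XLA_FLAGS="--xla_gpu_cuda_data_dir=$CUDA_DIR"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CUDA_DIR/lib64:$MOZ_FETCHES_DIR/cudnn/lib"

echo "$XLA_FLAGS"
```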

taskcluster/kinds/distillation-mono-src-translate/kind.yml

Lines changed: 7 additions & 2 deletions
@@ -101,11 +101,14 @@ tasks:

     worker-type: b-largegpu
     worker:
+      docker-image: {"in-tree": "train"}
       max-run-time: 2592000
+      volumes:
+        - /builds/worker/artifacts
       artifacts:
         - name: public/build
-          path: artifacts
-          type: directory
+          path: /builds/worker/artifacts
+          type: volume
       env:
         CUDA_DIR: fetches/cuda-toolkit
         CUDNN_DIR: fetches/cuda-toolkit
@@ -136,6 +139,7 @@ tasks:
         pip3 install -U pip==25.0.1 &&
         pip3 install -r $VCS_PATH/pipeline/translate/requirements/translate-ctranslate2.txt &&
         export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
+        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$MOZ_FETCHES_DIR/cuda-toolkit/lib64" &&
         python3 $VCS_PATH/pipeline/translate/translate.py
           --input "$MOZ_FETCHES_DIR/file.{{this_chunk}}.zst"
           --models_glob "$MOZ_FETCHES_DIR/*.npz" "$MOZ_FETCHES_DIR/model*/*.npz"
@@ -152,3 +156,4 @@ tasks:
     fetches:
       toolchain:
         - marian
+        - cuda-toolkit

taskcluster/kinds/distillation-parallel-src-translate/kind.yml

Lines changed: 6 additions & 2 deletions
@@ -102,11 +102,14 @@ tasks:

     worker-type: b-largegpu-xlargedisk
     worker:
+      docker-image: {"in-tree": "train"}
       max-run-time: 2592000
+      volumes:
+        - /builds/worker/artifacts
       artifacts:
         - name: public/build
-          path: artifacts
-          type: directory
+          path: /builds/worker/artifacts
+          type: volume
       env:
         CUDA_DIR: fetches/cuda-toolkit
         CUDNN_DIR: fetches/cuda-toolkit
@@ -133,6 +136,7 @@ tasks:
         pip3 install -U pip==25.0.1 &&
         pip3 install -r $VCS_PATH/pipeline/translate/requirements/translate-ctranslate2.txt &&
         export PYTHONPATH=$PYTHONPATH:$VCS_PATH &&
+        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$MOZ_FETCHES_DIR/cuda-toolkit/lib64" &&
         python3 $VCS_PATH/pipeline/translate/translate.py
           --input "$MOZ_FETCHES_DIR/file.{{this_chunk}}.zst"
           --models_glob "$MOZ_FETCHES_DIR/*.npz" "$MOZ_FETCHES_DIR/model*/*.npz"

0 commit comments