Skip to content
31 changes: 21 additions & 10 deletions examples/audio/speaker_recognition_using_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,33 @@
os.environ["KERAS_BACKEND"] = "tensorflow"

import shutil
import zipfile
import numpy as np

import tensorflow as tf
import keras

from pathlib import Path
from IPython.display import display, Audio

# Get the data from https://www.kaggle.com/kongaevans/speaker-recognition-dataset/
# and save it to ./speaker-recognition-dataset.zip, then extract it to
# ./16000_pcm_speeches. Equivalent shell commands:
#   kaggle datasets download -d kongaevans/speaker-recognition-dataset
#   unzip -qq speaker-recognition-dataset.zip

DATASET_ROOT = Path("16000_pcm_speeches")
ZIP_FILE = Path("speaker-recognition-dataset.zip")

# Extract only once: skip everything if the dataset folder already exists.
if not DATASET_ROOT.exists():
    if ZIP_FILE.exists():
        print(f"Extracting {ZIP_FILE}...")
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            # NOTE(review): if the archive itself contains a top-level
            # `16000_pcm_speeches/` folder, extracting into DATASET_ROOT
            # nests the data one level deep — confirm the archive layout.
            zip_ref.extractall(DATASET_ROOT)
        print("Extraction complete.")
    else:
        # Neither the extracted folder nor the archive is present:
        # guide the user, then stop the script cleanly.
        print("Dataset not found. Please download it from:")
        print("https://www.kaggle.com/kongaevans/speaker-recognition-dataset")
        print(f"Save it as '{ZIP_FILE}' in this directory and run again.")
        # `raise SystemExit` works even when the site module (which provides
        # the `exit()` builtin) is not loaded.
        raise SystemExit

# The folders in which we will put the audio samples and the noise samples
Expand Down
21 changes: 14 additions & 7 deletions examples/nlp/text_classification_from_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,14 @@
Let's download the data and inspect its structure.
"""

"""shell
curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
tar -xf aclImdb_v1.tar.gz
"""
dataset_url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
dataset_zip = keras.utils.get_file(
"aclImdb_v1.tar.gz",
dataset_url,
extract=True,
)
# The dataset is extracted into the same folder as the zip
dataset_dir = os.path.join(os.path.dirname(dataset_zip), "aclImdb")

"""
The `aclImdb` folder contains a `train` and `test` subfolder:
Expand Down Expand Up @@ -96,22 +100,25 @@
"""

batch_size = 32
train_dir = os.path.join(dataset_dir, "train")
test_dir = os.path.join(dataset_dir, "test")

# Split the training folder 80/20 into train/validation; using the same
# `seed` for both calls keeps the two subsets disjoint and reproducible.
raw_train_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=1337,
)
raw_val_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset="validation",
    seed=1337,
)
# The test set is used as-is, with no split.
raw_test_ds = keras.utils.text_dataset_from_directory(
    test_dir, batch_size=batch_size
)

print(f"Number of batches in raw_train_ds: {raw_train_ds.cardinality()}")
Expand Down
15 changes: 4 additions & 11 deletions examples/structured_data/collaborative_filtering_movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from zipfile import ZipFile


import keras
from keras import layers
Expand All @@ -48,23 +48,16 @@

# Download the actual data from https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# Use the ratings.csv file
movielens_data_file_url = (
    "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
)
# `get_file` caches the archive (under ~/.keras/datasets by default) and,
# with `extract=True`, unpacks it automatically — no manual ZipFile
# handling is needed, and re-runs reuse the cached copy.
movielens_zipped_file = keras.utils.get_file(
    "ml-latest-small.zip", movielens_data_file_url, extract=True
)
keras_datasets_path = Path(movielens_zipped_file).parents[0]
# NOTE(review): with Keras 2 the extracted `ml-latest-small` folder sits
# next to the cached archive; Keras 3 returns the extracted directory from
# `get_file` instead — confirm which version this example targets.
movielens_dir = keras_datasets_path / "ml-latest-small"

ratings_file = movielens_dir / "ratings.csv"
df = pd.read_csv(ratings_file)

Expand Down