Skip to content
31 changes: 21 additions & 10 deletions examples/audio/speaker_recognition_using_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,33 @@
os.environ["KERAS_BACKEND"] = "tensorflow"

import shutil
import zipfile
import numpy as np

import tensorflow as tf
import keras

from pathlib import Path
from IPython.display import display, Audio

# Get the data from https://www.kaggle.com/kongaevans/speaker-recognition-dataset/
# and save it to ./speaker-recognition-dataset.zip, then extract it to
# ./16000_pcm_speeches. Equivalent shell commands:
#   kaggle datasets download -d kongaevans/speaker-recognition-dataset
#   unzip -qq speaker-recognition-dataset.zip

DATASET_ROOT = Path("16000_pcm_speeches")
ZIP_FILE = Path("speaker-recognition-dataset.zip")

# Extract only once: skip everything if the dataset folder already exists.
if not DATASET_ROOT.exists():
    if ZIP_FILE.exists():
        print(f"Extracting {ZIP_FILE}...")
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            # NOTE(review): if the archive itself contains a top-level
            # `16000_pcm_speeches/` folder, extracting into DATASET_ROOT
            # nests the data one level deep — confirm the archive layout.
            zip_ref.extractall(DATASET_ROOT)
        print("Extraction complete.")
    else:
        # Neither the extracted folder nor the archive is present:
        # guide the user, then stop the script cleanly.
        print("Dataset not found. Please download it from:")
        print("https://www.kaggle.com/kongaevans/speaker-recognition-dataset")
        print(f"Save it as '{ZIP_FILE}' in this directory and run again.")
        # `raise SystemExit` works even when the site module (which provides
        # the `exit()` builtin) is not loaded.
        raise SystemExit

# The folders in which we will put the audio samples and the noise samples
Expand Down
21 changes: 14 additions & 7 deletions examples/nlp/text_classification_from_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,14 @@
Let's download the data and inspect its structure.
"""

"""shell
curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
tar -xf aclImdb_v1.tar.gz
"""
dataset_url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
dataset_zip = keras.utils.get_file(
"aclImdb_v1.tar.gz",
dataset_url,
extract=True,
)
# The dataset is extracted into the same folder as the zip
dataset_dir = os.path.join(os.path.dirname(dataset_zip), "aclImdb")

"""
The `aclImdb` folder contains a `train` and `test` subfolder:
Expand Down Expand Up @@ -96,22 +100,25 @@
"""

batch_size = 32
train_dir = os.path.join(dataset_dir, "train")
test_dir = os.path.join(dataset_dir, "test")

# Split the training folder 80/20 into train/validation; using the same
# `seed` for both calls keeps the two subsets disjoint and reproducible.
raw_train_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=1337,
)
raw_val_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset="validation",
    seed=1337,
)
# The test set is used as-is, with no split.
raw_test_ds = keras.utils.text_dataset_from_directory(
    test_dir, batch_size=batch_size
)

print(f"Number of batches in raw_train_ds: {raw_train_ds.cardinality()}")
Expand Down
15 changes: 4 additions & 11 deletions examples/structured_data/collaborative_filtering_movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from zipfile import ZipFile


import keras
from keras import layers
Expand All @@ -48,23 +48,16 @@

# Download the actual data from https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
# Use the ratings.csv file
movielens_data_file_url = (
    "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
)
# `get_file` caches the archive (under ~/.keras/datasets by default) and,
# with `extract=True`, unpacks it automatically — no manual ZipFile
# handling is needed, and re-runs reuse the cached copy.
movielens_zipped_file = keras.utils.get_file(
    "ml-latest-small.zip", movielens_data_file_url, extract=True
)
keras_datasets_path = Path(movielens_zipped_file).parents[0]
# NOTE(review): with Keras 2 the extracted `ml-latest-small` folder sits
# next to the cached archive; Keras 3 returns the extracted directory from
# `get_file` instead — confirm which version this example targets.
movielens_dir = keras_datasets_path / "ml-latest-small"

ratings_file = movielens_dir / "ratings.csv"
df = pd.read_csv(ratings_file)

Expand Down