diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2acd8b1..bba386e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 1 matrix: - python-version: [3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v1 diff --git a/.github/workflows/cov.yml b/.github/workflows/cov.yml index d9f6909..a2c4c0c 100644 --- a/.github/workflows/cov.yml +++ b/.github/workflows/cov.yml @@ -19,10 +19,7 @@ jobs: run: | pip install pytest pip install pytest-cov - pip install requests - pip install selenium - pip install progress - pip install pandas + pip install . pytest --cov=./ --cov-report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 diff --git a/pull_fb/auth.py b/pull_fb/auth.py new file mode 100644 index 0000000..cb90099 --- /dev/null +++ b/pull_fb/auth.py @@ -0,0 +1,33 @@ +import requests +import browser_cookie3 + + +def get_auth_cookies(): + + print(u"\U0001f512" + " Getting authentication cookies...") + + return browser_cookie3.load(domain_name=".facebook.com") + + +def check_auth(cookies): + + login_url = "https://partners.facebook.com/data_for_good/" + + r = requests.get(login_url, cookies=cookies) + + check_auth_headers(r.headers, login_url) + + +def check_auth_headers(headers, login_url): + + if 'x-fb-rlafr' in headers.keys(): + + print(u"\U00002705" + " Authenticated.") + + return True + + else: + + print(u"\U0000274c" + f" Not authenticated. You must log in to {login_url} in your default browser.") + + return False diff --git a/pull_fb/collection.py b/pull_fb/collection.py new file mode 100644 index 0000000..216b45f --- /dev/null +++ b/pull_fb/collection.py @@ -0,0 +1,141 @@ +import os +import requests +import zipfile +import glob +from datetime import datetime +import re + + +def get_outfn(dataset_id, cwd=os.getcwd()): + + out_fn = cwd + "/" + dataset_id + ".csv.zip" + + return out_fn + + +def write_zipfile(out_fn, data): + + try: + + print(u"\U0001f4e5" + " Writing data...") + + with open(out_fn, 'wb') as fd: + for chunk in data: + fd.write(chunk) + + except Exception: + + raise Exception("Failed to write output zipfile.") + + +def unzip_data(out_fn, out_dir=os.getcwd()): + + print(u"\U0001f4a5" + " Extracting data...") + + try: + + with zipfile.ZipFile(out_fn, 'r') as zip_ref: + zip_ref.extractall(out_dir) + + except Exception: + + raise Exception("Failed to extract files.") + + +def get_file_dataset_ids(files: list): + + try: + + dataset_ids = [x.split("/")[-1].split("_")[0] for x in files] + + except Exception: + + raise Exception("Unable to parse dataset ids.") + + return dataset_ids + + +def get_file_dates(files: list): + + try: + + dates = [x.split("/")[-1].split("_")[1].replace(".csv", "") + for x in files] + + dates = [datetime.strptime(x, "%Y-%m-%d") for x in dates] + + except Exception: + + raise Exception("Unable to parse dates.") + + return dates + + +def set_file_dataset_ids(files, dataset_id): + + print(u"\U0001f4c4" + " Renaming files...") + + for file in files: + + new_fn = re.sub( + r"\d{15}(_\d{4}-\d{2}-\d{2}_\d{4}.csv)", + rf"{dataset_id}\1", + file) + + os.rename(file, new_fn) + + +def request_data(dataset_id, start_date, end_date, cookies): + + try: + + url = "https://partners.facebook.com/data_for_good/bulk_download/?" 
+ query = f"resource_type=downloadable_csv&start_date={start_date}&end_date={end_date}&dataset_id={dataset_id}" + + print(u"\U0001f30e" + f" Trying {url + query}...") + + r = requests.get(url + query, + cookies=cookies) + + except Exception: + + raise Exception("Unable to request data.") + + return r + + +def download_data(dataset_id, start_date, end_date, cookies): + + r = request_data(dataset_id, start_date, end_date, cookies) + + out_fn = get_outfn(dataset_id) + + write_zipfile(out_fn, r.iter_content(chunk_size=128)) + + unzip_data(out_fn) + + os.remove(out_fn) + + files = glob.glob(os.getcwd() + "/*.csv") + + set_file_dataset_ids(files, dataset_id) + + print(u"\U0001f389" + f" Done! Collection size: {len(files)} files.") + + +def get_update_config(): + + files = glob.glob(os.getcwd() + "/*.csv") + + dataset_ids = get_file_dataset_ids(files) + dates = get_file_dates(files) + + start_date = datetime.strftime(max(dates), "%Y-%m-%d") + end_date = datetime.strftime(datetime.now(), "%Y-%m-%d") + dataset_id = dataset_ids[0] + + return { + "start_date": start_date, + "end_date": end_date, + "dataset_id": dataset_id + } diff --git a/pull_fb/credentials.py b/pull_fb/credentials.py deleted file mode 100644 index ca6ad4a..0000000 --- a/pull_fb/credentials.py +++ /dev/null @@ -1,22 +0,0 @@ -from getpass import getpass - - -def get_credentials(username, password): - """ - Prompt for Facebook login get_credentials - - Future: add option to cache encrypted credentials - """ - - # Prompt for username - if username is None: - - username = input("Email: ") - - # Prompt for password - if password is None: - - password = getpass("Password: ") - - # Return dictionary of username and password - return {"email": username, "password": password} diff --git a/pull_fb/driver.py b/pull_fb/driver.py deleted file mode 100644 index 5516b5e..0000000 --- a/pull_fb/driver.py +++ /dev/null @@ -1,164 +0,0 @@ -import time -import requests -from datetime import datetime -from selenium import webdriver -from progress.bar import Bar -from io import StringIO -import pandas as pd - - -def authenticate_driver(keys: dict, - driver_path: str, - driver_flags: list, - driver_prefs: dict): - - print('Authenticating webdriver...') - - # Define options for web driver - chrome_options = webdriver.ChromeOptions() - - # Apply preferences to chrome driver - chrome_options.add_experimental_option("prefs", driver_prefs) - - # Add individual flags to chromedriver prefs - for flag in driver_flags: - - chrome_options.add_argument(flag) - - driver = webdriver.Chrome( - executable_path=driver_path, options=chrome_options - ) - - # Login url for Geoinsights platform - geoinsights_url = "https://www.facebook.com/login/?next=https%3A%2F%2Fwww.facebook.com%2Fgeoinsights-portal%2F" - - # Access login url with webdriver - driver.get(geoinsights_url) - - # Pause for page load (and cookie acceptance) - time.sleep(5) - - # Try to accept cookies. 
On failure, pass - try: - - driver.find_element_by_xpath('//button[@data-cookiebanner="accept_button"]').click() - - except Exception: - - pass - - # Add username in username form field - driver.find_element_by_xpath('//*[@id="email"]').send_keys(keys["email"]) - - # Add password in password form field - driver.find_element_by_xpath('//*[@id="pass"]').send_keys(keys["password"]) - - # Click login button - driver.find_element_by_xpath('//*[@id="loginbutton"]').click() - - time.sleep(1) - - # Get cookies from authenticated web driver - request_cookies_browser = driver.get_cookies() - - driver.quit() - - print('Successfully authenticated webdriver.') - - return(request_cookies_browser) - - -def authenticate_session(request_cookies_browser: list): - - # Create a nes requests session - s = requests.Session() - - # Pass the cookies from the authenticated webdriver to the session - [s.cookies.set(c['name'], c['value']) for c in request_cookies_browser] - - return s - - -def download_data(download_urls: list, - area: str, - outdir: str, - request_cookies_browser: list): - - s = authenticate_session(request_cookies_browser) - - # Start download bar - print("\n") - bar = Bar("Downloading", max=len(download_urls)) - - # Store unsuccessful download file names - download_failed = [] - - # For each download url, download dataset - for i, url in enumerate(download_urls): - - # Request dataset from URL - resp = s.get(url["url"]) - - # Define output file name - out_fn = format_out_fn(outdir, area, url["date"]) - - download_failed = write_outfile(resp, out_fn, download_failed) - - time.sleep(1) - - # Update progress bar - bar.next() - - # Close progress bar - bar.finish() - - if len(download_failed) > 0: - - print('Failed to download {} files. Please try again later.'.format(len(download_failed))) - - -def write_outfile(resp: requests.Response, out_fn: str, download_failed: list): - - if resp.status_code == 200: - - try: - - # try to convert response data to csv with >1 row - data = response_as_dataframe(resp.text) - - # Write response data as csv - data.to_csv(out_fn) - - except Exception: - - # Append failed filename download - download_failed.append(out_fn) - - pass - - return download_failed - - -def response_as_dataframe(text: str): - - data = StringIO(text) - - df = pd.read_csv(data) - - try: - - assert len(df.index) > 1 - - except Exception as e: - - raise e - - return(df) - - -def format_out_fn(outdir: str, area: str, date: datetime): - - # Define new file name as AREA_DATE.csv - new_name = outdir + "/" + area + date.strftime("_%Y_%m_%d_%H%M") + ".csv" - - return(new_name) diff --git a/pull_fb/pull_fb.py b/pull_fb/pull_fb.py index 0810747..64024a9 100644 --- a/pull_fb/pull_fb.py +++ b/pull_fb/pull_fb.py @@ -1,151 +1,77 @@ import click -import os from datetime import datetime -import pull_fb.utils as utils -import pull_fb.url as url -import pull_fb.driver as driver -import pull_fb.credentials as credentials - - -@click.command() -@click.option("-d", "--dataset_name", help="Dataset name to be downloaded.") -@click.option("-a", "--area", help="Area to be downloaded.") -@click.option( - "-o", - "--outdir", - help="Outfile directory. Default: current directory.", - default=os.getcwd(), -) -@click.option( - "-e", - "--end_date", - help="Dataset end date. Default: datetime.now().", - default=datetime.now(), -) -@click.option( - "-f", - "--frequency", - help="Dataset update frequency (hours). 
Default: 8.",
-    default=8
-)
-@click.option(
-    "-driver",
-    "--driver_path",
-    help="Path to webdriver.",
-    default="/Applications/chromedriver",
-)
-@click.option(
-    "-config",
-    "--config_path",
-    help=".config path. Default is requested from the repo, otherwise is read from provided local path or other http connection.",
-    default="https://raw.githubusercontent.com/hamishgibbs/pull_facebook_data_for_good/master/.config",
-)
-@click.option(
-    "-user",
-    "--username",
-    help="Facebook username.",
-    default=None
-)
-@click.option(
-    "-pass",
-    "--password",
-    help="Facebook password.",
-    default=None
-)
-@click.option(
-    "-driver_flags",
-    "--driver_flags",
-    help="Flags passed to chromedriver.",
-    multiple=True,
-    default=["--headless"]
+
+from pull_fb.auth import (
+    get_auth_cookies,
+    check_auth
 )
-@click.option(
-    "-driver_prefs",
-    "--driver_prefs",
-    help="Preferences passed to chromedriver.",
-    default={"download.default_directory": os.getcwd()}
+
+from pull_fb.collection import (
+    download_data,
+    get_update_config,
 )
-def cli(
-    dataset_name,
-    area,
-    outdir=None,
-    end_date=None,
-    frequency=None,
-    driver_path=None,
-    config_path=None,
-    username=None,
-    password=None,
-    driver_flags=None,
-    driver_prefs=None):
-    """
-    Entry point for the pull_fb cli.
-
-    """
-
-    pull_fb(dataset_name,
-            area,
-            outdir,
-            end_date,
-            frequency,
-            driver_path,
-            config_path,
-            username,
-            password,
-            driver_flags,
-            driver_prefs)
-
-
-def pull_fb(dataset_name,
-            area,
-            outdir: str = os.getcwd(),
-            end_date: datetime = datetime.now(),
-            frequency: int = 8,
-            driver_path: str = "/Applications/chromedriver",
-            config_path: str = "https://raw.githubusercontent.com/hamishgibbs/pull_facebook_data_for_good/master/.config",
-            username: str = None,
-            password: str = None,
-            driver_flags: list = ["--headless"],
-            driver_prefs: dict = {"download.default_directory": os.getcwd()}):
-
-    print("Reading dataset configuration...")
-    # Get config variables from repository
-    config = utils.get_download_variables(dataset_name,
-                                          area,
-                                          end_date,
-                                          config_path)
-
-    # Get date sequence between start and end dates
-    data_dates = utils.get_file_dates(
-        config["start_date"], config["end_date"], frequency
+
+
+@click.group()
+def cli():
+    pass
+
+
+@click.group()
+def auth():
+    pass
+
+
+@click.group()
+def collection():
+    pass
+
+
+@auth.command('status')
+def auth_status():
+
+    cookies = get_auth_cookies()
+
+    check_auth(cookies)
+
+
+@collection.command("init")
+@click.option('--dataset_id', required=True)
+@click.option('--start_date', required=True)
+@click.option('--end_date', default=datetime.strftime(datetime.now(), "%Y-%m-%d"))
+def collection_init(dataset_id,
+                    start_date,
+                    end_date):
+
+    cookies = get_auth_cookies()
+
+    download_data(
+        dataset_id,
+        start_date,
+        end_date,
+        cookies
     )
-    # Get downloaded dates from outdir
-    existing_dates = utils.get_existing_dates(outdir, area)
 
-    # Only download dates that have not already been downloaded
-    download_dates = list(set(data_dates).difference(set(existing_dates)))
+@collection.command("update")
+def collection_update():
 
-    download_dates.sort()
+    cookies = get_auth_cookies()
 
-    # Get url of each of dataset
-    download_urls = url.format_urls(dataset_name,
-                                    config["dataset_id"],
-                                    download_dates)
+    config = get_update_config()
 
-    # Get credentials here
-    keys = credentials.get_credentials(username, password)
+    download_data(
+        config["dataset_id"],
+        config["start_date"],
+        config["end_date"],
+        cookies
+    )
 
-    # Authenticate webdriver
-    request_cookies_browser = 
driver.authenticate_driver(keys, - driver_path, - driver_flags, - driver_prefs) - # Download url sequence and move to output directory - driver.download_data(download_urls, - area, - outdir, - request_cookies_browser) +cli.add_command(auth) +cli.add_command(collection) - # Success message - print('Done.') +# add pull_fb collection audit +# to check - no duplicate files +# all files are present in range +# Only one dataset id diff --git a/pull_fb/url.py b/pull_fb/url.py deleted file mode 100644 index b9a7d1f..0000000 --- a/pull_fb/url.py +++ /dev/null @@ -1,37 +0,0 @@ -def format_urls(dataset_name: str, dataset_id: str, download_dates: list): - """Function to format urls with the appropriate format""" - - # Define base urls for each supported dataset - # Move this into a config in the future - base_urls = { - "TileMovement": "https://www.facebook.com/geoinsights-portal/downloads/vector/?id={}&ds={}", - "TilePopulation": "https://www.facebook.com/geoinsights-portal/downloads/raster/?id={}&ds={}" - } - - # Define date formats for download urls of each dataset - date_formats = { - "TileMovement": "%Y-%m-%d+%H%M", - "TilePopulation": "%Y-%m-%d+%H%M" - } - - # Define the appropriate base_url - base_url = base_urls[dataset_name] - - # Define the appropriate date_format - date_format = date_formats[dataset_name] - - # List of download urls - urls = [] - - # For each download date, format a download url and record dataset date - for date in download_dates: - - urls.append( - { - "url": base_url.format(dataset_id, date.strftime(date_format)), - "date": date, - } - ) - - # Return a list of url, date pair dictionaries - return urls diff --git a/pull_fb/utils.py b/pull_fb/utils.py deleted file mode 100644 index 42cdc10..0000000 --- a/pull_fb/utils.py +++ /dev/null @@ -1,159 +0,0 @@ -import requests -import os -import glob -from datetime import datetime, timedelta - - -def get_config(config_path): - """ - Funciton to get configuration file from online repository - """ - - # Try to get config file or raise exception - try: - if config_path.startswith('http'): - r = requests.get(config_path) - config_var = r.text.split("\n")[:-1] - else: - with open(config_path) as f: - r = f.readlines() - config_var = [x.replace("\n", "") for x in r] - - except requests.exceptions.RequestException as e: - - raise SystemExit(e) - - # Extract config variables to dictionary or raise Exception - try: - config = dict(x.split("=") for x in config_var) - - except Exception: - - raise Exception("Malformed .config file.") - - # Return config variables as a dictionary - return(config) - - -def get_download_variables(dataset: str, country: str, end_date: str, config_path: str): - """ - Function to get downlaod variable for a particular dataset from config file - - This could be simplified - """ - - # Get config variables from repository - config = get_config(config_path) - - # Extract dataset id or raise missing dataset error - try: - - dataset_id = config["_".join([country, dataset, "ID"])] - - except Exception: - - raise KeyError( - "No config value for {}. To add a new dataset, see the Readme.".format( - "_".join([country, dataset, "ID"]) - ) - ) - - # Extract dataset origin or raise missing dataset error - try: - - dataset_origin = config["_".join([country, dataset, "Origin"])] - - except Exception: - - raise KeyError( - "No config value for {}. 
To add a new dataset, see the Readme.".format( - "_".join([country, dataset, "Origin"]) - ) - ) - - # Convert datset origin string to datetime object - dataset_origin = date_str_to_datetime(dataset_origin) - - # Return config variables as dict - return { - "dataset_id": dataset_id, - "start_date": dataset_origin, - "end_date": end_date, - } - - -def date_str_to_datetime(date: str): - """ - Function to parse origin date in the format '%Y_%m_%d_%H' or '%Y_%m_%d' - """ - - # List of recognized date formats - formats = ["%Y_%m_%d_%H%M", "%Y_%m_%d_%H", "%Y_%m_%d"] - - # Try to match formats until one succeeds - for format in formats: - - try: - - # Return datetime object - return datetime.strptime(date, format) - - except ValueError: - - pass - - # Raise ValueError for unknown date format - raise ValueError("Unknown date format.") - - -def get_file_dates(start_date, end_date, frequency): - """ - Function to get date sequence between start_date and end_date with a - given frequency - - This could be replaced by a datetime function - """ - - # List to store dataset dates - data_dates = [] - - # Define start of date list - date = start_date - - # Loop through date range, incrementing by `frequency` hours - while date < end_date: - - data_dates.append(date) - - date = date + timedelta(hours=frequency) - - # Return list of dataset dates - return data_dates - - -def get_existing_dates(outdir: str, area: str): - """ - Function to get dates from files in the outdir - """ - - # Extract file names from csv files in outdir (only for current area) - date_str = [os.path.basename(x) for x in glob.glob(outdir + "/" + area + "_" + "*.csv")] - - # Remove area from file name - date_str = [x.replace(area + "_", "") for x in date_str] - - # Remove extension from file name - date_str = [x.replace(".csv", "") for x in date_str] - - # Convert date string to datetime object - date_str = [date_str_to_datetime(x) for x in date_str] - - # If any existing files are found, notify user - if len(date_str) > 0: - - message = "Found existing collection in output directory ({} files).\nOnly new files will be downloaded." 
-
-        print(message.format(str(len(date_str))))
-
-    # Return a list of the dates of datasets that have already been downloaded
-    return date_str
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e195370
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,22 @@
+[tool.poetry]
+name = "pull_fb"
+version = "0.1.0"
+description = "Imitate an API for downloading data from Facebook Data For Good"
+authors = ["hamishgibbs <Hamish.Gibbs@lshtm.ac.uk>"]
+license = "MIT"
+
+[tool.poetry.dependencies]
+python = "^3.7"
+requests = "^2.25.1"
+browser-cookie3 = "^0.12.1"
+click = "^7.1.2"
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.2.4"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.scripts]
+pull_fb="pull_fb.pull_fb:cli"
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 359a625..0000000
--- a/setup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import setuptools
-
-setuptools.setup(
-    name="pull_fb",
-    version="0.0.1",
-    author="Hamish Gibbs",
-    author_email="Hamish.Gibbs@lshtm.ac.uk",
-    description="CLI for downloading data from Facebook data for good.",
-    url="https://github.com/hamishgibbs/pull_facebook_data_for_good",
-    packages=setuptools.find_packages(),
-    install_requires=[
-        "Click",
-        "requests",
-        "pandas",
-        "progress"
-    ],
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.6",
-    entry_points="""
-        [console_scripts]
-        pull_fb=pull_fb.pull_fb:cli
-    """,
-)
diff --git a/tests/test_check_auth_headers.py b/tests/test_check_auth_headers.py
new file mode 100644
index 0000000..b363095
--- /dev/null
+++ b/tests/test_check_auth_headers.py
@@ -0,0 +1,17 @@
+from pull_fb.auth import check_auth_headers
+
+
+def test_check_auth_headers_true():
+
+    headers = {
+        "x-fb-rlafr": "test"
+    }
+
+    assert check_auth_headers(headers, "a")
+
+
+def test_check_auth_headers_false():
+
+    headers = {}
+
+    assert not check_auth_headers(headers, "a")
diff --git a/tests/test_credentials.py b/tests/test_credentials.py
deleted file mode 100644
index 0b1658d..0000000
--- a/tests/test_credentials.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from pull_fb import credentials
-
-
-def test_credentials_filled():
-
-    res = credentials.get_credentials('a', 'b')
-
-    assert type(res) is dict
-
-
-def test_credentials_usenrame_none(monkeypatch):
-
-    monkeypatch.setattr('builtins.input', lambda _: "example@gmail.com")
-
-    # go about using input() like you normally would:
-    res = credentials.get_credentials(None, 'b')
-
-    assert type(res) is dict
-    assert res['email'] == "example@gmail.com"
diff --git a/tests/test_driver.py b/tests/test_driver.py
deleted file mode 100644
index 39476aa..0000000
--- a/tests/test_driver.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import os
-import pytest
-from datetime import datetime
-from pull_fb import driver
-import requests
-import pandas as pd
-
-
-@pytest.fixture()
-def sample_csv_response():
-
-    return 'a,b\n1,2\n3,4'
-
-
-@pytest.fixture()
-def mock_csv_response():
-
-    class Mock_Response():
-
-        def __init__(self):
-
-            self.status_code = 200
-            self.text = 'a,b\n1,2\n3,4'
-
-    return Mock_Response()
-
-
-@pytest.fixture(scope="session")
-def tmp_path(tmpdir_factory):
-
-    path = tmpdir_factory.mktemp("tmp")
-
-    return path
-
-
-def test_format_out_fn():
-
-    res = driver.format_out_fn('a', 'b', datetime(2000, 1, 1, 0))
-
-    assert res == 'a/b_2000_01_01_0000.csv'
-
-
-def test_response_as_dataframe_reads_csv(sample_csv_response):
-
-    res = 
driver.response_as_dataframe(sample_csv_response)
-
-    assert type(res) is pd.DataFrame
-
-
-def test_response_as_dataframe_raises_one_row():
-
-    with pytest.raises(AssertionError):
-
-        driver.response_as_dataframe('a,b\n1,2')
-
-
-def test_response_as_dataframe_fails_html():
-
-    with pytest.raises(AssertionError):
-
-        driver.response_as_dataframe('<html><body>Other stuff</body></html>')
-
-
-def test_authenticate_session_with_cookies():
-
-    request_cookies_browser = [{'name': 'item', 'value': 'item'}]
-
-    res = driver.authenticate_session(request_cookies_browser)
-
-    assert type(res) is requests.Session
-
-
-def test_write_outfile(mock_csv_response):
-
-    res = driver.write_outfile(mock_csv_response, 'test.csv', [])
-
-    assert type(res) is list
-
-    os.remove('test.csv')
-
-
-# test download_data
-def test_download_data_tmp_dir(tmp_path):
-
-    download_urls = [{"url": "https://github.com/hamishgibbs/uk_tier_data/raw/master/output/uk_tier_data_parliament_2020_10_25_1606.csv",
-                      "date": datetime(2020, 3, 1, 0)}]
-
-    driver.download_data(download_urls,
-                         "Britain",
-                         str(tmp_path),
-                         [])
-
-    assert os.path.exists(str(tmp_path) + '/Britain_2020_03_01_0000.csv')
diff --git a/tests/test_get_file_dataset_ids.py b/tests/test_get_file_dataset_ids.py
new file mode 100644
index 0000000..a15010b
--- /dev/null
+++ b/tests/test_get_file_dataset_ids.py
@@ -0,0 +1,20 @@
+import pytest
+
+from pull_fb.collection import get_file_dataset_ids
+
+
+def test_get_file_dataset_ids():
+
+    files = ["a/b/c/1_2020-01-01_0000.csv"]
+
+    res = get_file_dataset_ids(files)
+
+    assert res == ["1"]
+
+
+def test_get_file_dataset_ids_raises():
+
+    files = [None]
+
+    with pytest.raises(Exception):
+        get_file_dataset_ids(files)
diff --git a/tests/test_get_file_dates.py b/tests/test_get_file_dates.py
new file mode 100644
index 0000000..b412a38
--- /dev/null
+++ b/tests/test_get_file_dates.py
@@ -0,0 +1,21 @@
+import pytest
+from datetime import datetime
+
+from pull_fb.collection import get_file_dates
+
+
+def test_get_file_dates_ids():
+
+    files = ["a/b/c/1_2020-01-01_0000.csv"]
+
+    res = get_file_dates(files)
+
+    assert res == [datetime(2020, 1, 1)]
+
+
+def test_get_file_dates_raises():
+
+    files = [None]
+
+    with pytest.raises(Exception):
+        get_file_dates(files)
diff --git a/tests/test_get_outfn.py b/tests/test_get_outfn.py
new file mode 100644
index 0000000..489c576
--- /dev/null
+++ b/tests/test_get_outfn.py
@@ -0,0 +1,8 @@
+from pull_fb.collection import get_outfn
+
+
+def test_get_outfn():
+
+    res = get_outfn("1", "a")
+
+    assert res == "a/1.csv.zip"
diff --git a/tests/test_set_file_dataset_ids.py b/tests/test_set_file_dataset_ids.py
new file mode 100644
index 0000000..51e333d
--- /dev/null
+++ b/tests/test_set_file_dataset_ids.py
@@ -0,0 +1,23 @@
+import os
+from tests.utils import tmpdir
+from pull_fb.collection import set_file_dataset_ids
+
+
+def test_set_file_dataset_ids(tmpdir):
+
+    fn = str(tmpdir + "/123456789123456_2020-01-01_0000.csv")
+
+    with open(fn, "w") as f:
+        f.write("text")
+
+    assert os.path.exists(fn)
+
+    set_file_dataset_ids([fn], "123")
+
+    fn_exp = tmpdir + "/123_2020-01-01_0000.csv"
+
+    assert os.path.exists(
+        str(fn_exp)
+    )
+
+    os.remove(fn_exp)
diff --git a/tests/test_unzip_data.py b/tests/test_unzip_data.py
new file mode 100644
index 0000000..2dbd9dc
--- /dev/null
+++ b/tests/test_unzip_data.py
@@ -0,0 +1,38 @@
+import os
+import pytest
+import shutil
+from tests.utils import tmpdir
+
+from pull_fb.collection import unzip_data
+
+
+def test_unzip_data(tmpdir):
+
+    fn = str(tmpdir + "/123456789123456_2020-01-01_0000.csv")
+    zip_fn = fn + ".zip"
+
+    with open(fn, "w") as f:
+        f.write("text")
+
+    assert os.path.exists(fn)
+
+    shutil.make_archive(fn, 'zip', tmpdir)
+
+    assert os.path.exists(zip_fn)
+
+    os.remove(fn)
+
+    assert not os.path.exists(fn)
+
+    unzip_data(zip_fn, tmpdir)
+
+    assert os.path.exists(fn)
+
+
+def test_unzip_data_raises(tmpdir):
+
+    fn = str(tmpdir + 
"/123456789123456_2020-01-01_0000.csv") + zip_fn = fn + ".zip" + + with pytest.raises(Exception): + unzip_data(zip_fn, tmpdir) diff --git a/tests/test_url.py b/tests/test_url.py deleted file mode 100644 index 919af62..0000000 --- a/tests/test_url.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest -from datetime import datetime -from pull_fb import url - - -@pytest.fixture -def tilemovement_res(): - - return url.format_urls('TileMovement', '123', [datetime(2000, 1, 1)]) - - -@pytest.fixture -def tilepopulation_res(): - - return url.format_urls('TilePopulation', '123', [datetime(2000, 1, 1)]) - - -def test_format_urls_is_list(tilemovement_res): - - assert type(tilemovement_res) is list - - -def test_format_urls_item_is_dict(tilemovement_res): - - assert type(tilemovement_res[0]) is dict - - -def test_format_urls_url_is_str(tilemovement_res): - - assert type(tilemovement_res[0]['url']) is str - - -def test_format_urls_url_tilemovement_has_vector(tilemovement_res): - - assert 'vector' in tilemovement_res[0]['url'] - - -def test_format_urls_url_tilepopulation_has_raster(tilepopulation_res): - - assert 'raster' in tilepopulation_res[0]['url'] diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index bb8bb95..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,201 +0,0 @@ -import pytest -from pull_fb import utils -from datetime import datetime - - -@pytest.fixture -def example_date_config(): - return { - "start_date": datetime(2020, 1, 1), - "end_date": datetime(2020, 1, 2), - "frequency": 8, - } - - -@pytest.fixture(scope="session") -def local_config_file(tmpdir_factory): - - fn = tmpdir_factory.mktemp("tmp").join(".config") - - with open(fn, 'w') as f: - - f.write('Britain_TileMovement_ID=1671212783027520\nBritain_TileMovement_Origin=2020_03_10_0') - - return fn - - -@pytest.fixture(scope="session") -def local_config_file_missing_id(tmpdir_factory): - - fn = tmpdir_factory.mktemp("tmp").join(".config") - - with open(fn, 'w') as f: - - f.write('Britain_TileMovement_Origin=2020_03_10_0') - - return fn - - -@pytest.fixture(scope="session") -def local_config_file_missing_origin(tmpdir_factory): - - fn = tmpdir_factory.mktemp("tmp").join(".config") - - with open(fn, 'w') as f: - - f.write('Britain_TileMovement_ID=1671212783027520') - - return fn - - -@pytest.fixture(scope="session") -def local_config_file_malformed(tmpdir_factory): - - fn = tmpdir_factory.mktemp("tmp").join(".config") - - with open(fn, 'w') as f: - - f.write('Britain_Colocation_ID=229180671540661Britain_Colocation_Origin=2020_02_11Britain_TilePopulation_ID=881889318900484') - - return fn - - -@pytest.fixture(scope="session") -def output_csv(tmpdir_factory): - - fn = tmpdir_factory.mktemp("tmp").join("Britain_2020_06_05_1600.csv") - - with open(fn, 'w') as f: - - f.write('test') - - return fn - - -# Test date_str_to_datetime -def test_date_str_to_datetime_hours(): - - s = "2020_04_30_16" - - res = utils.date_str_to_datetime(s) - - assert type(res) is datetime - - -def test_date_str_to_datetime_days(): - - s = "2020_04_30" - - res = utils.date_str_to_datetime(s) - - assert type(res) is datetime - - -def test_date_str_to_datetime_errors(): - - s = "not a date" - - with pytest.raises(ValueError): - - utils.date_str_to_datetime(s) - - -# Test get_file_dates -def test_get_file_dates_8h(example_date_config): - - res = utils.get_file_dates( - example_date_config["start_date"], example_date_config["end_date"], 8 - ) - - assert len(res) == 3 - - -def test_get_file_dates_12h(example_date_config): - - res = 
utils.get_file_dates(
-        example_date_config["start_date"], example_date_config["end_date"], 12
-    )
-
-    assert len(res) == 2
-
-
-def test_get_file_dates_type(example_date_config):
-
-    res = utils.get_file_dates(
-        example_date_config["start_date"], example_date_config["end_date"], 12
-    )
-
-    assert type(res[0]) is datetime
-
-
-# test get_config
-def test_get_config_remote():
-
-    path = "https://raw.githubusercontent.com/hamishgibbs/pull_facebook_data_for_good/master/.config"
-
-    res = utils.get_config(path)
-
-    assert type(res) is dict
-
-
-def test_get_config_local(local_config_file):
-
-    res = utils.get_config(str(local_config_file))
-
-    assert type(res) is dict
-
-
-def test_get_config_local_raises_malformed(local_config_file_malformed):
-
-    with pytest.raises(Exception):
-
-        utils.get_config(str(local_config_file_malformed))
-
-
-# test get download variables
-def test_get_download_variables_works(local_config_file):
-
-    now = datetime.now()
-
-    res = utils.get_download_variables('TileMovement',
-                                       'Britain',
-                                       now,
-                                       str(local_config_file))
-
-    assert type(res) is dict
-
-    assert res['dataset_id'] == '1671212783027520'
-
-    assert res['start_date'] == datetime(2020, 3, 10, 0)
-
-    assert res['end_date'] == now
-
-
-def test_get_download_variables_missing_id(local_config_file_missing_id):
-
-    with pytest.raises(KeyError):
-
-        utils.get_download_variables('TileMovement',
-                                     'Britain',
-                                     datetime.now(),
-                                     str(local_config_file_missing_id))
-
-
-def test_get_download_variables_missing_origin(local_config_file_missing_origin):
-
-    with pytest.raises(KeyError):
-
-        utils.get_download_variables('TileMovement',
-                                     'Britain',
-                                     datetime.now(),
-                                     str(local_config_file_missing_origin))
-
-
-# test get_existing_dates
-def test_get_existing_dates(output_csv):
-
-    outdir = '/'.join(str(output_csv).split('/')[:-1])
-
-    res = utils.get_existing_dates(outdir, 'Britain')
-
-    assert type(res) is list
diff --git a/tests/test_write_zipfile.py b/tests/test_write_zipfile.py
new file mode 100644
index 0000000..222c765
--- /dev/null
+++ b/tests/test_write_zipfile.py
@@ -0,0 +1,24 @@
+import os
+import pytest
+from tests.utils import tmpdir
+
+from pull_fb.collection import write_zipfile
+
+
+def test_write_zipfile(tmpdir):
+
+    fn = tmpdir + "/test.zip"
+    data = [b"a"]
+
+    write_zipfile(fn, data)
+
+    assert os.path.exists(fn)
+
+
+def test_write_zipfile_raises(tmpdir):
+
+    fn = tmpdir + "/test.zip"
+    data = [None]
+
+    with pytest.raises(Exception):
+        write_zipfile(fn, data)
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..3a9444e
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,6 @@
+import pytest
+
+@pytest.fixture(scope="session")
+def tmpdir(tmpdir_factory):
+    tmp = tmpdir_factory.mktemp("data")
+    return tmp
diff --git a/tox.ini b/tox.ini
index 97cf870..d85ec16 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,10 +1,15 @@
 [tox]
-envlist = py38
+envlist = py37, py38, py39
+isolated_build = True
+
+[gh-actions]
+python =
+    3.7: py37
+    3.8: py38
+    3.9: py39
 [testenv]
deps =
     pytest
-    requests
-    selenium
-    pandas
+    browser_cookie3
commands = pytest
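
With this change, the CLI surface becomes `pull_fb auth status`, `pull_fb collection init --dataset_id <ID> --start_date <YYYY-MM-DD> [--end_date <YYYY-MM-DD>]`, and `pull_fb collection update`. For reference, `collection update` is equivalent to the following direct use of the new modules — a sketch using only names introduced in this diff, run from a directory already populated by `collection init`:

```python
from pull_fb.auth import get_auth_cookies
from pull_fb.collection import download_data, get_update_config

# Load .facebook.com cookies from the default browser via browser_cookie3.
cookies = get_auth_cookies()

# Derive start_date (newest local file), end_date (today) and the
# dataset id from the CSV filenames in the current directory.
config = get_update_config()

download_data(
    config["dataset_id"],
    config["start_date"],
    config["end_date"],
    cookies,
)
```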
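
A minimal smoke test for the new command tree (not part of this diff) could look like the sketch below: it stubs the cookie and auth helpers that `pull_fb.pull_fb` imports, so the command runs without a browser profile or network access.

```python
from click.testing import CliRunner

import pull_fb.pull_fb as pull_fb_cli


def test_auth_status_smoke(monkeypatch):

    # Replace the helpers bound into pull_fb.pull_fb at import time,
    # so `auth status` neither reads browser cookies nor hits Facebook.
    monkeypatch.setattr(pull_fb_cli, "get_auth_cookies", lambda: {})
    monkeypatch.setattr(pull_fb_cli, "check_auth", lambda cookies: None)

    result = CliRunner().invoke(pull_fb_cli.cli, ["auth", "status"])

    assert result.exit_code == 0
```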