diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d7b6454
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+test.py
+test.ipynb
+/dist
+/build
+**/__pycache__
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..dcb1530
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python.analysis.typeCheckingMode": "basic",
+    "python.analysis.autoImportCompletions": true
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index ea3eda9..258ffb6 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,25 @@
 # repo2prompt
-Turn a Github Repo's contents into a big prompt for long-context models like Claude 3 Opus.
 
-<a target="_blank" href="https://colab.research.google.com/github/andrewgcodes/repo2prompt/blob/main/repo2prompt.ipynb">
-  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
-</a>
+This is a simple package with minimal dependencies that turns a GitHub repo's contents into one big prompt for long-context models.
 
-Super easy:
-You will need a Github repo URL (public) and a Github access token. You can also use this with private repos but your token will need to have those permissions.
+It works for repos containing Rust, Python, and JavaScript code; files with the following extensions are included in the prompt:
+'.py', '.ipynb', '.html', '.css', '.js', '.jsx', '.rst', '.md', '.rs'
 
-Within the build_directory_tree function, you can specify which file extensions should be included in the output.
+Example Usage:
 
-The output is saved to a .txt file with name [repo]-formatted-prompt.txt
+```python
+import asyncio
+from repo2prompt.extraction import extract_repo
+prompt, tree = asyncio.run(extract_repo(github_url="https://github.com/vllm-project/vllm/tree/main", github_token="your_github_token"))
+```
+
+Or, relying on the `GITHUB_TOKEN` environment variable:
+
+```python
+import asyncio
+from repo2prompt.extraction import extract_repo
+prompt, tree = asyncio.run(extract_repo(github_url="https://github.com/vllm-project/vllm/tree/main")) # os.getenv("GITHUB_TOKEN") used internally
+```
+
+Note: GitHub allows only 5,000 API requests per hour, so be careful with large repositories.
 
-By the way, Github is limited to 5,000 API requests per hour so if a bug happens, that might be why!
diff --git a/build/lib/repo2prompt/__init__.py b/build/lib/repo2prompt/__init__.py
new file mode 100644
index 0000000..ca8a898
--- /dev/null
+++ b/build/lib/repo2prompt/__init__.py
@@ -0,0 +1,2 @@
+from .extraction import extract_repo
+
diff --git a/build/lib/repo2prompt/extraction.py b/build/lib/repo2prompt/extraction.py
new file mode 100644
index 0000000..4ede5ba
--- /dev/null
+++ b/build/lib/repo2prompt/extraction.py
@@ -0,0 +1,152 @@
+import os
+import base64
+from urllib.parse import urlparse
+from typing import Optional
+from tqdm import tqdm
+from typing import List, Dict, Any
+import asyncio
+import aiohttp
+from .types import RateLimitExceeded
+
+
+def parse_github_url(url):
+    """
+    Return the (owner, repo) pair taken from the first two path segments of a GitHub URL.
+
+    Raises ValueError when the URL path has fewer than two segments.
+    """
+    segments = urlparse(url).path.strip("/").split("/")
+    # Guard clause: anything shorter than owner/repo cannot name a repository.
+    if len(segments) < 2:
+        raise ValueError("Invalid GitHub URL provided!")
+    return segments[0], segments[1]
+
+async def fetch_repo_content(owner, repo, path='', token=None):
+    """
+    GET `path` from the repository's GitHub contents endpoint and return the decoded JSON.
+    Sends a Bearer Authorization header when `token` is truthy. Raises
+    RateLimitExceeded on HTTP 403 and a plain Exception on any other non-200 status.
+    """
+    request_headers = {"Accept": "application/vnd.github.v3+json"}
+    if token:
+        request_headers["Authorization"] = f"Bearer {token}"
+    endpoint = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+    async with aiohttp.ClientSession() as session:
+        async with session.get(endpoint, headers=request_headers) as response:
+            status = response.status
+            if status == 403:
+                raise RateLimitExceeded
+            if status != 200:
+                raise Exception(f"Error fetching content: {status}")
+            return await response.json()
+
+def get_file_content(file_info):
+    """
+    Return the text of a file record; base64 payloads are decoded as UTF-8 (bad bytes become U+FFFD).
+    """
+    if file_info['encoding'] != 'base64':
+        return file_info['content']
+    # errors='replace' keeps a single non-UTF-8 file from aborting the whole extraction.
+    return base64.b64decode(file_info['content']).decode('utf-8', errors='replace')
+
+
+
+async def build_directory_tree(
+    owner: str,
+    repo: str,
+    path: str = '',
+    token: Optional[str] = None,
+    indent: int = 0,
+    file_paths: Optional[List[tuple[int, str]]] = None,
+    is_base: bool = False
+) -> tuple[str, List[tuple[int, str]]]:
+    """
+    Recursively render `path` as an indented tree string and collect the files to embed.
+    Returns (tree_str, file_paths). `file_paths` holds one (indent, path) pair per file
+    whose name ends with an included extension; pairs are appended to the caller's list
+    when one is passed, otherwise to a fresh list. Anything under `.github` is skipped.
+    The tqdm progress bar is only displayed when `is_base` is true.
+    """
+    included = ('.py', '.ipynb', '.html', '.css', '.js', '.jsx', '.rst', '.md', '.rs')
+    # A fresh list per call: a shared `[]` default would leak paths between calls.
+    file_paths = [] if file_paths is None else file_paths
+    items = await fetch_repo_content(owner, repo, path, token)
+    if items is None:
+        return "", file_paths
+    progress = tqdm(total=len(items), desc="Building tree", disable=not is_base)
+
+    async def process_item(item: Dict[str, Any]) -> tuple[str, List[tuple[int, str]]]:
+        try:
+            if '.github' in item['path'].split('/'):
+                return "", []
+            if item['type'] == 'dir':
+                subtree, found = await build_directory_tree(owner, repo, item['path'], token, indent + 1)
+                return ' ' * indent + f"[{item['name']}/]\n" + subtree, found
+            found = [(indent, item['path'])] if item['name'].endswith(included) else []
+            return ' ' * indent + f"{item['name']}\n", found
+        finally:
+            progress.update(1)
+
+    # gather() keeps listing order, so the tree and file list are deterministic.
+    with progress:
+        results = await asyncio.gather(*(process_item(item) for item in items))
+    file_paths.extend(pair for _, found in results for pair in found)
+    return "".join(tree for tree, _ in results), file_paths
+
+
+
+                                            
+async def fetch_file_content(args, semaphore) -> str:
+    """Fetch one file while holding `semaphore`; `args` is (owner, repo, path, token, indent)."""
+    owner, repo, path, token, indent = args
+    async with semaphore:
+        body = get_file_content(await fetch_repo_content(owner, repo, path, token))
+        return "\n{0}{1}:\n{0}\n{2}\n{0}\n".format(' ' * indent, path, body)
+
+async def fetch_file_contents(owner, repo, file_paths, github_token, concurrency=100) -> str:
+    """
+    Download every (indent, path) entry in `file_paths` and concatenate the
+    formatted results in input order; at most `concurrency` fetches run at once.
+    """
+    limiter = asyncio.Semaphore(concurrency)
+    pending = [
+        fetch_file_content((owner, repo, path, github_token, indent), limiter)
+        for indent, path in file_paths
+    ]
+    # gather() returns results in submission order, regardless of completion order.
+    return ''.join(await asyncio.gather(*pending))
+
+async def extract_repo(
+    github_url: str,
+    github_token: Optional[str] = None,
+) -> tuple[str, str]:
+    '''
+    Build a prompt string and a directory tree string for a GitHub repository.
+    Args:
+    github_url : str : A URL to a Github repository containing a tree segment (tree, tree/main or tree/branch_name); the branch name is not forwarded to the API calls made here
+    github_token : Optional[str] : A Github personal access token, if not provided will use the GITHUB_TOKEN env variable
+    Returns:
+    tuple[str, str] : (README and file contents formatted for use in a prompt, directory tree)
+    Raises:
+    ValueError : if the URL path has no 'tree' segment after the owner/repo part
+    '''
+    import time  # imported locally, once: the module's top-level imports do not include time
+    if github_token is None:
+        github_token = os.getenv("GITHUB_TOKEN")
+    # Search the path segments instead of indexing [-2], so trailing slashes
+    # and branch names containing '/' (e.g. tree/feature/x) are accepted.
+    if 'tree' not in urlparse(github_url).path.strip('/').split('/')[2:]:
+        raise ValueError(
+            "Please provide a URL that ends with 'tree', 'tree/main', or 'tree/branch_name'. "
+            f"Got URL: {github_url}"
+        )
+    owner, repo = parse_github_url(github_url)
+    readme_info = await fetch_repo_content(owner, repo, 'README.md', github_token)
+    formatted_string = f"README.md:\n\n{get_file_content(readme_info)}\n\n\n"
+    t0 = time.time()
+    directory_tree, file_paths = await build_directory_tree(owner, repo, token=github_token, is_base=True)
+    print(f"Time in build_directory_tree: {time.time() - t0:.2f} seconds")
+    t0 = time.time()
+    formatted_string += await fetch_file_contents(owner, repo, file_paths, github_token)
+    print(f"Time in fetch_file_contents: {time.time() - t0:.2f} seconds")
+    return formatted_string, directory_tree
diff --git a/repo2prompt.ipynb b/repo2prompt.ipynb
deleted file mode 100644
index c555b29..0000000
--- a/repo2prompt.ipynb
+++ /dev/null
@@ -1,168 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "source": [
-        "You will need a Github repo URL (public) and a Github access token.\n",
-        "You can also use this with private repos but your token will need to have those permissions.\n",
-        "\n",
-        "Within the build_directory_tree function, you can specify which file extensions should be included in the output.\n",
-        "\n",
-        "The output is saved to a .txt file with name [repo]-formatted-prompt.txt"
-      ],
-      "metadata": {
-        "id": "H0WyoRb5kAw0"
-      }
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "SrLm4adgYrgi"
-      },
-      "outputs": [],
-      "source": [
-        "import requests\n",
-        "import base64\n",
-        "from urllib.parse import urlparse\n",
-        "\n",
-        "def parse_github_url(url):\n",
-        "    \"\"\"\n",
-        "    Parses your GitHub URL and extracts the repository owner and name.\n",
-        "    \"\"\"\n",
-        "    parsed_url = urlparse(url)\n",
-        "    path_segments = parsed_url.path.strip(\"/\").split(\"/\")\n",
-        "    if len(path_segments) >= 2:\n",
-        "        owner, repo = path_segments[0], path_segments[1]\n",
-        "        return owner, repo\n",
-        "    else:\n",
-        "        raise ValueError(\"Invalid GitHub URL provided!\")\n",
-        "\n",
-        "def fetch_repo_content(owner, repo, path='', token=None):\n",
-        "    \"\"\"\n",
-        "    Fetches the content of your GitHub repository.\n",
-        "    \"\"\"\n",
-        "    base_url = f\"https://api.github.com/repos/{owner}/{repo}/contents/{path}\"\n",
-        "    headers = {\"Accept\": \"application/vnd.github.v3+json\"}\n",
-        "    if token:\n",
-        "        headers[\"Authorization\"] = f\"Bearer {token}\"\n",
-        "    response = requests.get(base_url, headers=headers)\n",
-        "    if response.status_code == 200:\n",
-        "        return response.json()\n",
-        "    else:\n",
-        "        response.raise_for_status()\n",
-        "\n",
-        "def get_file_content(file_info):\n",
-        "    \"\"\"\n",
-        "    Retrieves and decodes the content of files\n",
-        "    \"\"\"\n",
-        "    if file_info['encoding'] == 'base64':\n",
-        "        return base64.b64decode(file_info['content']).decode('utf-8')\n",
-        "    else:\n",
-        "        return file_info['content']\n",
-        "\n",
-        "def build_directory_tree(owner, repo, path='', token=None, indent=0, file_paths=[]):\n",
-        "    \"\"\"\n",
-        "    Builds a string representation of the directory tree and collects file paths.\n",
-        "    \"\"\"\n",
-        "    items = fetch_repo_content(owner, repo, path, token)\n",
-        "    tree_str = \"\"\n",
-        "    for item in items:\n",
-        "        if '.github' in item['path'].split('/'):\n",
-        "            continue\n",
-        "        if item['type'] == 'dir':\n",
-        "            tree_str += '    ' * indent + f\"[{item['name']}/]\\n\"\n",
-        "            tree_str += build_directory_tree(owner, repo, item['path'], token, indent + 1, file_paths)[0]\n",
-        "        else:\n",
-        "            tree_str += '    ' * indent + f\"{item['name']}\\n\"\n",
-        "            # Indicate which file extensions should be included in the prompt!\n",
-        "            if item['name'].endswith(('.py', '.ipynb', '.html', '.css', '.js', '.jsx', '.rst', '.md')):\n",
-        "                file_paths.append((indent, item['path']))\n",
-        "    return tree_str, file_paths\n",
-        "\n",
-        "def retrieve_github_repo_info(url, token=None):\n",
-        "    \"\"\"\n",
-        "    Retrieves and formats repository information, including README, the directory tree,\n",
-        "    and file contents, while ignoring the .github folder.\n",
-        "    \"\"\"\n",
-        "    owner, repo = parse_github_url(url)\n",
-        "\n",
-        "    try:\n",
-        "        readme_info = fetch_repo_content(owner, repo, 'README.md', token)\n",
-        "        readme_content = get_file_content(readme_info)\n",
-        "        formatted_string = f\"README.md:\\n```\\n{readme_content}\\n```\\n\\n\"\n",
-        "    except Exception as e:\n",
-        "        formatted_string = \"README.md: Not found or error fetching README\\n\\n\"\n",
-        "\n",
-        "    directory_tree, file_paths = build_directory_tree(owner, repo, token=token)\n",
-        "\n",
-        "    formatted_string += f\"Directory Structure:\\n{directory_tree}\\n\"\n",
-        "\n",
-        "    for indent, path in file_paths:\n",
-        "        file_info = fetch_repo_content(owner, repo, path, token)\n",
-        "        file_content = get_file_content(file_info)\n",
-        "        formatted_string += '\\n' + '    ' * indent + f\"{path}:\\n\" + '    ' * indent + '```\\n' + file_content + '\\n' + '    ' * indent + '```\\n'\n",
-        "\n",
-        "    return formatted_string"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# You provide a Github repo URL and a Github personal access token.\n",
-        "# How to get an access token: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens\n",
-        "github_url = \"https://github.com/nomic-ai/nomic/tree/main\"\n",
-        "token = # Github access token (go to Developer Settings to generate one)\n",
-        "\n",
-        "owner, repo = parse_github_url(github_url)\n",
-        "output_file_name = f\"{repo}-formatted-prompt.txt\"\n",
-        "\n",
-        "formatted_repo_info = retrieve_github_repo_info(github_url, token = token)\n",
-        "with open(output_file_name, 'w', encoding='utf-8') as file:\n",
-        "    file.write(formatted_repo_info)\n",
-        "\n",
-        "print(f\"Repository information has been saved to {output_file_name}\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "cuV5LirEa5jI",
-        "outputId": "e89a5307-03f8-48e4-d721-88bb5c32e55c"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Repository information has been saved to nomic-formatted-prompt.txt\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [],
-      "metadata": {
-        "id": "rRBY0el6cDg5"
-      },
-      "execution_count": null,
-      "outputs": []
-    }
-  ]
-}
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..a05e60f
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,21 @@
+[metadata]
+name = repo2prompt
+version = 0.1.0
+author = Your Name
+author_email = your.email@example.com
+description = A simple description of my package
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/yourusername/my_package
+classifiers =
+    Programming Language :: Python :: 3
+    License :: OSI Approved :: MIT License
+    Operating System :: OS Independent
+[options]
+package_dir =
+    = src
+packages = find:
+python_requires = >=3.6
+
+[options.packages.find]
+where = src
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..3bb7b99
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,14 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='Repo2Prompt',  # NOTE(review): setup.cfg declares the name as "repo2prompt" — confirm which is intended
+    version='0.1.1',  # NOTE(review): setup.cfg says 0.1.0 — confirm which version is intended
+    description='Github repo -> prompt string',
+    packages=find_packages(where="src"),  # packages are discovered under src/
+    package_dir={"": "src"},  # the root package namespace maps to the src/ directory
+    install_requires=[
+        'requests>=2.20.0',  # NOTE(review): no module added in this change imports requests — confirm it is still needed
+        'tqdm>=4.60.0',
+        'aiohttp>=3.9.1',
+    ],
+)
\ No newline at end of file
diff --git a/src/repo2prompt/__init__.py b/src/repo2prompt/__init__.py
new file mode 100644
index 0000000..ca8a898
--- /dev/null
+++ b/src/repo2prompt/__init__.py
@@ -0,0 +1,2 @@
+from .extraction import extract_repo
+
diff --git a/src/repo2prompt/extraction.py b/src/repo2prompt/extraction.py
new file mode 100644
index 0000000..de8c6ac
--- /dev/null
+++ b/src/repo2prompt/extraction.py
@@ -0,0 +1,150 @@
+import os
+import base64
+from urllib.parse import urlparse
+from typing import Optional
+from tqdm import tqdm
+from typing import List, Dict, Any
+import asyncio
+import aiohttp
+from .types import RateLimitExceeded
+import time
+
+def parse_github_url(url):
+    """
+    Return the (owner, repo) pair taken from the first two path segments of a GitHub URL.
+
+    Raises ValueError when the URL path has fewer than two segments.
+    """
+    segments = urlparse(url).path.strip("/").split("/")
+    # Guard clause: anything shorter than owner/repo cannot name a repository.
+    if len(segments) < 2:
+        raise ValueError("Invalid GitHub URL provided!")
+    return segments[0], segments[1]
+
+async def fetch_repo_content(owner, repo, path='', token=None):
+    """
+    GET `path` from the repository's GitHub contents endpoint and return the decoded JSON.
+    Sends a Bearer Authorization header when `token` is truthy. Raises
+    RateLimitExceeded on HTTP 403 and a plain Exception on any other non-200 status.
+    """
+    request_headers = {"Accept": "application/vnd.github.v3+json"}
+    if token:
+        request_headers["Authorization"] = f"Bearer {token}"
+    endpoint = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+    async with aiohttp.ClientSession() as session:
+        async with session.get(endpoint, headers=request_headers) as response:
+            status = response.status
+            if status == 403:
+                raise RateLimitExceeded
+            if status != 200:
+                raise Exception(f"Error fetching content: {status}")
+            return await response.json()
+
+def get_file_content(file_info):
+    """
+    Return the text of a file record; base64 payloads are decoded as UTF-8 (bad bytes become U+FFFD).
+    """
+    if file_info['encoding'] != 'base64':
+        return file_info['content']
+    # errors='replace' keeps a single non-UTF-8 file from aborting the whole extraction.
+    return base64.b64decode(file_info['content']).decode('utf-8', errors='replace')
+
+
+
+async def build_directory_tree(
+    owner: str,
+    repo: str,
+    path: str = '',
+    token: Optional[str] = None,
+    indent: int = 0,
+    file_paths: Optional[List[tuple[int, str]]] = None,
+    is_base: bool = False
+) -> tuple[str, List[tuple[int, str]]]:
+    """
+    Recursively render `path` as an indented tree string and collect the files to embed.
+    Returns (tree_str, file_paths). `file_paths` holds one (indent, path) pair per file
+    whose name ends with an included extension; pairs are appended to the caller's list
+    when one is passed, otherwise to a fresh list. Anything under `.github` is skipped.
+    The tqdm progress bar is only displayed when `is_base` is true.
+    """
+    included = ('.py', '.ipynb', '.html', '.css', '.js', '.jsx', '.rst', '.md', '.rs')
+    # A fresh list per call: a shared `[]` default would leak paths between calls.
+    file_paths = [] if file_paths is None else file_paths
+    items = await fetch_repo_content(owner, repo, path, token)
+    if items is None:
+        return "", file_paths
+    progress = tqdm(total=len(items), desc="Building tree", disable=not is_base)
+
+    async def process_item(item: Dict[str, Any]) -> tuple[str, List[tuple[int, str]]]:
+        try:
+            if '.github' in item['path'].split('/'):
+                return "", []
+            if item['type'] == 'dir':
+                subtree, found = await build_directory_tree(owner, repo, item['path'], token, indent + 1)
+                return ' ' * indent + f"[{item['name']}/]\n" + subtree, found
+            found = [(indent, item['path'])] if item['name'].endswith(included) else []
+            return ' ' * indent + f"{item['name']}\n", found
+        finally:
+            progress.update(1)
+
+    # gather() keeps listing order, so the tree and file list are deterministic.
+    with progress:
+        results = await asyncio.gather(*(process_item(item) for item in items))
+    file_paths.extend(pair for _, found in results for pair in found)
+    return "".join(tree for tree, _ in results), file_paths
+                                            
+async def fetch_file_content(args, semaphore) -> str:
+    """Fetch one file while holding `semaphore`; `args` is (owner, repo, path, token, indent)."""
+    owner, repo, path, token, indent = args
+    async with semaphore:
+        body = get_file_content(await fetch_repo_content(owner, repo, path, token))
+        return "\n{0}{1}:\n{0}\n{2}\n{0}\n".format(' ' * indent, path, body)
+
+async def fetch_file_contents(owner, repo, file_paths, github_token, concurrency) -> str:
+    """
+    Download every (indent, path) entry in `file_paths` and concatenate the
+    formatted results in the same order as the input list.
+    """
+    # One shared semaphore caps how many fetches are in flight at a time.
+    limiter = asyncio.Semaphore(concurrency)
+    pending = []
+    for indent, path in file_paths:
+        pending.append(fetch_file_content((owner, repo, path, github_token, indent), limiter))
+    # gather() returns results in submission order, regardless of completion order.
+    return ''.join(await asyncio.gather(*pending))
+
+async def extract_repo(
+    github_url: str,
+    github_token: Optional[str] = None,
+    max_concurrent_requests: int = 100
+) -> tuple[str, str]:
+    '''
+    Build a prompt string and a directory tree string for a GitHub repository.
+    Args:
+    github_url : str,  A URL to a Github repository containing a tree segment (tree, tree/main or tree/branch_name); the branch name is not forwarded to the API calls made here
+    github_token : Optional[str],  A Github personal access token, if not provided will use the GITHUB_TOKEN env variable
+    max_concurrent_requests : int,  The number of concurrent files that are being read, must be >= 1
+    Returns:
+    tuple[str, str] : (README and file contents formatted for use in a prompt, directory tree)
+    Raises:
+    ValueError : if max_concurrent_requests < 1 or the URL path has no 'tree' segment after the owner/repo part
+    '''
+    if max_concurrent_requests < 1:  # Semaphore(0) would block every fetch forever
+        raise ValueError(f"max_concurrent_requests must be >= 1. Got: {max_concurrent_requests}")
+    github_token = os.getenv("GITHUB_TOKEN") if github_token is None else github_token
+    # Search the path segments instead of indexing [-2], so trailing slashes and branch names containing '/' pass.
+    if 'tree' not in urlparse(github_url).path.strip('/').split('/')[2:]:
+        raise ValueError(
+            "Please provide a URL that ends with 'tree', 'tree/main', or 'tree/branch_name'. "
+            f"Got URL: {github_url}"
+        )
+    owner, repo = parse_github_url(github_url)
+    readme_info = await fetch_repo_content(owner, repo, 'README.md', github_token)
+    formatted_string = f"README.md:\n\n{get_file_content(readme_info)}\n\n\n"
+    t0 = time.time()
+    directory_tree, file_paths = await build_directory_tree(owner, repo, token=github_token, is_base=True)
+    print(f"Time in build_directory_tree: {time.time() - t0:.2f} seconds")
+    t0 = time.time()
+    formatted_string += await fetch_file_contents(owner, repo, file_paths, github_token, max_concurrent_requests)
+    print(f"Time in fetch_file_contents: {time.time() - t0:.2f} seconds")
+    return formatted_string, directory_tree
diff --git a/src/repo2prompt/types.py b/src/repo2prompt/types.py
new file mode 100644
index 0000000..b141de8
--- /dev/null
+++ b/src/repo2prompt/types.py
@@ -0,0 +1,10 @@
+from datetime import datetime
+import time
+class RateLimitExceeded(Exception):
+    """
+    Signals that the rate limit was exceeded; the message names a retry time one hour after construction.
+    """
+    def __init__(self):
+        retry_at = datetime.fromtimestamp(int(time.time()) + 60 * 60)
+        super().__init__(f"Rate limit exceeded. Please wait until {retry_at} to try again.")
+
diff --git a/workingRepo2PromptExtension.zip b/workingRepo2PromptExtension.zip
deleted file mode 100644
index 7cc772f..0000000
Binary files a/workingRepo2PromptExtension.zip and /dev/null differ