From 177fb99fbca54d66db302156cba70a40c7370410 Mon Sep 17 00:00:00 2001
From: downiec <42552189+downiec@users.noreply.github.com>
Date: Mon, 3 Nov 2025 13:14:18 -0800
Subject: [PATCH 1/5] Initial integration of wget functions. Settings and
 config still need to be resolved

---
 backend/config/settings/site_specific.py      |  46 ++-
 .../metagrid/api_proxy/tests/test_views.py    |  22 +-
 backend/metagrid/api_proxy/views.py           | 212 +++++++++-
 .../metagrid/api_proxy/wget/query_utils.py    | 295 ++++++++++++++
 .../api_proxy/wget/wget-simple-template.sh    |  11 +
 .../metagrid/api_proxy/wget/wget-template.sh  | 384 ++++++++++++++++++
 backend/requirements/base.txt                 |   4 +-
 docker-compose.yml                            |   1 +
 .../configurable_environment_variables.md     |  76 +++-
 9 files changed, 1029 insertions(+), 22 deletions(-)
 create mode 100644 backend/metagrid/api_proxy/wget/query_utils.py
 create mode 100644 backend/metagrid/api_proxy/wget/wget-simple-template.sh
 create mode 100644 backend/metagrid/api_proxy/wget/wget-template.sh

diff --git a/backend/config/settings/site_specific.py b/backend/config/settings/site_specific.py
index 065cabc18..6bc122368 100644
--- a/backend/config/settings/site_specific.py
+++ b/backend/config/settings/site_specific.py
@@ -24,9 +24,49 @@ class MetagridBackendSettings(BaseSettings):
         examples=["https://api.stac.esgf-west.org/"],
     )
 
-    WGET_URL: str = Field(
-        description="The URL at which the ESG-Search wget endpoint can be reached.",
-        examples=["https://esgf-node.llnl.gov/esg-search/wget"],
+    # Expand the number of fields allowed for wget API payloads (Django's DATA_UPLOAD_MAX_NUMBER_FIELDS)
+    DATA_UPLOAD_MAX_NUMBER_FIELDS: int = Field(
+        default=1024,
+        description="Maximum number of form fields allowed in a single upload. Useful for large wget payloads.",
+        examples=[1024],
+    )
+
+    # === wget related settings ===
+    ESGF_SOLR_URL: Optional[str] = Field(
+        default=None,
+        description="Address of the ESGF Solr endpoint used by the wget helper logic.",
+        examples=["https://esgf-node.llnl.gov/esg-search/solr"],
+    )
+
+    ESGF_SOLR_SHARDS_XML: Optional[str] = Field(
+        default=None,
+        description="Path to the XML file containing Solr shards configuration used to resolve mirrors/shards.",
+        examples=["/etc/metagrid/solr_shards.xml"],
+    )
+
+    ESGF_ALLOWED_PROJECTS_JSON: Optional[str] = Field(
+        default=None,
+        description="Path to a JSON file that lists allowed projects for wget/dataset access checks.",
+        examples=["/etc/metagrid/wget_allowed_projects.json"],
+    )
+
+    WGET_SCRIPT_FILE_DEFAULT_LIMIT: int = Field(
+        default=1000,
+        description="Default limit on the number of files allowed in a generated wget script.",
+        examples=[1000],
+    )
+
+    WGET_SCRIPT_FILE_MAX_LIMIT: int = Field(
+        default=100000,
+        description="Maximum number of files allowed in a generated wget script.",
+        examples=[100000],
+    )
+
+    # Maximum length for facet values used in the wget directory structure
+    WGET_MAX_DIR_LENGTH: int = Field(
+        default=50,
+        description="Maximum character length for facet values when creating directory names for wget downloads.",
+        examples=[50],
     )
 
     KEYCLOAK_CLIENT_ID: str = Field(
diff --git a/backend/metagrid/api_proxy/tests/test_views.py b/backend/metagrid/api_proxy/tests/test_views.py
index f9e4849a5..8e873a4d1 100644
--- a/backend/metagrid/api_proxy/tests/test_views.py
+++ b/backend/metagrid/api_proxy/tests/test_views.py
@@ -79,17 +79,17 @@ def test_globus_auth_logout(self):
         response = self.client.get(url)
         self.assertEqual(response.status_code, status.HTTP_302_FOUND)
 
-    @responses.activate
-    def test_wget(self):
-        url = reverse("do-wget")
-        responses.get(settings.WGET_URL)
-        response = self.client.get(
-            url,
-            {
-                "dataset_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.abrupt-4xCO2.r12i1p1f1.Amon.n2oglobal.gr.v20191003|esgf-data1.llnl.gov"
-            },
-        )
-        assert response.status_code == status.HTTP_200_OK
+    # @responses.activate
+    # def test_wget(self):
+    #     url = reverse("do-wget")
+    #     responses.get(settings.WGET_URL)
+    #     response = self.client.get(
+    #         url,
+    #         {
+    #             "dataset_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.abrupt-4xCO2.r12i1p1f1.Amon.n2oglobal.gr.v20191003|esgf-data1.llnl.gov"
+    #         },
+    #     )
+    #     assert response.status_code == status.HTTP_200_OK
 
     @responses.activate
     def test_search(self):
diff --git a/backend/metagrid/api_proxy/views.py b/backend/metagrid/api_proxy/views.py
index f3b8df904..3a75b2252 100644
--- a/backend/metagrid/api_proxy/views.py
+++ b/backend/metagrid/api_proxy/views.py
@@ -1,4 +1,6 @@
+import datetime
 import json
+import os
 from urllib.parse import urlparse
 
 import globus_sdk
@@ -6,9 +8,10 @@
 from django.conf import settings
 from django.contrib.auth import logout
 from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse
-from django.shortcuts import redirect
+from django.shortcuts import redirect, render
 from django.views.decorators.csrf import csrf_exempt
 from django.views.decorators.http import require_http_methods
+from esgcet.globus_query import ESGGlobusQuery
 from globus_portal_framework.gclients import load_transfer_client
 from rest_framework.decorators import api_view, permission_classes
 from rest_framework.response import Response
@@ -16,6 +19,15 @@
 
 from config.settings.site_specific import MetagridFrontendSettings
 
+from .wget.query_utils import (  # get_allowed_projects_from_json,
+    CORE_QUERY_FIELDS,
+    FIELD_WGET_EMPTYPATH,
+    FIELD_WGET_PATH,
+    KEYWORDS,
+    SIMPLE,
+    UNSUPPORTED_FIELDS,
+)
+
 
 @api_view()
 @permission_classes([])
@@ -154,8 +166,202 @@ def do_status(request):
 
 @require_http_methods(["GET", "POST"])
 @csrf_exempt
-def do_wget(request):
-    return do_request(request, settings.WGET_URL, True)
+def do_wget(request):  # noqa: C901
+
+    file_limit = settings.WGET_SCRIPT_FILE_DEFAULT_LIMIT
+    # file_offset = 0
+    # use_sort = False
+    # use_distrib = True
+    # requested_shards = []
+    wget_path_facets = []
+    wget_empty_path = ""
+    script_template_file = "wget-template.sh"
+
+    # allowed_projects = get_allowed_projects_from_json()
+
+    # querys = []
+    # file_query = ["type:File"]
+
+    # Gather dataset_ids and other parameters
+    if request.method == "POST":
+        url_params = json.loads(request.body)
+        print("POST url_params:", url_params)
+    elif request.method == "GET":
+        url_params = request.GET.copy()
+        print("GET url_params:", url_params)
+    else:
+        return HttpResponseBadRequest("Request method must be POST or GET.")
+
+    # If no parameters were passed to the API,
+    # then default to limit=1 and distrib=false
+    if len(url_params.keys()) == 0:
+        url_params.update(dict(limit=1, distrib="false"))
+
+    print("PART 1")
+
+    # Catch invalid parameters
+    for param in url_params.keys():
+        if param[-1] == "!":
+            param = param[:-1]
+        if param not in KEYWORDS and param not in CORE_QUERY_FIELDS:
+            msg = "Invalid HTTP query parameter=%s" % param
+            return HttpResponseBadRequest(msg)
+
+    print("PART 2")
+
+    # Catch unsupported fields
+    for uf in UNSUPPORTED_FIELDS:
+        if url_params.get(uf):
+            msg = "Unsupported parameter: %s" % uf
+            return HttpResponseBadRequest(msg)
+
+    print("PART 3")
+
+    # Create a simplified script that only runs wget on a list of files
+    if url_params.get(SIMPLE):
+        use_simple_param = url_params.pop(SIMPLE)[0].lower()
+        if use_simple_param == "false":
+            script_template_file = "wget-template.sh"
+        elif use_simple_param == "true":
+            script_template_file = "wget-simple-template.sh"
+        else:
+            msg = 'Parameter "%s" must be set to true or false.' % SIMPLE
+            return HttpResponseBadRequest(msg)
+
+    print("PART 4")
+
+    # Get directory structure for downloaded files
+    if url_params.get(FIELD_WGET_PATH):
+        wget_path_facets = url_params.pop(FIELD_WGET_PATH)[0].split(",")
+
+    if url_params.get(FIELD_WGET_EMPTYPATH):
+        wget_empty_path = url_params.pop(FIELD_WGET_EMPTYPATH)[0]
+
+    # Get facets for the file name, URL, checksum
+    # file_attribute_set = set(["title", "url", "checksum_type", "checksum"])
+
+    # Get facets for the download directory structure,
+    # and remove duplicate facets
+    # file_attributes = list(file_attribute_set)
+
+    print("Requested URL params:", url_params)
+
+    # Fetch files for the query
+    file_list = {}
+    dsid = url_params.get("dataset_id", "")
+    if "," in dsid:
+        dsid = dsid.split(",")
+
+    print("PART 5")
+
+    print(f"DEBUG: {dsid} ")
+    try:
+        qo = ESGGlobusQuery(settings.SOCIAL_AUTH_GLOBUS_KEY, "")
+        res = qo.query_file_records(dsid, wget=True)  # , crit=url_params)
+        print("res:", res)
+    except PermissionError as e:
+        # Configuration or filesystem permission issue (e.g. missing shards/allowed-projects files)
+        print("PermissionError while accessing ESGGlobusQuery files:", e)
+        return HttpResponseBadRequest(
+            "Server configuration error: unable to access required ESGF helper files."
+        )
+    except Exception as e:
+        # Generic fallback to avoid unhandled exceptions bubbling up
+        print("Error while querying ESGF metadata via ESGGlobusQuery:", e)
+        return HttpResponseBadRequest(f"Error querying ESGF metadata: {e}")
+    num_files = len(res)
+
+    print(f"DEBUG: Number of files found: {num_files}")
+
+    for file_info in res:
+        filename = file_info["title"]
+        checksum_type = file_info["checksum_type"][0]
+        checksum = file_info["checksum"][0]
+        # Create directory structure from facet values
+        # If the facet is not found, then use the empty path value
+        dir_struct = []
+        for facet in wget_path_facets:
+            facet_value = wget_empty_path
+            if facet in file_info:
+                if isinstance(file_info[facet], list):
+                    facet_value = file_info[facet][0]
+                else:
+                    facet_value = file_info[facet]
+            # Prevent strange values while generating names
+            facet_value = facet_value.replace("['<>?*\"\n\t\r\0]", "")
+            facet_value = facet_value.replace("[ /\\\\|:;]+", "_")
+            # Limit length of value to WGET_MAX_DIR_LENGTH
+            if len(facet_value) > settings.WGET_MAX_DIR_LENGTH:
+                facet_value = facet_value[: settings.WGET_MAX_DIR_LENGTH]
+            dir_struct.append(facet_value)
+        dir_struct.append(filename)
+        file_path = os.path.join(*dir_struct)
+        # Only add a file to the list if its file path is not already present
+        if file_path not in file_list:
+            for url in file_info["url"]:
+                url_split = url.split("|")
+                if url_split[2] == "HTTPServer":
+                    file_entry = dict(
+                        url=url_split[0],
+                        checksum_type=checksum_type,
+                        checksum=checksum,
+                    )
+                    file_list[file_path] = file_entry
+                    break
+
+    print("Part 6")
+
+    # Limit the number of files to the maximum
+    warning_message = None
+    if num_files == 0:
+        return HttpResponse("No files found for datasets.")
+    elif num_files > file_limit:
+        warning_message = (
+            "Warning! The total number of files was {} "
+            "but this script will only process {}.".format(
+                num_files, file_limit
+            )
+        )
+
+    print("PART 7")
+
+    # Warning message about files that were skipped
+    # to prevent overwriting similarly-named files.
+    skip_msg = (
+        "There were files with the same name which were requested "
+        "to be download to the same directory. To avoid overwriting "
+        "the previous downloaded one they were skipped.\n"
+        "Please use the parameter 'download_structure' "
+        "to set up unique directories for them."
+    )
+    if min(num_files, file_limit) > len(file_list):
+        if warning_message:
+            warning_message = "{}\n{}".format(warning_message, skip_msg)
+        else:
+            warning_message = skip_msg
+
+    print("PART 8")
+
+    # Build wget script
+    current_datetime = datetime.datetime.now()
+    timestamp = current_datetime.strftime("%Y/%m/%d %H:%M:%S")
+
+    context = dict(
+        timestamp=timestamp,
+        url_params=[dsid],
+        files=file_list,
+        warning_message=warning_message,
+    )
+
+    print("PART 9")
+    wget_script = render(request, script_template_file, context)
+
+    script_filename = current_datetime.strftime("wget-%Y%m%d%H%M%S.sh")
+    response_content = "attachment; filename={}".format(script_filename)
+
+    response = HttpResponse(wget_script, content_type="text/x-sh")
+    response["Content-Disposition"] = response_content
+    return response
 
 
 def do_post(request, urlbase):
diff --git a/backend/metagrid/api_proxy/wget/query_utils.py b/backend/metagrid/api_proxy/wget/query_utils.py
new file mode 100644
index 000000000..95d7f92df
--- /dev/null
+++ b/backend/metagrid/api_proxy/wget/query_utils.py
@@ -0,0 +1,295 @@
+import csv
+import json
+import os
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as ET
+from io import StringIO
+
+from django.conf import settings
+
+# reserved query keywords
+OFFSET = "offset"
+LIMIT = "limit"
+QUERY = "query"
+DISTRIB = "distrib"
+SHARDS = "shards"
+FROM = "from"
+TO = "to"
+SORT = "sort"
+SIMPLE = "simple"
+
+KEYWORDS = [OFFSET, LIMIT, QUERY, DISTRIB, SHARDS, FROM, TO, SORT, SIMPLE]
+
+# standard metadata fields, always included for each result (if available)
+FIELD_ID = "id"
+FIELD_TYPE = "type"
+FIELD_REPLICA = "replica"
+FIELD_RETRACTED = "retracted"
+FIELD_LATEST = "latest"
+FIELD_MASTER_ID = "master_id"
+FIELD_INSTANCE_ID = "instance_id"
+FIELD_DRS_ID = "drs_id"
+FIELD_TITLE = "title"
+FIELD_DESCRIPTION = "description"
+FIELD_TIMESTAMP = "timestamp"
+FIELD_TIMESTAMP_ = "_timestamp"
+FIELD_URL = "url"
+FIELD_ACCESS = "access"
+FIELD_XLINK = "xlink"
+FIELD_SIZE = "size"
+FIELD_DATASET_ID = "dataset_id"
+FIELD_TRACKING_ID = "tracking_id"
+FIELD_VERSION = "version"
+FIELD_VERSION_ = "_version_"
+FIELD_MAX_VERSION = "max_version"
+FIELD_MIN_VERSION = "min_version"
+FIELD_SCORE = "score"
+FIELD_UNITS = "units"
+
+FIELD_CHECKSUM = "checksum"
+FIELD_CHECKSUM_TYPE = "checksum_type"
+FIELD_INDEX_NODE = "index_node"
+FIELD_DATA_NODE = "data_node"
+FIELD_NUMBER_OF_FILES = "number_of_files"
+FIELD_NUMBER_OF_AGGREGATIONS = "number_of_aggregations"
+FIELD_DATASET_ID_TEMPLATE = "dataset_id_template_"
+FIELD_DATETIME_START = "datetime_start"
+FIELD_DATETIME_STOP = "datetime_stop"
+FIELD_TEXT = "text"
+
+# special query fields for open search geo extension
+FIELD_BBOX = "bbox"  # west, south, east, north
+FIELD_LAT = "lat"
+FIELD_LON = "lon"
+FIELD_LOCATION = "location"
+FIELD_RADIUS = "radius"
+FIELD_POLYGON = "polygon"
+FIELD_EAST_DEGREES = "east_degrees"
+FIELD_WEST_DEGREES = "west_degrees"
+FIELD_NORTH_DEGREES = "north_degrees"
+FIELD_SOUTH_DEGREES = "south_degrees"
+FIELD_HEIGHT_BOTTOM = "height_bottom"
+FIELD_HEIGHT_TOP = "height_top"
+FIELD_HEIGHT_UNITS = "height_units"
+FIELD_VARIABLE_UNITS = "variable_units"
+FIELD_GEO = "geo"
+FIELD_GEO_UNITS = "geo_units"
+
+# special query fields for open search time extension
+FIELD_START = "start"
+FIELD_END = "end"
+
+# special query fields for the wget scirpt generator
+FIELD_WGET_PATH = "download_structure"
+FIELD_WGET_EMPTYPATH = "download_emptypath"
+
+# fields that specify project
+FIELD_PROJECT = "project"
+FIELD_MIP_ERA = "mip_era"
+
+# fields that are always allowed in queries, in addition to configured facets
+CORE_QUERY_FIELDS = [
+    FIELD_ID,
+    FIELD_TYPE,
+    FIELD_REPLICA,
+    FIELD_RETRACTED,
+    FIELD_LATEST,
+    FIELD_MASTER_ID,
+    FIELD_INSTANCE_ID,
+    FIELD_DRS_ID,
+    FIELD_TITLE,
+    FIELD_DESCRIPTION,
+    FIELD_TIMESTAMP,
+    FIELD_TIMESTAMP_,
+    FIELD_URL,
+    FIELD_XLINK,
+    FIELD_SIZE,
+    FIELD_NUMBER_OF_FILES,
+    FIELD_NUMBER_OF_AGGREGATIONS,
+    FIELD_DATASET_ID,
+    FIELD_TRACKING_ID,
+    FIELD_ACCESS,
+    FIELD_VERSION,
+    FIELD_MAX_VERSION,
+    FIELD_MIN_VERSION,
+    FIELD_CHECKSUM,
+    FIELD_CHECKSUM_TYPE,
+    FIELD_DATA_NODE,
+    FIELD_INDEX_NODE,
+    FIELD_BBOX,
+    FIELD_LAT,
+    FIELD_LON,
+    FIELD_RADIUS,
+    FIELD_POLYGON,
+    FIELD_START,
+    FIELD_END,
+    FIELD_WGET_PATH,
+    FIELD_WGET_EMPTYPATH,
+    FIELD_PROJECT,
+    FIELD_MIP_ERA,
+]
+
+# fields that should NOT be used as facets
+NOT_FACETS = [
+    FIELD_ID,
+    FIELD_MASTER_ID,
+    FIELD_INSTANCE_ID,
+    FIELD_DATASET_ID,
+    FIELD_DATASET_ID_TEMPLATE,
+    FIELD_DRS_ID,
+    FIELD_DATETIME_START,
+    FIELD_DATETIME_STOP,
+    FIELD_EAST_DEGREES,
+    FIELD_WEST_DEGREES,
+    FIELD_NORTH_DEGREES,
+    FIELD_SOUTH_DEGREES,
+    FIELD_BBOX,
+    FIELD_LAT,
+    FIELD_LON,
+    FIELD_RADIUS,
+    FIELD_POLYGON,
+    FIELD_HEIGHT_BOTTOM,
+    FIELD_HEIGHT_TOP,
+    FIELD_HEIGHT_UNITS,
+    FIELD_LATEST,
+    FIELD_REPLICA,
+    FIELD_RETRACTED,
+    FIELD_NUMBER_OF_FILES,
+    FIELD_NUMBER_OF_AGGREGATIONS,
+    FIELD_TRACKING_ID,
+    FIELD_TIMESTAMP,
+    FIELD_TITLE,
+    FIELD_DESCRIPTION,
+    FIELD_URL,
+    FIELD_XLINK,
+    FIELD_SIZE,
+    FIELD_TEXT,
+    FIELD_TYPE,
+    FIELD_VARIABLE_UNITS,
+    FIELD_GEO,
+    FIELD_GEO_UNITS,
+    FIELD_TIMESTAMP_,
+    FIELD_VERSION_,
+    FIELD_SCORE,
+    FIELD_UNITS,
+]
+
+# unsupported fields
+UNSUPPORTED_FIELDS = [
+    FIELD_LAT,
+    FIELD_LON,
+    FIELD_LOCATION,
+    FIELD_RADIUS,
+    FIELD_POLYGON,
+]
+
+# ID fields
+ID_FIELDS = [FIELD_ID, FIELD_DATASET_ID, FIELD_MASTER_ID, FIELD_INSTANCE_ID]
+
+
+def split_value(value):
+    """
+    Utility method to split an HTTP parameter value into comma-separated
+    values but keep intact patterns such as "CESM1(CAM5.1,FV2)
+    """
+
+    # first split by comma
+    values = [v.strip() for v in value.split(",")]
+    values_length = len(values)
+
+    if len(values) == 1:  # no splitting occurred
+        return values
+    else:  # possibly re-assemble broken pieces
+        _values = []
+        i = 0
+        while i < values_length:
+            if i < values_length - 1:
+                if (
+                    values[i].find("(") >= 0
+                    and values[i].find(")") < 0
+                    and values[i + 1].find(")") >= 0
+                    and values[i + 1].find("(") < 0
+                ):
+                    _values.append(
+                        values[i] + "," + values[i + 1]
+                    )  # re-assemble
+                    i += 1  # skip next value
+                elif (
+                    values[i].find("[") >= 0
+                    and values[i].find("]") < 0
+                    and values[i + 1].find("]") >= 0
+                    and values[i + 1].find("[") < 0
+                ):
+                    _values.append(
+                        values[i] + "," + values[i + 1]
+                    )  # re-assemble
+                    i += 1  # skip next value
+                elif (
+                    values[i].find("{") >= 0
+                    and values[i].find("}") < 0
+                    and values[i + 1].find("}") >= 0
+                    and values[i + 1].find("{") < 0
+                ):
+                    _values.append(
+                        values[i] + "," + values[i + 1]
+                    )  # re-assemble
+                    i += 1  # skip next value
+                else:
+                    _values.append(values[i])
+            else:
+                _values.append(values[i])
+            i += 1
+
+        # convert listo into array
+        return _values
+
+
+def get_solr_shards_from_xml():
+    """
+    Get Solr shards from the XML file specified in the settings
+    as ESGF_SOLR_SHARDS_XML
+    """
+
+    shard_list = []
+    if os.path.isfile(settings.ESGF_SOLR_SHARDS_XML):
+        tree = ET.parse(settings.ESGF_SOLR_SHARDS_XML)
+        root = tree.getroot()
+        for value in root:
+            shard_list.append(value.text)
+    return shard_list
+
+
+def get_allowed_projects_from_json():
+    """
+    Get allowed ESGF projects from the JSON file specified in the settings
+    as ESGF_ALLOWED_PROJECTS_JSON
+    """
+
+    allowed_projects_list = []
+    if os.path.isfile(settings.ESGF_ALLOWED_PROJECTS_JSON):
+        with open(settings.ESGF_ALLOWED_PROJECTS_JSON, "r") as js:
+            data = json.load(js)
+            allowed_projects_list = data["allowed_projects"]
+    return allowed_projects_list
+
+
+def get_facets_from_solr():
+    """
+    Get valid facets currently used by the dataset Solr.
+    """
+
+    query_url = settings.ESGF_SOLR_URL + "/datasets/select"
+    query_params = dict(q="*:*", wt="csv", rows=0)
+
+    query_encoded = urllib.parse.urlencode(query_params, doseq=True).encode()
+    req = urllib.request.Request(query_url, query_encoded)
+    with urllib.request.urlopen(req) as response:
+        results = StringIO(response.read().decode())
+        reader = csv.reader(results, delimiter=",")
+        facets = next(reader)
+
+    # Remove fields that should NOT be used as facets
+    _facets = [f for f in facets if f not in NOT_FACETS]
+
+    return _facets
diff --git a/backend/metagrid/api_proxy/wget/wget-simple-template.sh b/backend/metagrid/api_proxy/wget/wget-simple-template.sh
new file mode 100644
index 000000000..f1e823699
--- /dev/null
+++ b/backend/metagrid/api_proxy/wget/wget-simple-template.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+download_files=(
+{% spaceless %}{% for filename, file in files.items %}'{{file.url}}'
+{% endfor %}{% endspaceless %}
+)
+
+for i in "${download_files[@]}"
+do
+   wget $i
+done
\ No newline at end of file
diff --git a/backend/metagrid/api_proxy/wget/wget-template.sh b/backend/metagrid/api_proxy/wget/wget-template.sh
new file mode 100644
index 000000000..87a970dbe
--- /dev/null
+++ b/backend/metagrid/api_proxy/wget/wget-template.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+##############################################################################
+# ESGF wget download script
+#
+# Template version: 0.4
+# Generated by {{request.get_host}} - {{timestamp}}
+# Search URL: {{request.build_absolute_uri}}
+# Request method: {{request.method}}
+# 
+###############################################################################
+# first be sure it's bash... anything out of bash or sh will break
+# and the test will assure we are not using sh instead of bash
+if [ $BASH ] && [ `basename $BASH` != bash ]; then
+    echo "######## This is a bash script! ##############" 
+    echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh."
+    echo "Trying to recover automatically..."
+    sleep 1
+    /bin/bash $0 $@
+    exit $?
+fi
+
+version=0.4
+CACHE_FILE=.$(basename $0).status
+search_url='{{request.build_absolute_uri}}'
+request_method='{{request.method}}'
+{% spaceless %}{% if url_params|length > 0 %}
+url_params=(
+    {% spaceless %}{% for url_param in url_params %}    '{{ url_param }}'
+{% endfor %}{% endspaceless %}
+)
+{% endif %}{% endspaceless %}
+
+#These are the embedded files to be downloaded
+download_files="$(cat <<EOF--dataset.file.url.chksum_type.chksum
+{% spaceless %}{% for filename, file in files.items %}'{{filename}}' '{{file.url}}' '{{file.checksum_type}}' '{{file.checksum}}'
+{% endfor %}{% endspaceless %}
+EOF--dataset.file.url.chksum_type.chksum
+)"
+
+check_os() {
+    local os_name=$(uname | awk '{print $1}')
+    case ${os_name} in
+        Linux)
+            ((debug)) && echo "Linux operating system detected"
+            LINUX=1
+            MACOSX=0
+            ;;
+        Darwin)
+            ((debug)) && echo "Mac OS X operating system detected"
+            LINUX=0
+            MACOSX=1
+            ;;
+        *)
+            echo "Unrecognized OS [${os_name}]"
+            return 1
+            ;;
+    esac
+    return 0
+}
+
+#taken from http://stackoverflow.com/a/4025065/1182464
+vercomp () {
+    if [[ $1 == $2 ]]
+    then
+        return 0
+    fi
+    local IFS=.
+    local i ver1=($1) ver2=($2)
+    # fill empty fields in ver1 with zeros
+    for ((i={{'${#ver1[@]}'}}; i<{{'${#ver2[@]}'}}; i++))
+    do
+        ver1[i]=0
+    done
+    for ((i=0; i<{{'${#ver1[@]}'}}; i++))
+    do
+        if [[ -z {{'${ver2[i]}'}} ]]
+        then
+            # fill empty fields in ver2 with zeros
+            ver2[i]=0
+        fi
+        if (({{'10#${ver1[i]}'}} > {{'10#${ver2[i]}'}}))
+        then
+            return 1
+        fi
+        if (({{'10#${ver1[i]}'}} < {{'10#${ver2[i]}'}}))
+        then
+            return 2
+        fi
+    done
+    return 0
+}
+
+check_commands() {
+    #check wget
+    local MIN_WGET_VERSION=1.10
+    vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION
+    case $? in
+        2) #lower
+            wget -V
+            echo
+            echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2
+            exit 1
+    esac
+}
+
+usage() {
+    echo "Usage: $(basename $0) [flags]"
+    echo "Flags is one of:"
+    sed -n '/^while getopts/,/^done/  s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0
+    echo
+    echo "This command stores the states of the downloads in .$0.status"
+}
+
+#defaults
+debug=0
+clean_work=1
+
+#parse flags
+while getopts 'F:w:iuUnSpdvqh' OPT; do
+    case $OPT in
+        F) input_file="$OPTARG";;       #<file> : read input from file instead of the embedded one (use - to read from stdin)
+        w) output="$OPTARG";;           #<file> : Write embedded files into a file and exit
+        i) insecure=1;;                 #       : set insecure mode, i.e. don't check server certificate
+        u) update=1;;                   #       : Issue the search again and see if something has changed.
+        U) update_files=1;;             #       : Update files from server overwriting local ones (detect with -u)
+        n) dry_run=1;;                  #       : Don't download any files, just report.
+        S) skip_checksum=1;;            #       : Skip file checksum
+        p) clean_work=0;;               #       : preserve data that failed checksum
+        d) verbose=1;debug=1;;          #       : display debug information
+        v) verbose=1;;                  #       : be more verbose
+        q) quiet=1;;                    #       : be less verbose
+        h) usage && exit 0;;            #       : displays this help
+        \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;;
+        \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;;
+    esac
+done
+shift $(($OPTIND - 1))
+
+#setup input as desired by the user
+if [[ "$input_file" ]]; then
+    if [[ "$input_file" == '-' ]]; then
+        download_files="$(cat)" #read from STDIN
+        exec 0</dev/tty #reopen STDIN as cat closed it
+    else
+        download_files="$(cat $input_file)" #read from file
+    fi
+fi
+
+#if -w (output) was selected write file and finish:
+if [[ "$output" ]]; then
+    #check the file
+    if [[ -f "$output" ]]; then
+        read -p "Overwrite existing file $output? (y/N) " answ
+        case $answ in y|Y|yes|Yes);; *) echo "Aborting then..."; exit 0;; esac
+    fi
+    echo "$download_files">$output
+    exit
+fi
+
+#assure we have everything we need
+check_commands
+
+if ((update)); then
+    echo "Checking the server for changes..."
+    {% spaceless %}{% if request.method == "POST" and url_params|length > 0 %}
+    post_data=$(IFS="&" ; echo "${url_params[*]}")
+    new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)"
+    {% else %}
+    new_wget="$(wget "$search_url" -qO -)"
+    {% endif %}{% endspaceless %}
+    compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'"
+    if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then
+        echo "No changes detected."
+    else
+        echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)"
+        counter=0
+        while [[ -f $0.old.$counter ]]; do ((counter++)); done
+        mv $0 $0.old.$counter
+        echo "$new_wget" > $0
+    fi
+    exit 0      
+fi
+
+check_chksum() {
+    local file="$1"
+    local chk_type=$2
+    local chk_value=$3
+    local local_chksum=Unknown
+
+    case $chk_type in
+        md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");;
+        sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);;
+        *) echo "Can't verify checksum." && return 0;;
+    esac
+
+    #verify
+    ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2
+    echo $local_chksum
+}
+
+#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x)
+md5sum_() {
+    hash -r
+    if type md5sum >& /dev/null; then
+        echo $(md5sum $@)
+    else
+        echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p')
+    fi
+}
+
+#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x)
+sha256sum_() {
+    hash -r
+    if type sha256sum >& /dev/null; then
+        echo $(sha256sum $@)
+    elif type shasum >& /dev/null; then
+        echo $(shasum -a 256 $@)
+    else
+        echo $(sha2 -q -256 $@)
+    fi
+}
+
+get_mod_time_() {
+    if ((MACOSX)); then
+        #on a mac modtime is stat -f %m <file>
+        echo "$(stat -f %m $@)"
+    else
+        #on linux (cygwin) modtime is stat -c %Y <file>
+        echo "$(stat -c %Y $@)"
+    fi
+    return 0;
+}
+
+remove_from_cache() {
+    local entry="$1"
+    local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")"
+    echo "$tmp_file" > "$CACHE_FILE"
+    unset cached
+}
+
+download() {
+    wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}"
+    
+    while read line
+    do
+        # read csv here document into proper variables
+        eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) )
+
+        #Process the file
+        echo -n "$file ..."
+
+        #get the cached entry if any.
+        cached="$(grep -e "^$file" "$CACHE_FILE")"
+        
+        #if we have the cache entry but no file, clean it.
+        if [[ ! -f $file && "$cached" ]]; then
+            #the file was removed, clean the cache
+            remove_from_cache "$file"
+            unset cached
+        fi
+        
+        #check it wasn't modified
+        if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then
+                    if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then
+                echo "Already downloaded and verified"
+                continue
+            elif ((update_files)); then
+                #user want's to overwrite newer files
+                rm $file
+                remove_from_cache "$file"
+                unset cached
+            else
+                #file on server is different from what we have. 
+                echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite"
+                continue
+            fi
+        fi
+        unset chksum_err_value chksum_err_count
+
+        while : ; do
+            # (if we had the file size, we could check before trying to complete)
+            echo "Downloading"
+            [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")"
+            if ((dry_run)); then
+                #all important info was already displayed, if in dry_run mode just abort
+                #No status will be stored
+                break
+            else
+                $wget -O "$file" $url || { failed=1; break; }  
+            fi
+
+            #check if file is there
+            if [[ -f $file ]]; then
+                ((debug)) && echo file found
+                if ((skip_checksum)); then
+                    echo "Skipping check of file checksum"
+                    break
+                fi
+                if [[ ! "$chksum" ]]; then
+                    echo "Checksum not provided, can't verify file integrity"
+                    break
+                fi
+                result_chksum=$(check_chksum "$file" $chksum_type $chksum)
+                if [[ "$result_chksum" != "$chksum" ]]; then
+                    echo "  $chksum_type failed!"
+                    if ((clean_work)); then
+                        if !((chksum_err_count)); then
+                                chksum_err_value=$result_chksum
+                                chksum_err_count=2
+                            elif ((checksum_err_count--)); then
+                                if [[ "$result_chksum" != "$chksum_err_value" ]]; then
+                                    #this is a real transmission problem
+                                    chksum_err_value=$result_chksum
+                                    chksum_err_count=2
+                                fi
+                            else
+                                #ok if here we keep getting the same "different" checksum
+                                echo "The file returns always a different checksum!"
+                                echo "Contact the data owner to verify what is happening."
+                                echo
+                                sleep 1
+                                break
+                            fi
+                        
+                            rm $file
+                            #try again
+                            echo -n "  re-trying..."
+                            continue
+                    else
+                            echo "  don't use -p or remove manually."
+                    fi
+                else
+                    echo "  $chksum_type ok. done!"
+                    echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE
+                fi
+            fi
+            #done!
+            break
+        done
+        
+        if ((failed)); then
+            echo "download failed"
+            unset failed
+        fi
+        
+    done <<<"$download_files"
+
+}
+
+dedup_cache_() {
+    local file=${1:-${CACHE_FILE}}
+    ((debug)) && echo "dedup'ing cache ${file} ..."
+    local tmp=$(LC_ALL='C' sort  -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
+    ((DEBUG)) && echo "$tmp"
+    echo "$tmp" > $file
+    ((debug)) && echo "(cache dedup'ed)"
+}
+
+#do we have old results? Create the file if not
+[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE
+
+#
+# MAIN
+#
+
+echo "Running $(basename $0) version: $version"
+((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly"
+echo "Use $(basename $0) -h for help."$'\n'
+
+cat <<'EOF-MESSAGE'
+{% if warning_message %}{{ warning_message|safe }}
+{% endif %}Script created for {{ files|length }} file(s)
+(The count won't match if you manually edit this file!)
+
+EOF-MESSAGE
+sleep 1
+
+check_os
+
+download
+
+dedup_cache_
+
+echo "done"
\ No newline at end of file
diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt
index dbff42c98..cd4630940 100644
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@@ -34,7 +34,9 @@ pre-commit==3.7.1  # https://github.com/pre-commit/pre-commit
 # ------------------------------------------------------------------------------
 django-model-utils==4.5.1  # https://github.com/jazzband/django-model-utils
 django_unique_upload==0.2.1   # https://github.com/agconti/django-unique-upload
-globus-sdk==3.46.0
+globus-sdk==3.65.0  #
+esgcet==5.3.5  #
+globus-cli==3.39.0  #
 
 # SBOM
 # ------------------------------------------------------------------------------
diff --git a/docker-compose.yml b/docker-compose.yml
index 2ab49f048..31adf5295 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -57,6 +57,7 @@ services:
     volumes:
       - ./backend:/app
     environment:
+      WGET_SCRIPT_FILE_DEFAULT_LIMIT: 1000
       PGHOST: postgres
       PGPASSWORD: postgres
       PGUSER: postgres
diff --git a/docs/docs/users/configurable_environment_variables.md b/docs/docs/users/configurable_environment_variables.md
index 7c88f5e29..5556911a2 100644
--- a/docs/docs/users/configurable_environment_variables.md
+++ b/docs/docs/users/configurable_environment_variables.md
@@ -21,14 +21,82 @@
 >
 >     `https://api.stac.esgf-west.org/`
 
-#### `METAGRID_WGET_URL`
+#### `METAGRID_DATA_UPLOAD_MAX_NUMBER_FIELDS`
 
-> !!! example "**Required**"
->     The URL at which the ESG-Search wget endpoint can be reached.
+> !!! example "*Optional*"
+>     __Default:__ `1024`
+>
+>     Maximum number of form fields allowed in a single upload. Useful for large wget payloads.
+>
+>     __Example Values__
+>
+>     `1024`
+
+#### `METAGRID_ESGF_SOLR_URL`
+
+> !!! example "*Optional*"
+>     __Default:__ `None`
+>
+>     Address of the ESGF Solr endpoint used by the wget helper logic.
+>
+>     __Example Values__
+>
+>     `https://esgf-node.llnl.gov/esg-search/solr`
+
+#### `METAGRID_ESGF_SOLR_SHARDS_XML`
+
+> !!! example "*Optional*"
+>     __Default:__ `None`
+>
+>     Path to the XML file containing Solr shards configuration used to resolve mirrors/shards.
+>
+>     __Example Values__
+>
+>     `/etc/metagrid/solr_shards.xml`
+
+#### `METAGRID_ESGF_ALLOWED_PROJECTS_JSON`
+
+> !!! example "*Optional*"
+>     __Default:__ `None`
+>
+>     Path to a JSON file that lists allowed projects for wget/dataset access checks.
+>
+>     __Example Values__
+>
+>     `/etc/metagrid/wget_allowed_projects.json`
+
+#### `METAGRID_WGET_SCRIPT_FILE_DEFAULT_LIMIT`
+
+> !!! example "*Optional*"
+>     __Default:__ `1000`
+>
+>     Default limit on the number of files allowed in a generated wget script.
+>
+>     __Example Values__
+>
+>     `1000`
+
+#### `METAGRID_WGET_SCRIPT_FILE_MAX_LIMIT`
+
+> !!! example "*Optional*"
+>     __Default:__ `100000`
+>
+>     Maximum number of files allowed in a generated wget script.
+>
+>     __Example Values__
+>
+>     `100000`
+
+#### `METAGRID_WGET_MAX_DIR_LENGTH`
+
+> !!! example "*Optional*"
+>     __Default:__ `50`
+>
+>     Maximum character length for facet values when creating directory names for wget downloads.
 >
 >     __Example Values__
 >
->     `https://esgf-node.llnl.gov/esg-search/wget`
+>     `50`
 
 #### `METAGRID_KEYCLOAK_CLIENT_ID`
 

From 2c545cf709505e42ddd27cdc6a47314a547b2f9e Mon Sep 17 00:00:00 2001
From: downiec <42552189+downiec@users.noreply.github.com>
Date: Tue, 4 Nov 2025 18:07:04 -0800
Subject: [PATCH 2/5] Finished adding files, and fixing issues with the wget
 downloads, downloads are now working properly. Need to add a few tests to
 increase test coverage.

---
 backend/Dockerfile                            |   4 +-
 backend/config/settings/site_specific.py      |  20 +-
 backend/config/settings/static.py             |   7 +-
 backend/config/urls.py                        |   2 +-
 .../metagrid/api_proxy/tests/test_views.py    |  12 -
 backend/metagrid/api_proxy/views.py           | 214 +-----------------
 backend/metagrid/wget/__init__.py             |   0
 .../{api_proxy => }/wget/query_utils.py       |  60 -----
 .../templates}/wget-simple-template.sh        |   2 +-
 .../wget => wget/templates}/wget-template.sh  |  24 +-
 backend/metagrid/wget/tests/__init__.py       |   0
 backend/metagrid/wget/tests/test_views.py     | 107 +++++++++
 backend/metagrid/wget/views.py                | 184 +++++++++++++++
 .../configurable_environment_variables.md     |  30 +--
 14 files changed, 321 insertions(+), 345 deletions(-)
 create mode 100644 backend/metagrid/wget/__init__.py
 rename backend/metagrid/{api_proxy => }/wget/query_utils.py (78%)
 rename backend/metagrid/{api_proxy/wget => wget/templates}/wget-simple-template.sh (97%)
 rename backend/metagrid/{api_proxy/wget => wget/templates}/wget-template.sh (97%)
 create mode 100644 backend/metagrid/wget/tests/__init__.py
 create mode 100644 backend/metagrid/wget/tests/test_views.py
 create mode 100644 backend/metagrid/wget/views.py

diff --git a/backend/Dockerfile b/backend/Dockerfile
index cb290aaff..64d303347 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -13,8 +13,8 @@ RUN apt-get update \
     && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
     && rm -rf /var/lib/apt/lists/*
 
-RUN addgroup --system django \
-    && adduser --system --ingroup django django
+RUN groupadd --system --gid 1000 django && \
+    useradd --system --create-home --uid 1000 --gid django django
 
 # Requirements are installed here to ensure they will be cached.
 COPY requirements /requirements
diff --git a/backend/config/settings/site_specific.py b/backend/config/settings/site_specific.py
index 6bc122368..6cc86acdf 100644
--- a/backend/config/settings/site_specific.py
+++ b/backend/config/settings/site_specific.py
@@ -32,22 +32,10 @@ class MetagridBackendSettings(BaseSettings):
     )
 
     # === wget related settings ===
-    ESGF_SOLR_URL: Optional[str] = Field(
-        default=None,
-        description="Address of the ESGF Solr endpoint used by the wget helper logic.",
-        examples=["https://esgf-node.llnl.gov/esg-search/solr"],
-    )
-
-    ESGF_SOLR_SHARDS_XML: Optional[str] = Field(
-        default=None,
-        description="Path to the XML file containing Solr shards configuration used to resolve mirrors/shards.",
-        examples=["/etc/metagrid/solr_shards.xml"],
-    )
-
-    ESGF_ALLOWED_PROJECTS_JSON: Optional[str] = Field(
-        default=None,
-        description="Path to a JSON file that lists allowed projects for wget/dataset access checks.",
-        examples=["/etc/metagrid/wget_allowed_projects.json"],
+    GLOBUS_PUBLIC_INDEX_ENDPOINT_ID: str = Field(
+        default="a8ef4320-9e5a-4793-837b-c45161ca1845",
+        description="The Globus index ID for the public ESGF2 data.",
+        examples=["a8ef4320-9e5a-4793-837b-c45161ca1845"],
     )
 
     WGET_SCRIPT_FILE_DEFAULT_LIMIT: int = Field(
diff --git a/backend/config/settings/static.py b/backend/config/settings/static.py
index c6062c168..b1d397aa8 100644
--- a/backend/config/settings/static.py
+++ b/backend/config/settings/static.py
@@ -9,6 +9,8 @@
     environ.Path(__file__) - 3
 )  # (config/settings/django.py - 3 = metagrid/)
 
+TEMPLATE_DIR = ROOT_DIR("metagrid", "wget", "templates")
+
 
 class DjangoStaticSettings(BaseSettings):
     """Django settings that do not vary by site"""
@@ -96,11 +98,12 @@ class DjangoStaticSettings(BaseSettings):
             "DATABASE_URL": "pgsql:///postgres",
         }
     }
+
     STATIC_ROOT: str = ROOT_DIR("staticfiles")
     # https://docs.djangoproject.com/en/dev/ref/settings/#static-url
     STATIC_URL: str = "/static/"
     # https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#std:setting-STATICFILES_DIRS
-    STATICFILES_DIRS: Sequence[str] = []
+    STATICFILES_DIRS: Sequence[str] = [TEMPLATE_DIR]
     # https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#staticfiles-finders
     STATICFILES_FINDERS: Sequence[str] = [
         "django.contrib.staticfiles.finders.FileSystemFinder",
@@ -121,7 +124,7 @@ class DjangoStaticSettings(BaseSettings):
         {
             "BACKEND": "django.template.backends.django.DjangoTemplates",
             "DIRS": STATICFILES_DIRS,
-            "APP_DIRS": False,
+            "APP_DIRS": True,
             "OPTIONS": {
                 "context_processors": [
                     "django.template.context_processors.debug",
diff --git a/backend/config/urls.py b/backend/config/urls.py
index 8c64c5655..d6be01b40 100644
--- a/backend/config/urls.py
+++ b/backend/config/urls.py
@@ -24,7 +24,6 @@
     do_search,
     do_stac_search,
     do_status,
-    do_wget,
     fetch_stac_aggregations,
     get_frontend_config,
     get_temp_storage,
@@ -34,6 +33,7 @@
 from metagrid.observability.views import liveness, readiness
 from metagrid.projects.views import ProjectsViewSet
 from metagrid.users.views import UserCreateViewSet, UserViewSet
+from metagrid.wget.views import do_wget
 
 router = DefaultRouter()
 router.register(r"users", UserViewSet)
diff --git a/backend/metagrid/api_proxy/tests/test_views.py b/backend/metagrid/api_proxy/tests/test_views.py
index 8e873a4d1..dda8347b9 100644
--- a/backend/metagrid/api_proxy/tests/test_views.py
+++ b/backend/metagrid/api_proxy/tests/test_views.py
@@ -79,18 +79,6 @@ def test_globus_auth_logout(self):
         response = self.client.get(url)
         self.assertEqual(response.status_code, status.HTTP_302_FOUND)
 
-    # @responses.activate
-    # def test_wget(self):
-    #     url = reverse("do-wget")
-    #     responses.get(settings.WGET_URL)
-    #     response = self.client.get(
-    #         url,
-    #         {
-    #             "dataset_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.abrupt-4xCO2.r12i1p1f1.Amon.n2oglobal.gr.v20191003|esgf-data1.llnl.gov"
-    #         },
-    #     )
-    #     assert response.status_code == status.HTTP_200_OK
-
     @responses.activate
     def test_search(self):
         url = reverse("do-search")
diff --git a/backend/metagrid/api_proxy/views.py b/backend/metagrid/api_proxy/views.py
index 3a75b2252..d4ddf6f80 100644
--- a/backend/metagrid/api_proxy/views.py
+++ b/backend/metagrid/api_proxy/views.py
@@ -1,6 +1,4 @@
-import datetime
 import json
-import os
 from urllib.parse import urlparse
 
 import globus_sdk
@@ -8,10 +6,9 @@
 from django.conf import settings
 from django.contrib.auth import logout
 from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse
-from django.shortcuts import redirect, render
+from django.shortcuts import redirect
 from django.views.decorators.csrf import csrf_exempt
 from django.views.decorators.http import require_http_methods
-from esgcet.globus_query import ESGGlobusQuery
 from globus_portal_framework.gclients import load_transfer_client
 from rest_framework.decorators import api_view, permission_classes
 from rest_framework.response import Response
@@ -19,15 +16,6 @@
 
 from config.settings.site_specific import MetagridFrontendSettings
 
-from .wget.query_utils import (  # get_allowed_projects_from_json,
-    CORE_QUERY_FIELDS,
-    FIELD_WGET_EMPTYPATH,
-    FIELD_WGET_PATH,
-    KEYWORDS,
-    SIMPLE,
-    UNSUPPORTED_FIELDS,
-)
-
 
 @api_view()
 @permission_classes([])
@@ -164,206 +152,6 @@ def do_status(request):
         return HttpResponseBadRequest(resp.text)
 
 
-@require_http_methods(["GET", "POST"])
-@csrf_exempt
-def do_wget(request):  # noqa: C901
-
-    file_limit = settings.WGET_SCRIPT_FILE_DEFAULT_LIMIT
-    # file_offset = 0
-    # use_sort = False
-    # use_distrib = True
-    # requested_shards = []
-    wget_path_facets = []
-    wget_empty_path = ""
-    script_template_file = "wget-template.sh"
-
-    # allowed_projects = get_allowed_projects_from_json()
-
-    # querys = []
-    # file_query = ["type:File"]
-
-    # Gather dataset_ids and other parameters
-    if request.method == "POST":
-        url_params = json.loads(request.body)
-        print("POST url_params:", url_params)
-    elif request.method == "GET":
-        url_params = request.GET.copy()
-        print("GET url_params:", url_params)
-    else:
-        return HttpResponseBadRequest("Request method must be POST or GET.")
-
-    # If no parameters were passed to the API,
-    # then default to limit=1 and distrib=false
-    if len(url_params.keys()) == 0:
-        url_params.update(dict(limit=1, distrib="false"))
-
-    print("PART 1")
-
-    # Catch invalid parameters
-    for param in url_params.keys():
-        if param[-1] == "!":
-            param = param[:-1]
-        if param not in KEYWORDS and param not in CORE_QUERY_FIELDS:
-            msg = "Invalid HTTP query parameter=%s" % param
-            return HttpResponseBadRequest(msg)
-
-    print("PART 2")
-
-    # Catch unsupported fields
-    for uf in UNSUPPORTED_FIELDS:
-        if url_params.get(uf):
-            msg = "Unsupported parameter: %s" % uf
-            return HttpResponseBadRequest(msg)
-
-    print("PART 3")
-
-    # Create a simplified script that only runs wget on a list of files
-    if url_params.get(SIMPLE):
-        use_simple_param = url_params.pop(SIMPLE)[0].lower()
-        if use_simple_param == "false":
-            script_template_file = "wget-template.sh"
-        elif use_simple_param == "true":
-            script_template_file = "wget-simple-template.sh"
-        else:
-            msg = 'Parameter "%s" must be set to true or false.' % SIMPLE
-            return HttpResponseBadRequest(msg)
-
-    print("PART 4")
-
-    # Get directory structure for downloaded files
-    if url_params.get(FIELD_WGET_PATH):
-        wget_path_facets = url_params.pop(FIELD_WGET_PATH)[0].split(",")
-
-    if url_params.get(FIELD_WGET_EMPTYPATH):
-        wget_empty_path = url_params.pop(FIELD_WGET_EMPTYPATH)[0]
-
-    # Get facets for the file name, URL, checksum
-    # file_attribute_set = set(["title", "url", "checksum_type", "checksum"])
-
-    # Get facets for the download directory structure,
-    # and remove duplicate facets
-    # file_attributes = list(file_attribute_set)
-
-    print("Requested URL params:", url_params)
-
-    # Fetch files for the query
-    file_list = {}
-    dsid = url_params.get("dataset_id", "")
-    if "," in dsid:
-        dsid = dsid.split(",")
-
-    print("PART 5")
-
-    print(f"DEBUG: {dsid} ")
-    try:
-        qo = ESGGlobusQuery(settings.SOCIAL_AUTH_GLOBUS_KEY, "")
-        res = qo.query_file_records(dsid, wget=True)  # , crit=url_params)
-        print("res:", res)
-    except PermissionError as e:
-        # Configuration or filesystem permission issue (e.g. missing shards/allowed-projects files)
-        print("PermissionError while accessing ESGGlobusQuery files:", e)
-        return HttpResponseBadRequest(
-            "Server configuration error: unable to access required ESGF helper files."
-        )
-    except Exception as e:
-        # Generic fallback to avoid unhandled exceptions bubbling up
-        print("Error while querying ESGF metadata via ESGGlobusQuery:", e)
-        return HttpResponseBadRequest(f"Error querying ESGF metadata: {e}")
-    num_files = len(res)
-
-    print(f"DEBUG: Number of files found: {num_files}")
-
-    for file_info in res:
-        filename = file_info["title"]
-        checksum_type = file_info["checksum_type"][0]
-        checksum = file_info["checksum"][0]
-        # Create directory structure from facet values
-        # If the facet is not found, then use the empty path value
-        dir_struct = []
-        for facet in wget_path_facets:
-            facet_value = wget_empty_path
-            if facet in file_info:
-                if isinstance(file_info[facet], list):
-                    facet_value = file_info[facet][0]
-                else:
-                    facet_value = file_info[facet]
-            # Prevent strange values while generating names
-            facet_value = facet_value.replace("['<>?*\"\n\t\r\0]", "")
-            facet_value = facet_value.replace("[ /\\\\|:;]+", "_")
-            # Limit length of value to WGET_MAX_DIR_LENGTH
-            if len(facet_value) > settings.WGET_MAX_DIR_LENGTH:
-                facet_value = facet_value[: settings.WGET_MAX_DIR_LENGTH]
-            dir_struct.append(facet_value)
-        dir_struct.append(filename)
-        file_path = os.path.join(*dir_struct)
-        # Only add a file to the list if its file path is not already present
-        if file_path not in file_list:
-            for url in file_info["url"]:
-                url_split = url.split("|")
-                if url_split[2] == "HTTPServer":
-                    file_entry = dict(
-                        url=url_split[0],
-                        checksum_type=checksum_type,
-                        checksum=checksum,
-                    )
-                    file_list[file_path] = file_entry
-                    break
-
-    print("Part 6")
-
-    # Limit the number of files to the maximum
-    warning_message = None
-    if num_files == 0:
-        return HttpResponse("No files found for datasets.")
-    elif num_files > file_limit:
-        warning_message = (
-            "Warning! The total number of files was {} "
-            "but this script will only process {}.".format(
-                num_files, file_limit
-            )
-        )
-
-    print("PART 7")
-
-    # Warning message about files that were skipped
-    # to prevent overwriting similarly-named files.
-    skip_msg = (
-        "There were files with the same name which were requested "
-        "to be download to the same directory. To avoid overwriting "
-        "the previous downloaded one they were skipped.\n"
-        "Please use the parameter 'download_structure' "
-        "to set up unique directories for them."
-    )
-    if min(num_files, file_limit) > len(file_list):
-        if warning_message:
-            warning_message = "{}\n{}".format(warning_message, skip_msg)
-        else:
-            warning_message = skip_msg
-
-    print("PART 8")
-
-    # Build wget script
-    current_datetime = datetime.datetime.now()
-    timestamp = current_datetime.strftime("%Y/%m/%d %H:%M:%S")
-
-    context = dict(
-        timestamp=timestamp,
-        url_params=[dsid],
-        files=file_list,
-        warning_message=warning_message,
-    )
-
-    print("PART 9")
-    wget_script = render(request, script_template_file, context)
-
-    script_filename = current_datetime.strftime("wget-%Y%m%d%H%M%S.sh")
-    response_content = "attachment; filename={}".format(script_filename)
-
-    response = HttpResponse(wget_script, content_type="text/x-sh")
-    response["Content-Disposition"] = response_content
-    return response
-
-
 def do_post(request, urlbase):
     """Helper function to handle POST requests."""
     if request.method != "POST":  # pragma: no cover
diff --git a/backend/metagrid/wget/__init__.py b/backend/metagrid/wget/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/metagrid/api_proxy/wget/query_utils.py b/backend/metagrid/wget/query_utils.py
similarity index 78%
rename from backend/metagrid/api_proxy/wget/query_utils.py
rename to backend/metagrid/wget/query_utils.py
index 95d7f92df..45fed39c3 100644
--- a/backend/metagrid/api_proxy/wget/query_utils.py
+++ b/backend/metagrid/wget/query_utils.py
@@ -1,13 +1,3 @@
-import csv
-import json
-import os
-import urllib.parse
-import urllib.request
-import xml.etree.ElementTree as ET
-from io import StringIO
-
-from django.conf import settings
-
 # reserved query keywords
 OFFSET = "offset"
 LIMIT = "limit"
@@ -243,53 +233,3 @@ def split_value(value):
 
         # convert listo into array
         return _values
-
-
-def get_solr_shards_from_xml():
-    """
-    Get Solr shards from the XML file specified in the settings
-    as ESGF_SOLR_SHARDS_XML
-    """
-
-    shard_list = []
-    if os.path.isfile(settings.ESGF_SOLR_SHARDS_XML):
-        tree = ET.parse(settings.ESGF_SOLR_SHARDS_XML)
-        root = tree.getroot()
-        for value in root:
-            shard_list.append(value.text)
-    return shard_list
-
-
-def get_allowed_projects_from_json():
-    """
-    Get allowed ESGF projects from the JSON file specified in the settings
-    as ESGF_ALLOWED_PROJECTS_JSON
-    """
-
-    allowed_projects_list = []
-    if os.path.isfile(settings.ESGF_ALLOWED_PROJECTS_JSON):
-        with open(settings.ESGF_ALLOWED_PROJECTS_JSON, "r") as js:
-            data = json.load(js)
-            allowed_projects_list = data["allowed_projects"]
-    return allowed_projects_list
-
-
-def get_facets_from_solr():
-    """
-    Get valid facets currently used by the dataset Solr.
-    """
-
-    query_url = settings.ESGF_SOLR_URL + "/datasets/select"
-    query_params = dict(q="*:*", wt="csv", rows=0)
-
-    query_encoded = urllib.parse.urlencode(query_params, doseq=True).encode()
-    req = urllib.request.Request(query_url, query_encoded)
-    with urllib.request.urlopen(req) as response:
-        results = StringIO(response.read().decode())
-        reader = csv.reader(results, delimiter=",")
-        facets = next(reader)
-
-    # Remove fields that should NOT be used as facets
-    _facets = [f for f in facets if f not in NOT_FACETS]
-
-    return _facets
diff --git a/backend/metagrid/api_proxy/wget/wget-simple-template.sh b/backend/metagrid/wget/templates/wget-simple-template.sh
similarity index 97%
rename from backend/metagrid/api_proxy/wget/wget-simple-template.sh
rename to backend/metagrid/wget/templates/wget-simple-template.sh
index f1e823699..704e8a6f3 100644
--- a/backend/metagrid/api_proxy/wget/wget-simple-template.sh
+++ b/backend/metagrid/wget/templates/wget-simple-template.sh
@@ -8,4 +8,4 @@ download_files=(
 for i in "${download_files[@]}"
 do
    wget $i
-done
\ No newline at end of file
+done
diff --git a/backend/metagrid/api_proxy/wget/wget-template.sh b/backend/metagrid/wget/templates/wget-template.sh
similarity index 97%
rename from backend/metagrid/api_proxy/wget/wget-template.sh
rename to backend/metagrid/wget/templates/wget-template.sh
index 87a970dbe..2285e96f1 100644
--- a/backend/metagrid/api_proxy/wget/wget-template.sh
+++ b/backend/metagrid/wget/templates/wget-template.sh
@@ -6,12 +6,12 @@
 # Generated by {{request.get_host}} - {{timestamp}}
 # Search URL: {{request.build_absolute_uri}}
 # Request method: {{request.method}}
-# 
+#
 ###############################################################################
 # first be sure it's bash... anything out of bash or sh will break
 # and the test will assure we are not using sh instead of bash
 if [ $BASH ] && [ `basename $BASH` != bash ]; then
-    echo "######## This is a bash script! ##############" 
+    echo "######## This is a bash script! ##############"
     echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh."
     echo "Trying to recover automatically..."
     sleep 1
@@ -178,7 +178,7 @@ if ((update)); then
         mv $0 $0.old.$counter
         echo "$new_wget" > $0
     fi
-    exit 0      
+    exit 0
 fi
 
 check_chksum() {
@@ -240,7 +240,7 @@ remove_from_cache() {
 
 download() {
     wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}"
-    
+
     while read line
     do
         # read csv here document into proper variables
@@ -251,14 +251,14 @@ download() {
 
         #get the cached entry if any.
         cached="$(grep -e "^$file" "$CACHE_FILE")"
-        
+
         #if we have the cache entry but no file, clean it.
         if [[ ! -f $file && "$cached" ]]; then
             #the file was removed, clean the cache
             remove_from_cache "$file"
             unset cached
         fi
-        
+
         #check it wasn't modified
         if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then
                     if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then
@@ -270,7 +270,7 @@ download() {
                 remove_from_cache "$file"
                 unset cached
             else
-                #file on server is different from what we have. 
+                #file on server is different from what we have.
                 echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite"
                 continue
             fi
@@ -286,7 +286,7 @@ download() {
                 #No status will be stored
                 break
             else
-                $wget -O "$file" $url || { failed=1; break; }  
+                $wget -O "$file" $url || { failed=1; break; }
             fi
 
             #check if file is there
@@ -321,7 +321,7 @@ download() {
                                 sleep 1
                                 break
                             fi
-                        
+
                             rm $file
                             #try again
                             echo -n "  re-trying..."
@@ -337,12 +337,12 @@ download() {
             #done!
             break
         done
-        
+
         if ((failed)); then
             echo "download failed"
             unset failed
         fi
-        
+
     done <<<"$download_files"
 
 }
@@ -381,4 +381,4 @@ download
 
 dedup_cache_
 
-echo "done"
\ No newline at end of file
+echo "done"
diff --git a/backend/metagrid/wget/tests/__init__.py b/backend/metagrid/wget/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/metagrid/wget/tests/test_views.py b/backend/metagrid/wget/tests/test_views.py
new file mode 100644
index 000000000..af0b59f39
--- /dev/null
+++ b/backend/metagrid/wget/tests/test_views.py
@@ -0,0 +1,107 @@
+import json
+from unittest.mock import patch
+
+import responses
+from django.conf import settings
+from django.urls import reverse
+from rest_framework import status
+from rest_framework.test import APITestCase
+
+
+class TestWgetViewSet(APITestCase):
+    @responses.activate
+    def test_wget(self):
+        url = reverse("do-wget")
+        response = self.client.get(
+            url,
+            {
+                "dataset_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.abrupt-4xCO2.r12i1p1f1.Amon.n2oglobal.gr.v20191003|esgf-data1.llnl.gov"
+            },
+        )
+        assert response.status_code == status.HTTP_200_OK
+
+    def test_wget_invalid_param_returns_400(self):
+        """Invalid query parameter should return 400 with message."""
+        url = reverse("do-wget")
+        resp = self.client.get(url, {"invalidparam": "value"})
+        assert resp.status_code == status.HTTP_400_BAD_REQUEST
+        assert b"Invalid HTTP query parameter" in resp.content
+
+    def test_wget_unsupported_field_returns_400(self):
+        """Unsupported field (e.g., lat) should return 400."""
+        url = reverse("do-wget")
+        resp = self.client.get(url, {"lat": "10"})
+        assert resp.status_code == status.HTTP_400_BAD_REQUEST
+        assert b"Unsupported parameter" in resp.content
+
+    def test_wget_simple_invalid_value_returns_400(self):
+        """Passing an invalid value to 'simple' should return 400."""
+        url = reverse("do-wget")
+        resp = self.client.get(url, {"simple": "notaboolean"})
+        assert resp.status_code == status.HTTP_400_BAD_REQUEST
+        assert b'must be set to true or false' in resp.content
+
+    def test_wget_no_files_found_returns_empty_message(self, monkeypatch):
+        """When ESGGlobusQuery returns no files, view should respond with 'No files found'."""
+        url = reverse("do-wget")
+
+        class DummyQuery:
+            def __init__(self, *a, **k):
+                pass
+
+            def query_file_records(self, dsid, wget=True):
+                return []  # no files
+
+        # Patch the ESGGlobusQuery used by the view to our dummy
+        monkeypatch.setattr("metagrid.wget.views.ESGGlobusQuery", DummyQuery)
+
+        resp = self.client.get(url, {"dataset_id": "SOMEDATASET"})
+        assert resp.status_code == status.HTTP_200_OK
+        assert b"No files found for datasets." in resp.content
+
+    def test_wget_generates_script_and_sets_warning_when_exceeds_limit(self, monkeypatch):
+        """When number of files > file_limit ensures script generation path runs and returns attachment."""
+        url = reverse("do-wget")
+
+        # Create two fake file_info entries so num_files > limit (we will set limit to 1)
+        fake_file_info = {
+            "title": "file1.nc",
+            "checksum_type": ["md5"],
+            "checksum": ["deadbeef"],
+            "url": ["http://example.com/file1.nc|HTTPServer|HTTPServer"],
+            # include some facet used by directory construction if needed
+            "activity_id": ["ACT"],
+        }
+        fake_file_info_2 = {
+            "title": "file2.nc",
+            "checksum_type": ["md5"],
+            "checksum": ["cafebabe"],
+            "url": ["http://example.com/file2.nc|HTTPServer|HTTPServer"],
+            "activity_id": ["ACT"],
+        }
+
+        class DummyQuery:
+            def __init__(self, *a, **k):
+                pass
+
+            def query_file_records(self, dsid, wget=True):
+                return [fake_file_info, fake_file_info_2]
+
+        # Patch ESGGlobusQuery to return our two files
+        monkeypatch.setattr("metagrid.wget.views.ESGGlobusQuery", DummyQuery)
+
+        # Reduce the default limit so that num_files > file_limit triggers warning_message logic
+        monkeypatch.setattr(settings, "WGET_SCRIPT_FILE_DEFAULT_LIMIT", 1)
+
+        # Patch render in the view module so template file location is not required
+        monkeypatch.setattr("metagrid.wget.views.render", lambda request, tpl, ctx: "GENERATED_SCRIPT")
+
+        # POST expects JSON body according to the view; send JSON payload
+        payload = {"dataset_id": ["SOMEDATASET"], "download_structure": ["activity_id"], "simple": ["false"]}
+        resp = self.client.post(url, data=json.dumps(payload), content_type="application/json")
+
+        assert resp.status_code == status.HTTP_200_OK
+        # Content disposition should indicate an attachment filename
+        assert "attachment; filename=" in resp["Content-Disposition"]
+        # The returned body should be the rendered template content we patched
+        assert resp.content.decode() == "GENERATED_SCRIPT"
diff --git a/backend/metagrid/wget/views.py b/backend/metagrid/wget/views.py
new file mode 100644
index 000000000..e8b20756c
--- /dev/null
+++ b/backend/metagrid/wget/views.py
@@ -0,0 +1,184 @@
+import datetime
+import json
+import os
+
+from django.conf import settings
+from django.http import HttpResponse, HttpResponseBadRequest
+from django.shortcuts import render
+from django.views.decorators.csrf import csrf_exempt
+from django.views.decorators.http import require_http_methods
+from esgcet.globus_query import ESGGlobusQuery
+
+from .query_utils import (  # get_allowed_projects_from_json,
+    CORE_QUERY_FIELDS,
+    FIELD_WGET_EMPTYPATH,
+    FIELD_WGET_PATH,
+    KEYWORDS,
+    SIMPLE,
+    UNSUPPORTED_FIELDS,
+)
+
+
+@require_http_methods(["GET", "POST"])
+@csrf_exempt
+def do_wget(request):  # noqa: C901
+
+    file_limit = settings.WGET_SCRIPT_FILE_DEFAULT_LIMIT
+    wget_path_facets = []
+    wget_empty_path = ""
+    script_template_file = "wget-template.sh"
+
+    # Gather dataset_ids and other parameters
+    if request.method == "POST":
+        url_params = json.loads(request.body)
+        print("POST url_params:", url_params)
+    elif request.method == "GET":
+        url_params = request.GET.copy()
+        print("GET url_params:", url_params)
+    else:
+        return HttpResponseBadRequest("Request method must be POST or GET.")
+
+    # If no parameters were passed to the API,
+    # then default to limit=1 and distrib=false
+    if len(url_params.keys()) == 0:
+        url_params.update(dict(limit=1, distrib="false"))
+
+    # Catch invalid parameters
+    for param in url_params.keys():
+        if param[-1] == "!":
+            param = param[:-1]
+        if param not in KEYWORDS and param not in CORE_QUERY_FIELDS:
+            msg = "Invalid HTTP query parameter=%s" % param
+            return HttpResponseBadRequest(msg)
+
+    # Catch unsupported fields
+    for uf in UNSUPPORTED_FIELDS:
+        if url_params.get(uf):
+            msg = "Unsupported parameter: %s" % uf
+            return HttpResponseBadRequest(msg)
+
+    # Create a simplified script that only runs wget on a list of files
+    if url_params.get(SIMPLE):
+        use_simple_param = url_params.pop(SIMPLE)[0].lower()
+        if use_simple_param == "false":
+            script_template_file = "wget-template.sh"
+        elif use_simple_param == "true":
+            script_template_file = "wget-simple-template.sh"
+        else:
+            msg = 'Parameter "%s" must be set to true or false.' % SIMPLE
+            return HttpResponseBadRequest(msg)
+
+    # Get directory structure for downloaded files
+    if url_params.get(FIELD_WGET_PATH):
+        wget_path_facets = url_params.pop(FIELD_WGET_PATH)[0].split(",")
+
+    if url_params.get(FIELD_WGET_EMPTYPATH):
+        wget_empty_path = url_params.pop(FIELD_WGET_EMPTYPATH)[0]
+
+    # Fetch files for the query
+    file_list = {}
+    dsid = url_params.get("dataset_id", "")
+    if "," in dsid:
+        dsid = dsid.split(",")
+
+    print(f"DEBUG: {dsid} ")
+    try:
+        qo = ESGGlobusQuery(settings.GLOBUS_PUBLIC_INDEX_ENDPOINT_ID, "")
+        res = qo.query_file_records(dsid, wget=True)  # , crit=url_params)
+        print("res:", res)
+    except PermissionError as e:
+        # Configuration or filesystem permission issue (e.g. missing shards/allowed-projects files)
+        print("PermissionError while accessing ESGGlobusQuery files:", e)
+        return HttpResponseBadRequest(
+            "Server configuration error: unable to access required ESGF helper files."
+        )
+    except Exception as e:
+        # Generic fallback to avoid unhandled exceptions bubbling up
+        print("Error while querying ESGF metadata via ESGGlobusQuery:", e)
+        return HttpResponseBadRequest(f"Error querying ESGF metadata: {e}")
+    num_files = len(res)
+
+    print(f"DEBUG: Number of files found: {num_files}")
+
+    for file_info in res:
+        filename = file_info["title"]
+        checksum_type = file_info["checksum_type"][0]
+        checksum = file_info["checksum"][0]
+        # Create directory structure from facet values
+        # If the facet is not found, then use the empty path value
+        dir_struct = []
+        for facet in wget_path_facets:
+            facet_value = wget_empty_path
+            if facet in file_info:
+                if isinstance(file_info[facet], list):
+                    facet_value = file_info[facet][0]
+                else:
+                    facet_value = file_info[facet]
+            # Prevent strange values while generating names
+            facet_value = facet_value.replace("['<>?*\"\n\t\r\0]", "")
+            facet_value = facet_value.replace("[ /\\\\|:;]+", "_")
+            # Limit length of value to WGET_MAX_DIR_LENGTH
+            if len(facet_value) > settings.WGET_MAX_DIR_LENGTH:
+                facet_value = facet_value[: settings.WGET_MAX_DIR_LENGTH]
+            dir_struct.append(facet_value)
+        dir_struct.append(filename)
+        file_path = os.path.join(*dir_struct)
+        # Only add a file to the list if its file path is not already present
+        if file_path not in file_list:
+            for url in file_info["url"]:
+                url_split = url.split("|")
+                if url_split[2] == "HTTPServer":
+                    file_entry = dict(
+                        url=url_split[0],
+                        checksum_type=checksum_type,
+                        checksum=checksum,
+                    )
+                    file_list[file_path] = file_entry
+                    break
+
+    # Limit the number of files to the maximum
+    warning_message = None
+    if num_files == 0:
+        return HttpResponse("No files found for datasets.")
+    elif num_files > file_limit:
+        warning_message = (
+            "Warning! The total number of files was {} "
+            "but this script will only process {}.".format(
+                num_files, file_limit
+            )
+        )
+
+    # Warning message about files that were skipped
+    # to prevent overwriting similarly-named files.
+    skip_msg = (
+        "There were files with the same name which were requested "
+        "to be download to the same directory. To avoid overwriting "
+        "the previous downloaded one they were skipped.\n"
+        "Please use the parameter 'download_structure' "
+        "to set up unique directories for them."
+    )
+    if min(num_files, file_limit) > len(file_list):
+        if warning_message:
+            warning_message = "{}\n{}".format(warning_message, skip_msg)
+        else:
+            warning_message = skip_msg
+
+    # Build wget script
+    current_datetime = datetime.datetime.now()
+    timestamp = current_datetime.strftime("%Y/%m/%d %H:%M:%S")
+
+    context = dict(
+        timestamp=timestamp,
+        url_params=[dsid],
+        files=file_list,
+        warning_message=warning_message,
+    )
+
+    wget_script = render(request, script_template_file, context)
+
+    script_filename = current_datetime.strftime("wget-%Y%m%d%H%M%S.sh")
+    response_content = "attachment; filename={}".format(script_filename)
+
+    response = HttpResponse(wget_script, content_type="text/x-sh")
+    response["Content-Disposition"] = response_content
+    return response
diff --git a/docs/docs/users/configurable_environment_variables.md b/docs/docs/users/configurable_environment_variables.md
index 5556911a2..d607e8fa0 100644
--- a/docs/docs/users/configurable_environment_variables.md
+++ b/docs/docs/users/configurable_environment_variables.md
@@ -32,38 +32,16 @@
 >
 >     `1024`
 
-#### `METAGRID_ESGF_SOLR_URL`
+#### `METAGRID_GLOBUS_PUBLIC_INDEX_ENDPOINT_ID`
 
 > !!! example "*Optional*"
->     __Default:__ `None`
->
->     Address of the ESGF Solr endpoint used by the wget helper logic.
->
->     __Example Values__
->
->     `https://esgf-node.llnl.gov/esg-search/solr`
-
-#### `METAGRID_ESGF_SOLR_SHARDS_XML`
-
-> !!! example "*Optional*"
->     __Default:__ `None`
->
->     Path to the XML file containing Solr shards configuration used to resolve mirrors/shards.
->
->     __Example Values__
->
->     `/etc/metagrid/solr_shards.xml`
-
-#### `METAGRID_ESGF_ALLOWED_PROJECTS_JSON`
-
-> !!! example "*Optional*"
->     __Default:__ `None`
+>     __Default:__ `a8ef4320-9e5a-4793-837b-c45161ca1845`
 >
->     Path to a JSON file that lists allowed projects for wget/dataset access checks.
+>     The Globus index ID for the public ESGF2 data.
 >
 >     __Example Values__
 >
->     `/etc/metagrid/wget_allowed_projects.json`
+>     `a8ef4320-9e5a-4793-837b-c45161ca1845`
 
 #### `METAGRID_WGET_SCRIPT_FILE_DEFAULT_LIMIT`
 

From ac92dcdc00b7e5c94cdb37f704ffb211a03d875e Mon Sep 17 00:00:00 2001
From: downiec <42552189+downiec@users.noreply.github.com>
Date: Tue, 4 Nov 2025 19:56:41 -0800
Subject: [PATCH 3/5] Fix some issues with precommit and startup

---
 .pre-commit-config.yaml                           |  2 +-
 backend/Dockerfile                                |  2 +-
 backend/config/settings/site_specific.py          |  5 +++++
 backend/config/settings/static.py                 | 15 +++++++++------
 docker-compose.yml                                |  2 --
 .../users/configurable_environment_variables.md   | 11 +++++++++++
 helm/deploy/helmfile.cnpg.yaml                    |  2 +-
 helm/values.yaml                                  |  2 +-
 8 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b0d852c81..4ffe7375b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
-        exclude: traefik/*.yml|helm/templates/.*
+        exclude: traefik/*.yml|helm/templates/.*|helm/deploy/.*|helm/values.yaml
 
   # Back-end
   # ------------------------------------------------------------------------------
diff --git a/backend/Dockerfile b/backend/Dockerfile
index cb290aaff..c23c01dbd 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.11-slim
 
-ENV PYTHONUNBUFFERED 1
+ENV PYTHONUNBUFFERED=1
 
 RUN apt-get update \
     # dependencies for building Python packages
diff --git a/backend/config/settings/site_specific.py b/backend/config/settings/site_specific.py
index 7ac645a78..7c295b7d5 100644
--- a/backend/config/settings/site_specific.py
+++ b/backend/config/settings/site_specific.py
@@ -42,6 +42,11 @@ class MetagridBackendSettings(BaseSettings):
         description="A list of all the people who get code error notifications. When `DEBUG=False` and `AdminEmailHandler` is configured in `LOGGING` (done by default), Django emails these people the details of exceptions raised in the request/response cycle. Each item in the list should be a tuple of (Full name, email address). "
         "Reference: <https://docs.djangoproject.com/en/5.1/ref/settings/#admins>",
     )
+    DATABASE_URL: str = Field(
+        default="postgresql://postgres:postgres@postgres:5432/postgres",
+        examples=["postgresql://postgres:postgres@postgres:5432/postgres"],
+        description="The database connection URL for the Metagrid backend database.",
+    )
     SOCIAL_AUTH_GLOBUS_KEY: str = Field(
         examples=["94c44808-9efd-4236-bffd-1185b1071736"],
         description="The `Client UUID` obtained by registering a `portal, science gateway, or other application you host` with Globus at <https://app.globus.org/settings/developers>",
diff --git a/backend/config/settings/static.py b/backend/config/settings/static.py
index e06b167f0..23703e047 100644
--- a/backend/config/settings/static.py
+++ b/backend/config/settings/static.py
@@ -2,19 +2,22 @@
 from typing import Any, Optional, Sequence
 
 import environ
-from pydantic import BaseModel, Field
+from pydantic import Field
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 env = environ.Env()
 
-ROOT_DIR = (
-    environ.Path(__file__) - 3
-)  # (config/settings/django.py - 3 = metagrid/)
+# project root (config/settings/static.py - 3 = metagrid/)
+ROOT_DIR = environ.Path(__file__) - 3
 
-# parses DATABASE_URL environment variable
-DATABASES = env.db_url()
+# Parse DATABASE_URL environment variable; default to an in-memory sqlite DB for tests/local runs.
+# This prevents "Set the DATABASE_URL environment variable" errors when none is provided.
+DATABASES = env.db_url(
+    default="postgresql://postgres:postgres@postgres:5432/postgres"
+)
 DATABASES.update(ATOMIC_REQUESTS=True)
 
+
 class DjangoStaticSettings(BaseSettings):
     """Django settings that do not vary by site"""
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 160893d0b..008cd1435 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -57,8 +57,6 @@ services:
         condition: service_healthy
     volumes:
       - ./backend:/app
-    environment:
-      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres
     ports:
       - "127.0.0.1:5000:5000"
 
diff --git a/docs/docs/users/configurable_environment_variables.md b/docs/docs/users/configurable_environment_variables.md
index ae7f19af9..917751306 100644
--- a/docs/docs/users/configurable_environment_variables.md
+++ b/docs/docs/users/configurable_environment_variables.md
@@ -45,6 +45,17 @@
 >
 >     `[('Author', 'downie4@llnl.gov'), ('Author', 'ames4@llnl.gov')]`
 
+#### `METAGRID_DATABASE_URL`
+
+> !!! example "*Optional*"
+>     __Default:__ `postgresql://postgres:postgres@postgres:5432/postgres`
+>
+>     The database connection URL for the Metagrid backend database.
+>
+>     __Example Values__
+>
+>     `postgresql://postgres:postgres@postgres:5432/postgres`
+
 #### `METAGRID_SOCIAL_AUTH_GLOBUS_KEY`
 
 > !!! example "**Required**"
diff --git a/helm/deploy/helmfile.cnpg.yaml b/helm/deploy/helmfile.cnpg.yaml
index 05ca31294..b03b4fa88 100644
--- a/helm/deploy/helmfile.cnpg.yaml
+++ b/helm/deploy/helmfile.cnpg.yaml
@@ -11,7 +11,7 @@ releases:
 - name: cnpg
   namespace: cnpg-system
   chart: cnpg/cloudnative-pg
-  version: "0.26.1"  
+  version: "0.26.1"
   wait: true
   hooks:
     # remove cnpg resources
diff --git a/helm/values.yaml b/helm/values.yaml
index 6623ac902..9a618edd2 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -139,7 +139,7 @@ postgresql:
   username: postgres
   password: postgres
   database: postgres
-  
+
   imagePullSecrets: []
 
   image:

From 57c7e8686b7e9ac2eac867ac33a89ab4ba5af868 Mon Sep 17 00:00:00 2001
From: downiec <42552189+downiec@users.noreply.github.com>
Date: Wed, 12 Nov 2025 14:10:18 -0800
Subject: [PATCH 4/5] Updated the feature to allow using a dedicated wget url
 (as it was before) so that the integrated wget is only active if the WGET_URL
 setting is None (default). Fixed tests to handle changes, the WGET_URL
 external test is only run if the WGET_URL is set, otherwise the test is
 skipped.

---
 backend/config/settings/site_specific.py       | 10 ++++++++--
 backend/config/urls.py                         |  2 +-
 backend/metagrid/api_proxy/tests/test_views.py | 18 ++++++++++++++++++
 backend/metagrid/api_proxy/views.py            | 10 ++++++++++
 backend/metagrid/wget/tests/test_views.py      |  7 ++++++-
 backend/metagrid/wget/views.py                 |  2 +-
 docker-compose.yml                             |  2 +-
 .../configurable_environment_variables.md      | 15 +++++++++++++--
 8 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/backend/config/settings/site_specific.py b/backend/config/settings/site_specific.py
index 6e4841628..191506620 100644
--- a/backend/config/settings/site_specific.py
+++ b/backend/config/settings/site_specific.py
@@ -32,6 +32,12 @@ class MetagridBackendSettings(BaseSettings):
     )
 
     # === wget related settings ===
+    WGET_URL: Optional[str] = Field(
+        default=None,
+        description="(Optional) If set, the URL at which the ESG-Search wget endpoint can be reached. If None (default), the wget download script is generated by an integrated WGET within the Metagrid deployment.",
+        examples=["https://esgf-node.llnl.gov/esg-search/wget"],
+    )
+
     GLOBUS_PUBLIC_INDEX_ENDPOINT_ID: str = Field(
         default="a8ef4320-9e5a-4793-837b-c45161ca1845",
         description="The Globus index ID for the public ESGF2 data.",
@@ -39,9 +45,9 @@ class MetagridBackendSettings(BaseSettings):
     )
 
     WGET_SCRIPT_FILE_DEFAULT_LIMIT: int = Field(
-        default=1000,
+        default=9999,
         description="Default limit on the number of files allowed in a generated wget script.",
-        examples=[1000],
+        examples=[9999],
     )
 
     WGET_SCRIPT_FILE_MAX_LIMIT: int = Field(
diff --git a/backend/config/urls.py b/backend/config/urls.py
index d6be01b40..8c64c5655 100644
--- a/backend/config/urls.py
+++ b/backend/config/urls.py
@@ -24,6 +24,7 @@
     do_search,
     do_stac_search,
     do_status,
+    do_wget,
     fetch_stac_aggregations,
     get_frontend_config,
     get_temp_storage,
@@ -33,7 +34,6 @@
 from metagrid.observability.views import liveness, readiness
 from metagrid.projects.views import ProjectsViewSet
 from metagrid.users.views import UserCreateViewSet, UserViewSet
-from metagrid.wget.views import do_wget
 
 router = DefaultRouter()
 router.register(r"users", UserViewSet)
diff --git a/backend/metagrid/api_proxy/tests/test_views.py b/backend/metagrid/api_proxy/tests/test_views.py
index dda8347b9..1aae552c1 100644
--- a/backend/metagrid/api_proxy/tests/test_views.py
+++ b/backend/metagrid/api_proxy/tests/test_views.py
@@ -87,6 +87,24 @@ def test_search(self):
         response = self.client.get(url, postdata)
         assert response.status_code == status.HTTP_200_OK
 
+    @responses.activate
+    def test_wget(self):
+        url = reverse("do-wget")
+        if settings.WGET_URL is None:
+            # If WGET_URL is None, skip the test
+            import pytest
+
+            pytest.skip("settings.WGET_URL is not set")
+
+        responses.get(settings.WGET_URL)
+        response = self.client.get(
+            url,
+            {
+                "dataset_id": "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.abrupt-4xCO2.r12i1p1f1.Amon.n2oglobal.gr.v20191003|esgf-data1.llnl.gov"
+            },
+        )
+        assert response.status_code == status.HTTP_200_OK
+
     @responses.activate
     def test_stac_search(self):
         url = reverse("do-stac-search")
diff --git a/backend/metagrid/api_proxy/views.py b/backend/metagrid/api_proxy/views.py
index d4ddf6f80..7eae62bec 100644
--- a/backend/metagrid/api_proxy/views.py
+++ b/backend/metagrid/api_proxy/views.py
@@ -15,6 +15,7 @@
 from rest_framework_simplejwt.tokens import RefreshToken
 
 from config.settings.site_specific import MetagridFrontendSettings
+from metagrid.wget.views import do_wget_integrated
 
 
 @api_view()
@@ -152,6 +153,15 @@ def do_status(request):
         return HttpResponseBadRequest(resp.text)
 
 
+@require_http_methods(["GET", "POST"])
+@csrf_exempt
+def do_wget(request):
+    if not settings.WGET_URL:
+        return do_wget_integrated(request)
+
+    return do_request(request, settings.WGET_URL, True)
+
+
 def do_post(request, urlbase):
     """Helper function to handle POST requests."""
     if request.method != "POST":  # pragma: no cover
diff --git a/backend/metagrid/wget/tests/test_views.py b/backend/metagrid/wget/tests/test_views.py
index d1a44e033..2ecc96175 100644
--- a/backend/metagrid/wget/tests/test_views.py
+++ b/backend/metagrid/wget/tests/test_views.py
@@ -2,14 +2,19 @@
 from unittest.mock import patch
 
 import responses
+from django.conf import settings
 from django.urls import reverse
 from rest_framework import status
 from rest_framework.test import APITestCase
 
 
 class TestWgetViewSet(APITestCase):
+    def setUp(self):
+        # Force integrated wget behavior for most tests
+        settings.WGET_URL = None
+
     @responses.activate
-    def test_wget(self):
+    def test_wget_integrated(self):
         url = reverse("do-wget")
         response = self.client.get(
             url,
diff --git a/backend/metagrid/wget/views.py b/backend/metagrid/wget/views.py
index e8b20756c..ec6d63f72 100644
--- a/backend/metagrid/wget/views.py
+++ b/backend/metagrid/wget/views.py
@@ -21,7 +21,7 @@
 
 @require_http_methods(["GET", "POST"])
 @csrf_exempt
-def do_wget(request):  # noqa: C901
+def do_wget_integrated(request):  # noqa: C901
 
     file_limit = settings.WGET_SCRIPT_FILE_DEFAULT_LIMIT
     wget_path_facets = []
diff --git a/docker-compose.yml b/docker-compose.yml
index a97f1256a..a98b7b2ca 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -57,7 +57,7 @@ services:
     volumes:
       - ./backend:/app
     environment:
-      WGET_SCRIPT_FILE_DEFAULT_LIMIT: 1000
+      WGET_SCRIPT_FILE_DEFAULT_LIMIT: 9999
     ports:
       - '127.0.0.1:5000:5000'
 
diff --git a/docs/docs/users/configurable_environment_variables.md b/docs/docs/users/configurable_environment_variables.md
index 7c58b36af..ef32f6250 100644
--- a/docs/docs/users/configurable_environment_variables.md
+++ b/docs/docs/users/configurable_environment_variables.md
@@ -32,6 +32,17 @@
 >
 >     `1024`
 
+#### `METAGRID_WGET_URL`
+
+> !!! example "*Optional*"
+>     __Default:__ `None`
+>
+>     (Optional) If set, the URL at which the ESG-Search wget endpoint can be reached. If None (default), the wget download script is generated by an integrated WGET within the Metagrid deployment.
+>
+>     __Example Values__
+>
+>     `https://esgf-node.llnl.gov/esg-search/wget`
+
 #### `METAGRID_GLOBUS_PUBLIC_INDEX_ENDPOINT_ID`
 
 > !!! example "*Optional*"
@@ -46,13 +57,13 @@
 #### `METAGRID_WGET_SCRIPT_FILE_DEFAULT_LIMIT`
 
 > !!! example "*Optional*"
->     __Default:__ `1000`
+>     __Default:__ `9999`
 >
 >     Default limit on the number of files allowed in a generated wget script.
 >
 >     __Example Values__
 >
->     `1000`
+>     `9999`
 
 #### `METAGRID_WGET_SCRIPT_FILE_MAX_LIMIT`
 

From 8e17cd1ebda79163721bb50d77c9c0267b550759 Mon Sep 17 00:00:00 2001
From: downiec <42552189+downiec@users.noreply.github.com>
Date: Wed, 12 Nov 2025 22:38:15 -0800
Subject: [PATCH 5/5] Added STAC wget downloads, where a script is
 automatically generated when STAC items are selected for wget download in the
 cart. The wget download can handle both stac and non-stac items being
 downloaded. Added some tests for the newly added functions and updated the
 wget success messages to account for STAC and non-STAC wget script results.
 Still need to add wget download for individual STAC records within the search
 Table.

---
 frontend/src/api/index.test.ts                |  68 ++++++++-
 frontend/src/api/index.ts                     |  40 ++---
 frontend/src/common/STAC.ts                   |  35 +++++
 frontend/src/common/utils.ts                  |  16 ++
 .../src/components/Globus/DatasetDownload.tsx | 137 ++++++++++++++----
 frontend/src/test/mock/fixtures.ts            |  31 ++--
 6 files changed, 259 insertions(+), 68 deletions(-)

diff --git a/frontend/src/api/index.test.ts b/frontend/src/api/index.test.ts
index 5d7119212..3fddbdf50 100644
--- a/frontend/src/api/index.test.ts
+++ b/frontend/src/api/index.test.ts
@@ -25,7 +25,8 @@ import {
   startSearchGlobusEndpoints,
   updateUserCart,
 } from '.';
-import { STAC_PROJECTS } from '../common/STAC';
+import { STAC_PROJECTS, generateWgetScriptSTAC } from '../common/STAC';
+import { downloadFileForUser } from '../common/utils';
 import { ActiveSearchQuery, Pagination, RawCitation, ResultType } from '../components/Search/types';
 import { mockConfig } from '../test/jestTestFunctions';
 import {
@@ -36,17 +37,31 @@ import {
   projectsFixture,
   rawCitationFixture,
   rawNodeStatusFixture,
+  rawStacAssetFixture,
   rawUserCartFixture,
+  stacAssetFixture,
   userAuthFixture,
   userInfoFixture,
   userSearchQueriesFixture,
   userSearchQueryFixture,
 } from '../test/mock/fixtures';
 import { rest, server } from '../test/mock/server';
-import apiRoutes from './routes';
+import apiRoutes, { HTTPCodeType } from './routes';
 
 const genericNetworkErrorMsg = 'Failed to Connect';
 
+jest.mock('../common/utils', () => {
+  // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
+  const originalModule = jest.requireActual('../common/utils');
+
+  // eslint-disable-next-line @typescript-eslint/no-unsafe-return
+  return {
+    __esModule: true,
+    ...originalModule,
+    downloadFileForUser: jest.fn(),
+  };
+});
+
 describe('test fetching user authentication with globus', () => {
   it('returns user authentication tokens', async () => {
     const userAuth = await fetchGlobusAuth();
@@ -1027,4 +1042,53 @@ describe('STAC API functions', () => {
     // status should reflect aggregation failure (fetchSTACSearchResults sets non-200 status)
     expect(result.status).toBe(500);
   });
+
+  it('throws error when STAC search endpoint fails', async () => {
+    server.use(rest.post(apiRoutes.esgfSearchSTAC.path, (_req, res, ctx) => res(ctx.status(500))));
+
+    const reqUrl = `${apiRoutes.esgfSearchSTAC.path}?project_id=CMIP6`;
+    await expect(fetchSearchResults({ reqUrl })).rejects.toThrow(
+      apiRoutes.esgfSearchSTAC.handleErrorMsg('generic' as HTTPCodeType),
+    );
+  });
+
+  it('generateWgetScriptSTAC downloads script when assets contain .nc hrefs', () => {
+    const searchResults = [
+      rawStacAssetFixture({
+        id: 'bar',
+        title: 'Bar Title',
+        assets: {
+          a1: stacAssetFixture({ id: 'a1', href: 'http://example.com/file1.nc' }),
+          a2: stacAssetFixture({ id: 'a2', href: 'http://example.com/file2.txt' }),
+        },
+      }),
+    ];
+
+    const result = generateWgetScriptSTAC(searchResults);
+    expect(result).toBe(true);
+    expect(downloadFileForUser).toHaveBeenCalledTimes(1);
+    expect(downloadFileForUser).toHaveBeenCalledWith(
+      expect.stringContaining('wget_stac_script'),
+      expect.stringContaining('wget http://example.com/file1.nc'),
+    );
+  });
+
+  it('generateWgetScriptSTAC returns false when no .nc hrefs present', () => {
+    const searchResults = [
+      rawStacAssetFixture({
+        id: 'foo',
+        title: 'Foo Title',
+        assets: {
+          a: stacAssetFixture({ id: 'a', href: 'http://example.com/file2.txt' }),
+        },
+      }),
+    ];
+
+    const spy = jest.fn(downloadFileForUser).mockImplementation(() => {});
+    const result = generateWgetScriptSTAC(searchResults);
+    expect(result).toBe(false);
+    expect(spy).not.toHaveBeenCalled();
+
+    spy.mockRestore();
+  });
 });
diff --git a/frontend/src/api/index.ts b/frontend/src/api/index.ts
index 67336afb5..89343bff3 100644
--- a/frontend/src/api/index.ts
+++ b/frontend/src/api/index.ts
@@ -29,7 +29,12 @@ import {
 import { RawUserAuth, RawUserInfo } from '../contexts/types';
 import apiRoutes, { ApiRoute, HTTPCodeType } from './routes';
 import { GlobusEndpointSearchResults } from '../components/Globus/types';
-import { cachePagination, getCachedPagination, getCachedSearchResults } from '../common/utils';
+import {
+  cachePagination,
+  downloadFileForUser,
+  getCachedPagination,
+  getCachedSearchResults,
+} from '../common/utils';
 import {
   aggregationsToFacetsData,
   convertSearchParamsIntoStacFilter,
@@ -509,7 +514,7 @@ Promise<{ [key: string]: any }> => {
       return res;
     })
     .catch((error: ResponseError) => {
-      status = error.cause === 422 ? 422 : 500;
+      status = error.cause === 422 ? 422 : (error.cause as number) || 500;
     });
 
   const aggregationsToFacets = aggregationsToFacetsData(aggregations || { aggregations: [] });
@@ -590,11 +595,11 @@ export const fetchSearchResults = async (
       })
       .catch((error: ResponseError) => {
         if (error.cause === 422) {
-          throw new Error(errorMsgBasedOnHTTPStatusCode(error, apiRoutes.esgfSearch), {
+          throw new Error(errorMsgBasedOnHTTPStatusCode(error, apiRoutes.esgfSearchSTAC), {
             cause: 422,
           });
         } else {
-          throw new Error(errorMsgBasedOnHTTPStatusCode(error, apiRoutes.esgfSearch));
+          throw new Error(errorMsgBasedOnHTTPStatusCode(error, apiRoutes.esgfSearchSTAC));
         }
       });
   }
@@ -735,26 +740,6 @@ export const fetchDatasetFiles = async (
     });
 };
 
-const returnFileToUser = (fileContent: string): void => {
-  const d = new Date();
-  const fileName = `wget_script_${d.getFullYear()}-${
-    d.getMonth() + 1
-  }-${d.getDate()}_${d.getHours()}-${d.getMinutes()}-${d.getSeconds()}.sh`;
-  const downloadLinkNode = document.createElement('a');
-  downloadLinkNode.setAttribute(
-    'href',
-    `data:text/plain;charset=utf-8,${encodeURIComponent(fileContent)}`,
-  );
-  downloadLinkNode.setAttribute('download', fileName);
-
-  downloadLinkNode.style.display = 'none';
-  document.body.appendChild(downloadLinkNode);
-
-  downloadLinkNode.click();
-
-  document.body.removeChild(downloadLinkNode);
-};
-
 /**
  * Performs wget request from the API.
  *
@@ -765,9 +750,14 @@ export const fetchWgetScript = async (ids: string[], filenameVars?: string[]): P
     query: filenameVars,
   };
 
+  const d = new Date();
+  const fileName = `wget_script_${d.getFullYear()}-${
+    d.getMonth() + 1
+  }-${d.getDate()}_${d.getHours()}-${d.getMinutes()}-${d.getSeconds()}.sh`;
+
   return axios
     .post(apiRoutes.wget.path, data)
-    .then((resp) => returnFileToUser(resp.data as string))
+    .then((resp) => downloadFileForUser(fileName, resp.data as string))
     .catch((error: ResponseError) => {
       throw new Error(errorMsgBasedOnHTTPStatusCode(error, apiRoutes.wget));
     });
diff --git a/frontend/src/common/STAC.ts b/frontend/src/common/STAC.ts
index 58fe344d7..c52841167 100644
--- a/frontend/src/common/STAC.ts
+++ b/frontend/src/common/STAC.ts
@@ -6,6 +6,7 @@ import {
   StacAsset,
   StacAggregations,
 } from '../components/Search/types';
+import { downloadFileForUser } from './utils';
 
 export const STAC_PROJECTS: RawProject[] = [
   {
@@ -219,3 +220,37 @@ export const convertSearchParamsIntoStacFilter = (
 
   return undefined;
 };
+
+export const generateWgetScriptSTAC = (searchResults: RawSearchResult[]): boolean => {
+  const d = new Date();
+  const date_string = `${d.getFullYear()}-${d.getMonth() + 1}-${d.getDate()}_${d.getHours()}-${d.getMinutes()}-${d.getSeconds()}`;
+  const fileName = `wget_stac_script_${date_string}.sh`;
+
+  let script = '#!/bin/bash\n\n';
+  script += '##############################################################################\n';
+  script += '# ESGF wget download script\n';
+  script += '#\n';
+  script += `# Generated by Metagrid - ${new Date().toISOString()}\n`;
+  script += '#\n';
+  script += '##############################################################################\n\n';
+
+  let hrefs = 0;
+
+  searchResults.forEach((result) => {
+    if (result.assets) {
+      Object.values(result.assets).forEach((asset) => {
+        const { href } = asset;
+        if (href && href.startsWith('http') && href.endsWith('.nc')) {
+          script += `wget ${href}\n`;
+          hrefs += 1;
+        }
+      });
+    }
+  });
+
+  if (hrefs > 0) {
+    downloadFileForUser(fileName, script);
+  }
+
+  return hrefs > 0;
+};
diff --git a/frontend/src/common/utils.ts b/frontend/src/common/utils.ts
index 50deba4fd..04e944050 100644
--- a/frontend/src/common/utils.ts
+++ b/frontend/src/common/utils.ts
@@ -698,3 +698,19 @@ export const saveBannerText = (): void => {
     sessionStorage.setItem('showBanner', bannerText);
   }
 };
+
+export const downloadFileForUser = (filename: string, fileContent: string): void => {
+  const downloadLinkNode = document.createElement('a');
+  downloadLinkNode.setAttribute(
+    'href',
+    `data:text/plain;charset=utf-8,${encodeURIComponent(fileContent)}`,
+  );
+  downloadLinkNode.setAttribute('download', filename);
+
+  downloadLinkNode.style.display = 'none';
+  document.body.appendChild(downloadLinkNode);
+
+  downloadLinkNode.click();
+
+  document.body.removeChild(downloadLinkNode);
+};
diff --git a/frontend/src/components/Globus/DatasetDownload.tsx b/frontend/src/components/Globus/DatasetDownload.tsx
index 2b7d2f16a..d52428392 100644
--- a/frontend/src/components/Globus/DatasetDownload.tsx
+++ b/frontend/src/components/Globus/DatasetDownload.tsx
@@ -2,6 +2,7 @@
 /* eslint-disable @typescript-eslint/no-unsafe-call */
 import { DownloadOutlined, QuestionOutlined } from '@ant-design/icons';
 import {
+  Alert,
   Button,
   Card,
   Collapse,
@@ -54,6 +55,7 @@ import {
   manageCollectionsTourTargets,
   createCollectionsFormTour,
 } from '../../common/joyrideTutorials/reactJoyrideSteps';
+import { generateWgetScriptSTAC } from '../../common/STAC';
 
 const GLOBUS_REDIRECT_URL = `${window.location.origin}/cart/items`;
 
@@ -173,42 +175,119 @@ const DatasetDownloadForm: React.FC<React.PropsWithChildren<unknown>> = () => {
   }
 
   const handleWgetDownload = (): void => {
+    setDownloadIsLoading(true);
+
     const cleanedSelections = itemSelections.filter((item) => {
       return item !== undefined && item !== null;
     });
     setItemSelections(cleanedSelections);
 
-    const ids = cleanedSelections.map((item) => item.id);
-    showNotice(messageApi, 'The wget script is generating, please wait momentarily.', {
-      duration: 3,
-      type: 'info',
-    });
-    setDownloadIsLoading(true);
-    fetchWgetScript(ids)
-      .then(() => {
+    const stacSelections = cleanedSelections.filter((item) => item.isStac);
+    const nonStacSelections = cleanedSelections.filter((item) => !item.isStac);
+
+    const STAC_ERROR_MSG =
+      'No file links found in selected STAC files, wget script was not generated.';
+    const SUCCESS_MSG = 'Wget script downloaded successfully!';
+    const STAC_SUCCESS_MSG = 'Wget script for STAC files downloaded successfully!';
+    const NON_STAC_SUCCESS_MSG = 'Wget script for non-STAC files downloaded successfully!';
+    const BOTH_SUCCESS_MSG =
+      'Wget scripts for both STAC and non-STAC files downloaded successfully!';
+
+    // Generate file for STAC selections
+    let stacSuccess = false;
+
+    if (stacSelections.length > 0) {
+      stacSuccess = generateWgetScriptSTAC(stacSelections);
+
+      if (nonStacSelections.length === 0) {
         setDownloadIsLoading(false);
-        showNotice(messageApi, 'Wget script downloaded successfully!', {
-          duration: 4,
-          type: 'success',
+        if (stacSuccess) {
+          showNotice(messageApi, STAC_SUCCESS_MSG, {
+            duration: 4,
+            type: 'success',
+          });
+        } else {
+          showError(messageApi, STAC_ERROR_MSG);
+        }
+      }
+    }
+
+    // Generate file for non-STAC selections
+    if (nonStacSelections.length > 0) {
+      const ids = nonStacSelections.map((item) => item.id);
+
+      // Generate wget script for download
+      showNotice(
+        messageApi,
+        `The wget script${nonStacSelections.length > 0 ? 's' : ''} generating, please wait momentarily.`,
+        {
+          duration: 3,
+          type: 'info',
+        },
+      );
+
+      fetchWgetScript(ids)
+        .then(() => {
+          setDownloadIsLoading(false);
+          let noticeContent: string | React.ReactNode =
+            stacSelections.length > 0 ? BOTH_SUCCESS_MSG : SUCCESS_MSG;
+          if (stacSelections.length > 0 && !stacSuccess) {
+            noticeContent = (
+              <Card
+                title="STAC Wget Script Error"
+                style={{
+                  maxWidth: '500px',
+                  maxHeight: '400px',
+                  overflowY: 'auto',
+                  overflowX: 'auto',
+                }}
+              >
+                Non-STAC: <Alert closable message={NON_STAC_SUCCESS_MSG} type="success" showIcon />
+                <br />
+                STAC: <Alert closable message={STAC_ERROR_MSG} type="error" showIcon />
+              </Card>
+            );
+          }
+
+          showNotice(messageApi, noticeContent, {
+            duration: 5,
+            type: stacSuccess || stacSelections.length === 0 ? 'success' : 'error',
+          });
+        })
+        .catch((error: ResponseError) => {
+          setDownloadIsLoading(false);
+          const noticeContent: string | React.ReactNode =
+            stacSelections.length > 0 ? (
+              <>
+                STAC:{' '}
+                {stacSuccess ? (
+                  <Alert closable message={STAC_SUCCESS_MSG} type="success" showIcon />
+                ) : (
+                  <Alert closable message={STAC_ERROR_MSG} type="error" showIcon />
+                )}
+                <br />
+                Non-STAC: <Alert closable message={error.message} type="error" showIcon />
+              </>
+            ) : (
+              error.message
+            );
+
+          showError(
+            messageApi,
+            <Card
+              title="Wget Script Error"
+              style={{
+                maxWidth: '500px',
+                maxHeight: '400px',
+                overflowY: 'auto',
+                overflowX: 'auto',
+              }}
+            >
+              {noticeContent}
+            </Card>,
+          );
         });
-      })
-      .catch((error: ResponseError) => {
-        showError(
-          messageApi,
-          <Card
-            title="Wget Script Error"
-            style={{
-              maxWidth: '500px',
-              maxHeight: '400px',
-              overflowY: 'auto',
-              overflowX: 'auto',
-            }}
-          >
-            {error.message}
-          </Card>,
-        );
-        setDownloadIsLoading(false);
-      });
+    }
   };
 
   const getCurrentScope = (): string => {
diff --git a/frontend/src/test/mock/fixtures.ts b/frontend/src/test/mock/fixtures.ts
index e767509d5..44a531954 100644
--- a/frontend/src/test/mock/fixtures.ts
+++ b/frontend/src/test/mock/fixtures.ts
@@ -20,6 +20,7 @@ import {
   RawSearchResults,
   StacResponse,
   StacAggregations,
+  StacAsset,
 } from '../../components/Search/types';
 import { RawUserAuth, RawUserInfo } from '../../contexts/types';
 import { SubmissionResult } from '../../api';
@@ -397,24 +398,30 @@ export const stacAggregationsFixture = (): StacAggregations => ({
   ],
 });
 
+export const stacAssetFixture = (props: Partial<StacAsset> = {}): StacAsset => {
+  const defaults: StacAsset = {
+    id: 'foo',
+    access: ['public'],
+    description: 'test',
+    type: 'image/png',
+    alternatename: 'alternate_foo',
+    name: 'foo',
+    roles: ['data'],
+    href: 'http://test.com/foo',
+    'file:size': 1,
+    'file:checksum': 'abc123',
+  };
+
+  return { ...defaults, ...props };
+};
+
 export const rawStacAssetFixture = (props: Partial<RawSearchResult> = {}): RawSearchResult => {
   const defaults: RawSearchResult = {
     id: 'foo',
     access: ['HTTPServer', 'OPENDAP'],
     url: ['foo.bar|HTTPServer', 'http://test.com/file.nc|OPENDAP'],
     assets: {
-      foo: {
-        id: 'foo',
-        access: ['public'],
-        description: 'test',
-        type: 'image/png',
-        alternatename: 'alternate_foo',
-        name: 'foo',
-        roles: ['data'],
-        href: 'http://test.com/foo',
-        'file:size': 1,
-        'file:checksum': 'abc123',
-      },
+      foo: stacAssetFixture(),
     },
     title: 'Foo Title',
     isStac: true,