diff --git a/.gitignore b/.gitignore index 4790724..a1e7c7c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ schema.json.gz +locales/ diff --git a/scripts/update-gz b/scripts/update-gz index 6a8306f..6b3417b 100755 --- a/scripts/update-gz +++ b/scripts/update-gz @@ -1,19 +1,95 @@ -#!/bin/bash +#!/usr/bin/python3 +import os +import shutil +import gzip +import json +from pathlib import Path +import copy -dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd $dir/.. +parent_folder = Path(__file__).resolve().parent.parent +gzipped_file = f'{parent_folder}/schema.json.gz' +schema_file = f'{parent_folder}/schema.json' +locales_folder = f'{parent_folder}/locales' -file=schema.json.gz -tmp_file=$file-tmp +with open(schema_file, 'r') as f: + schema_text = f.read() + schema = json.loads(schema_text) -gzip -c schema.json > $tmp_file +if os.path.exists(locales_folder): + shutil.rmtree(locales_folder) -DIFF=$(diff $file $tmp_file) -if [ "$DIFF" != "" ] -then - echo "$file updated" - mv $tmp_file $file -else - echo "Schema hasn't changed" - rm $tmp_file -fi +os.mkdir(locales_folder) + +# Begin by checking if gzip is supported. If not - return schema.json +htaccess_rules = f'''RewriteCond %{{REQUEST_URI}} !^/schema +RewriteRule ".?" "-" [S=LINES_TO_SKIP] +RewriteCond %{{HTTP:Accept-Encoding}} !gzip +RewriteRule ^schema(/.*)?$ /zotero-schema/schema.json [QSD,L] +RewriteCond %{{QUERY_STRING}} (?:^|&)locale=({'|'.join(schema['locales'].keys())})(?:&|$) +RewriteRule ^schema(/.*)?$ /zotero-schema/locales/%1.json.gz [QSD,L] +''' +#Dict to collect country codes with locales to handle multiple locales per country code +htaccess_mapings = {} + +# For each item in the 'locales' list of the schema +for locale_name in schema['locales'].keys(): + schema_with_one_locale = copy.deepcopy(schema) + current_locale = schema_with_one_locale['locales'][locale_name] + + schema_with_one_locale['locales'] = {locale_name : current_locale} + + # Rule to match by country + if locale_name[:2] in htaccess_mapings: + htaccess_mapings[locale_name[:2]].append(locale_name) + else: + htaccess_mapings[locale_name[:2]] = [locale_name] + + # generate a gzip file and save it to the locales subdirectory + with gzip.open(f'{locales_folder}/{locale_name}.json.gz', 'wb') as f_out: + json_bytes = json.dumps(schema_with_one_locale, ensure_ascii=False).encode('utf8') + f_out.write(json_bytes) + + +def locale_sort_key(a): + # Prefer en-US + if a == 'en-US': + return 'A' + # Prefer canonical locales + if a[:2] == a[3:5].lower(): + return 'A' + return a[3:5] + + +# For every country code, sort locale candidates and add rule to htacecss +for country_code in htaccess_mapings.keys(): + htaccess_mapings[country_code].sort(key=locale_sort_key) + # Each rule is only applied if gzip encoding is accepted + htaccess_rules += f'''RewriteCond %{{QUERY_STRING}} (?:^|&)locale=({country_code}(-.*)?)(?:&|$) +RewriteRule ^schema(/.*)?$ /zotero-schema/locales/{htaccess_mapings[country_code][0]}.json.gz [QSD,L] +''' + +# Catch all for default schema with all locales +htaccess_rules += '''RewriteRule ^schema$ /zotero-schema/schema.json.gz [QSD,L]''' + + +htaccess_rules += f''' + + # Serve correct encoding type + Header append Content-Encoding gzip + # Set correct content type + Header set Content-Type application/json + # Prevent double-gzip + SetEnv no-gzip 1 + # Force proxies to cache gzipped & non-gzipped schema file separately + Header append Vary Accept-Encoding + # CORS + Header set Access-Control-Allow-Origin "*" +''' + +htaccess_rules = htaccess_rules.replace("LINES_TO_SKIP", str(len(htaccess_rules.split('RewriteRule'))-2)) +print(htaccess_rules) + + +# Create gzip for the main schema +with open(schema_file, 'rb') as f_in, gzip.open(gzipped_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out)