From 9d9f283312a36a7c77f652aec7033491e00bf983 Mon Sep 17 00:00:00 2001
From: Christoph von Gabler-Sahm
Date: Fri, 15 Apr 2016 13:52:51 +0200
Subject: [PATCH 1/5] preliminary support for gzipped catalogs to increase
 client checking speed. Requires python gzip module and "with" support
 (available since python 2.7, i.e. since OS X 10.7).

---
 code/reposadolib/reposadocommon.py | 6 +++++-
 docs/URL_rewrites.md               | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/code/reposadolib/reposadocommon.py b/code/reposadolib/reposadocommon.py
index a142573..c3c5bb5 100644
--- a/code/reposadolib/reposadocommon.py
+++ b/code/reposadolib/reposadocommon.py
@@ -46,6 +46,7 @@ import time
 import urlparse
 import warnings
+import gzip
 
 from xml.parsers.expat import ExpatError
 from xml.dom import minidom
@@ -430,7 +431,8 @@ def writeBranchCatalogs(localcatalogpath):
                      product_key, branch, localcatalogname)
         plistlib.writePlist(catalog, branchcatalogpath)
-
+        with open(branchcatalogpath) as f_in, gzip.open(branchcatalogpath + '.gz', 'wb') as f_out:
+            f_out.writelines(f_in)
 
 def writeAllLocalCatalogs():
     '''Writes out all local and branch catalogs.
     Used when we purge products.'''
@@ -472,6 +474,8 @@ def writeLocalCatalogs(applecatalogpath):
     # write raw (unstable/development) catalog
     # with all downloaded Apple updates enabled
     plistlib.writePlist(catalog, localcatalogpath)
+    with open(localcatalogpath) as f_in, gzip.open(localcatalogpath + '.gz', 'wb') as f_out:
+        f_out.writelines(f_in)
 
     # now write filtered catalogs (branches) based on this catalog
     writeBranchCatalogs(localcatalogpath)
diff --git a/docs/URL_rewrites.md b/docs/URL_rewrites.md
index fa98b5e..f14a00e 100644
--- a/docs/URL_rewrites.md
+++ b/docs/URL_rewrites.md
@@ -20,6 +20,8 @@ Here is an example .htaccess file you could place at the root of your Reposado r
     RewriteEngine On
     Options FollowSymLinks
     RewriteBase /
+    RewriteCond %{HTTP:Accept-Encoding} gzip
+    RewriteRule ^(.+\.(sucatalog))$ /$1.gz [L]
     RewriteCond %{HTTP_USER_AGENT} Darwin/8
     RewriteRule ^index(.*)\.sucatalog$ content/catalogs/index$1.sucatalog [L]
     RewriteCond %{HTTP_USER_AGENT} Darwin/9

From 416eee3e14a23c06ef118cb7b1af0378fbca7b94 Mon Sep 17 00:00:00 2001
From: Christoph von Gabler-Sahm
Date: Fri, 15 Apr 2016 14:39:53 +0200
Subject: [PATCH 2/5] gzip compression for metadata

---
 code/repo_sync                     |  3 ++-
 code/reposadolib/reposadocommon.py | 12 ++++++++----
 docs/URL_rewrites.md               |  4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/code/repo_sync b/code/repo_sync
index 15b9cf0..8b7d8ad 100755
--- a/code/repo_sync
+++ b/code/repo_sync
@@ -488,7 +488,6 @@ class ReplicationError(Exception):
     '''A custom error when replication fails'''
     pass
 
-
 def replicateURLtoFilesystem(full_url, root_dir=None,
                              base_url=None, copy_only_if_missing=False,
                              appendToFilename=''):
@@ -511,6 +510,7 @@ def replicateURLtoFilesystem(full_url, root_dir=None,
     local_file_path = os.path.join(root_dir, relative_url) + appendToFilename
     local_dir_path = os.path.dirname(local_file_path)
     if copy_only_if_missing and os.path.exists(local_file_path):
+        reposadocommon.createCompressedFileCopy(local_file_path, True)
         return local_file_path
     if not os.path.exists(local_dir_path):
         try:
@@ -521,6 +521,7 @@
         getURL(full_url, local_file_path)
     except CurlDownloadError, err:
         raise ReplicationError(err)
+    reposadocommon.createCompressedFileCopy(local_file_path)
     return local_file_path
 
diff --git a/code/reposadolib/reposadocommon.py b/code/reposadolib/reposadocommon.py
index c3c5bb5..22c05a9 100644
--- a/code/reposadolib/reposadocommon.py
+++ b/code/reposadolib/reposadocommon.py
@@ -258,6 +258,12 @@ def getDataFromPlist(filename):
     except (IOError, ExpatError):
         return {}
 
+def createCompressedFileCopy(local_file_path, copy_only_if_missing=False):
+    '''creates a gzipped copy of the given file at the same location with .gz suffix'''
+    local_gz_file_path = local_file_path + '.gz'
+    if not ( copy_only_if_missing and os.path.exists(local_gz_file_path)):
+        with open(local_file_path) as f_in, gzip.open(local_gz_file_path, 'wb') as f_out:
+            f_out.writelines(f_in)
 
 def getDownloadStatus():
     '''Reads download status info from disk'''
@@ -431,8 +437,7 @@ def writeBranchCatalogs(localcatalogpath):
                      product_key, branch, localcatalogname)
         plistlib.writePlist(catalog, branchcatalogpath)
-        with open(branchcatalogpath) as f_in, gzip.open(branchcatalogpath + '.gz', 'wb') as f_out:
-            f_out.writelines(f_in)
+        createCompressedFileCopy(branchcatalogpath)
 
 def writeAllLocalCatalogs():
     '''Writes out all local and branch catalogs.
     Used when we purge products.'''
@@ -474,8 +479,7 @@ def writeLocalCatalogs(applecatalogpath):
     # write raw (unstable/development) catalog
     # with all downloaded Apple updates enabled
     plistlib.writePlist(catalog, localcatalogpath)
-    with open(localcatalogpath) as f_in, gzip.open(localcatalogpath + '.gz', 'wb') as f_out:
-        f_out.writelines(f_in)
+    createCompressedFileCopy(localcatalogpath)
 
     # now write filtered catalogs (branches) based on this catalog
     writeBranchCatalogs(localcatalogpath)
diff --git a/docs/URL_rewrites.md b/docs/URL_rewrites.md
index f14a00e..fb722c2 100644
--- a/docs/URL_rewrites.md
+++ b/docs/URL_rewrites.md
@@ -20,8 +20,8 @@ Here is an example .htaccess file you could place at the root of your Reposado r
     RewriteEngine On
     Options FollowSymLinks
     RewriteBase /
-    RewriteCond %{HTTP:Accept-Encoding} gzip
-    RewriteRule ^(.+\.(sucatalog))$ /$1.gz [L]
+    RewriteCond %{HTTP:Accept-Encoding} gzip
+    RewriteRule ^(.+\.(sucatalog|dist))$ /$1.gz [L]
     RewriteCond %{HTTP_USER_AGENT} Darwin/8
     RewriteRule ^index(.*)\.sucatalog$ content/catalogs/index$1.sucatalog [L]
     RewriteCond %{HTTP_USER_AGENT} Darwin/9

From 334300800a5b12d9bbb9e258b8e0b1d8a9ff97e6 Mon Sep 17 00:00:00 2001
From: Christoph von Gabler-Sahm
Date: Tue, 2 Aug 2016 16:16:57 +0200
Subject: [PATCH 3/5] updated rewrite documentation for compressed catalogs

---
 code/reposadolib/reposadocommon.py |  2 +-
 docs/URL_rewrites.md               | 44 +++++++++++++++---------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/code/reposadolib/reposadocommon.py b/code/reposadolib/reposadocommon.py
index 22c05a9..dd5ea25 100644
--- a/code/reposadolib/reposadocommon.py
+++ b/code/reposadolib/reposadocommon.py
@@ -262,7 +262,7 @@ def createCompressedFileCopy(local_file_path, copy_only_if_missing=False):
     '''creates a gzipped copy of the given file at the same location with .gz suffix'''
     local_gz_file_path = local_file_path + '.gz'
     if not ( copy_only_if_missing and os.path.exists(local_gz_file_path)):
-        with open(local_file_path) as f_in, gzip.open(local_gz_file_path, 'wb') as f_out:
+        with open(local_file_path) as f_in, gzip.open(local_gz_file_path, 'w') as f_out:
             f_out.writelines(f_in)
 
 def getDownloadStatus():
diff --git a/docs/URL_rewrites.md b/docs/URL_rewrites.md
index fb722c2..52d579b 100644
--- a/docs/URL_rewrites.md
+++ b/docs/URL_rewrites.md
@@ -17,28 +17,28 @@ If you are using Apache2 as your webserver, you may be able to configure mod_rew
 
 Here is an example .htaccess file you could place at the root of your Reposado repo:
 
-    RewriteEngine On
-    Options FollowSymLinks
-    RewriteBase /
-    RewriteCond %{HTTP:Accept-Encoding} gzip
-    RewriteRule ^(.+\.(sucatalog|dist))$ /$1.gz [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/8
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/index$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/9
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-leopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/10
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-leopard-snowleopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/11
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-lion-snowleopard-leopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/12
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/13
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/14
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog [L]
-    RewriteCond %{HTTP_USER_AGENT} Darwin/15
-    RewriteRule ^index(.*)\.sucatalog$ content/catalogs/others/index-10.11-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog [L]
-    
+    Options FollowSymLinks
+    RewriteEngine On
+    RewriteBase /
+    RewriteCond %{HTTP_USER_AGENT} Darwin/15
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-10.11-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=7]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/14
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=6]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/13
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=5]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/12
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=4]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/11
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=3]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/10
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-leopard-snowleopard.merged-1$1.sucatalog$2 [S=2]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/9
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-leopard.merged-1$1.sucatalog$2 [S=1]
+    RewriteCond %{HTTP_USER_AGENT} Darwin/8
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/index$1.sucatalog
+    RewriteCond %{HTTP:Accept-Encoding} gzip
+    RewriteRule ^(.+\.(sucatalog|dist))$ $1.gz
+    
 
 This requires Apache2 to be configured to actually pay attention to mod_rewrite rules in .htaccess files. See your Apache and mod_rewrite documentation for details.
From 33407bac2e005189e2e23a1cfc576a3e507380a3 Mon Sep 17 00:00:00 2001
From: Christoph von Gabler-Sahm
Date: Tue, 2 Aug 2016 16:56:02 +0200
Subject: [PATCH 4/5] Updated documentation for serving compressed catalogs

---
 docs/URL_rewrites.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/URL_rewrites.md b/docs/URL_rewrites.md
index 52d579b..3d504c6 100644
--- a/docs/URL_rewrites.md
+++ b/docs/URL_rewrites.md
@@ -17,9 +17,13 @@ If you are using Apache2 as your webserver, you may be able to configure mod_rew
 
 Here is an example .htaccess file you could place at the root of your Reposado repo:
 
+    AddEncoding x-gzip .gz
+    AddType text/plain .gz
     Options FollowSymLinks
     RewriteEngine On
     RewriteBase /
+    RewriteCond %{HTTP_USER_AGENT} Darwin/16
+    RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-10.12-10.11-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=8]
     RewriteCond %{HTTP_USER_AGENT} Darwin/15
     RewriteRule ^/index(.*)\.sucatalog(\.gz)?$ /content/catalogs/others/index-10.11-10.10-10.9-mountainlion-lion-snowleopard-leopard.merged-1$1.sucatalog$2 [S=7]
     RewriteCond %{HTTP_USER_AGENT} Darwin/14

From d8bcbcbae97cbaa616866b5cdba5385fc6c7b3a7 Mon Sep 17 00:00:00 2001
From: Christoph von Gabler-Sahm
Date: Wed, 10 Aug 2016 12:22:56 +0200
Subject: [PATCH 5/5] added error handling, removed 2.7 dependency

---
 code/reposadolib/reposadocommon.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/code/reposadolib/reposadocommon.py b/code/reposadolib/reposadocommon.py
index 16cfe91..c200bbe 100644
--- a/code/reposadolib/reposadocommon.py
+++ b/code/reposadolib/reposadocommon.py
@@ -275,12 +275,24 @@ def getDataFromPlist(filename):
     except (IOError, ExpatError):
         return {}
 
+
 def createCompressedFileCopy(local_file_path, copy_only_if_missing=False):
-    '''creates a gzipped copy of the given file at the same location with .gz suffix'''
+    '''Creates a gzipped copy of the given file at the same location with .gz suffix'''
     local_gz_file_path = local_file_path + '.gz'
-    if not ( copy_only_if_missing and os.path.exists(local_gz_file_path)):
-        with open(local_file_path) as f_in, gzip.open(local_gz_file_path, 'w') as f_out:
-            f_out.writelines(f_in)
+    if not (copy_only_if_missing and os.path.exists(local_gz_file_path)):
+        try:
+            f_in = open(local_file_path, 'rb')
+        except (OSError, IOError), err:
+            print_stderr('Error: could not open file at %s: %s' % (local_file_path, err))
+        try:
+            f_out = gzip.open(local_gz_file_path, 'wb')
+            f_out.writelines(f_in)
+            f_out.close()
+        except (OSError, IOError), err:
+            print_stderr('Error: could not create compressed file at %s: %s' % (local_gz_file_path, err))
+        finally:
+            f_in.close()
+
 
 def getDownloadStatus():
     '''Reads download status info from disk'''
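
A quick way to sanity-check the result of these patches after a sync is to decompress each .gz file that createCompressedFileCopy() wrote and compare it with its source. The sketch below is an illustration only and is not part of the patches above; the helper name gz_copy_matches_original is invented for the example, and the only assumption it makes is the one the patches establish, namely that the gzipped copy sits next to the original file with a .gz suffix.

    import gzip

    def gz_copy_matches_original(local_file_path):
        '''Returns True if <path>.gz decompresses to exactly the bytes of <path>.'''
        # Read the original file as raw bytes.
        f_in = open(local_file_path, 'rb')
        try:
            original = f_in.read()
        finally:
            f_in.close()
        # Decompress the sibling .gz copy and compare byte-for-byte.
        f_gz = gzip.open(local_file_path + '.gz', 'rb')
        try:
            decompressed = f_gz.read()
        finally:
            f_gz.close()
        return original == decompressed

Pass it the path of any local .sucatalog or replicated .dist file; a False result means the .gz copy no longer matches its source and should be regenerated.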