From 37e8cbd6bd9ad90f9988b1d28419879414f852da Mon Sep 17 00:00:00 2001 From: Seb35 Date: Sun, 27 Jan 2019 18:22:14 +0100 Subject: [PATCH 1/7] Minor adaptation to accept JORF database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch legi.py can now read the JORF database, which has a structure very similar to LEGI. The main difference is the directory structure: articles are not in a subdirectory of the text but grouped together in a common directory. To avoid breaking back-compatibility the JORF must be put in a different SQLite database, and an additional db_meta parameter is added (called “base”, which can have values in ['JORF', 'LEGI']). Given the CID is not known in the path itself but only in the file, the file must be read a bit earlier. --- README.md | 2 ++ legi/download.py | 17 +++++++--- legi/tar2sqlite.py | 81 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 4a06145..3b651ba 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ exemple avec [cron][cron] : (`chronic` fait partie des [`moreutils`](http://joeyh.name/code/moreutils/).) +L'option `--base JORF` permet de créer une base JORF au lieu d'une base LEGI. 
+ ## Fonctionnalités ### Normalisation des titres et numéros diff --git a/legi/download.py b/legi/download.py index 284a5bb..ff52fa3 100644 --- a/legi/download.py +++ b/legi/download.py @@ -9,18 +9,21 @@ DILA_FTP_HOST = 'echanges.dila.gouv.fr' DILA_FTP_PORT = 21 -DILA_LEGI_DIR = '/LEGI' +DILA_LEGI_DIR = { + 'LEGI': '/LEGI', + 'JORF': '/JORF', +} -def download_legi(dst_dir): +def download_legi(dst_dir, base='LEGI'): if not os.path.exists(dst_dir): os.mkdir(dst_dir) local_files = {filename: {} for filename in os.listdir(dst_dir)} ftph = ftplib.FTP() ftph.connect(DILA_FTP_HOST, DILA_FTP_PORT) ftph.login() - ftph.cwd(DILA_LEGI_DIR) - remote_files = [filename for filename in ftph.nlst() if '.tar.gz' in filename and ('legi_' in filename or 'LEGI_' in filename)] + ftph.cwd(DILA_LEGI_DIR[base]) + remote_files = [filename for filename in ftph.nlst() if '.tar.gz' in filename and (base.lower()+'_' in filename or base+'_' in filename)] common_files = [f for f in remote_files if f in local_files] missing_files = [f for f in remote_files if f not in local_files] remote_files = {filename: {} for filename in remote_files} @@ -64,5 +67,9 @@ def download_legi(dst_dir): if __name__ == '__main__': p = argparse.ArgumentParser() p.add_argument('directory') + p.add_argument('--base', default='LEGI') args = p.parse_args() - download_legi(args.directory) + if args.base not in DILA_LEGI_DIR.keys(): + print('!> Non-existing database "'+args.base+'".') + raise SystemExit(1) + download_legi(args.directory, args.base) diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index 8d559c8..878a6a6 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -42,13 +42,13 @@ def scrape_tags(attrs, root, wanted_tags, unwrap=False): ) -def suppress(get_table, db, liste_suppression): +def suppress(base, get_table, db, liste_suppression): counts = {} for path in liste_suppression: parts = path.split('/') - assert parts[0] == 'legi' - text_cid = parts[11] + assert parts[0] == base.lower() text_id = 
parts[-1] + text_cid = parts[11] if base == 'LEGI' else text_id assert len(text_id) == 20 table = get_table(parts) db.run(""" @@ -124,7 +124,7 @@ def suppress(get_table, db, liste_suppression): """, (parts[3], text_cid, text_id)) count(counts, 'delete from duplicate_files', db.changes()) total = sum(counts.values()) - print("made", total, "changes in the database based on liste_suppression_legi.dat:", + print("made", total, "changes in the database based on liste_suppression_"+base.lower()+".dat:", json.dumps(counts, indent=4, sort_keys=True)) @@ -170,10 +170,15 @@ def process_archive(db, archive_path, process_links=True): insert = db.insert update = db.update - def get_table(parts): + def get_table( parts): + if parts[-1][4:8] not in TABLES_MAP: + return None table = TABLES_MAP[parts[-1][4:8]] if table == 'textes_': - table += parts[13] + 's' + if parts[0] == 'legi': + table += parts[13] + 's' + elif parts[0] == 'jorf': + table += parts[3] + 's' return table counts = {} @@ -183,6 +188,8 @@ def count_one(k): except KeyError: counts[k] = 1 + base = db.one("SELECT value FROM db_meta WHERE key = 'base'") or LEGI + skipped = 0 unknown_folders = {} liste_suppression = [] @@ -193,27 +200,53 @@ def count_one(k): if path[-1] == '/': continue parts = path.split('/') - if parts[-1] == 'liste_suppression_legi.dat': + if parts[-1] == 'liste_suppression_'+base.lower()+'.dat': liste_suppression += b''.join(entry.get_blocks()).decode('ascii').split() continue - if parts[1] == 'legi': + if parts[1] == base.lower(): path = path[len(parts[0])+1:] parts = parts[1:] - if not parts[2].startswith('code_et_TNC_'): + if parts[0] not in ['legi', 'jorf'] or \ + ( parts[0] == 'legi' and not parts[2].startswith('code_et_TNC_') ) or \ + ( parts[0] == 'jorf' and parts[2] not in ['article', 'section_ta', 'texte'] ): # https://github.com/Legilibre/legi.py/issues/23 try: unknown_folders[parts[2]] += 1 except KeyError: unknown_folders[parts[2]] = 1 continue - dossier = parts[3] - text_cid = 
parts[11] + dossier = parts[3] if parts[0] == 'legi' else 'jorf' + text_cid = parts[11] if parts[0] == 'legi' else None text_id = parts[-1][:-4] mtime = entry.mtime + # Read the file + xml.feed(b''.join(entry.get_blocks())) + root = xml.close() + tag = root.tag + meta = root.find('META') + + # Obtain the CID when database is not LEGI + if base != 'LEGI': + if tag in ['ARTICLE', 'SECTION_TA']: + contexte = root.find('CONTEXTE/TEXTE') + text_cid = attr(contexte, 'cid') + elif tag in ['TEXTELR', 'TEXTE_VERSION']: + meta_spec = meta.find('META_SPEC') + meta_chronicle = meta_spec.find('META_TEXTE_CHRONICLE') + text_cid = meta_chronicle.find('CID').text + else: + raise Exception('unexpected tag: '+tag) + # Skip the file if it hasn't changed, store it if it's a duplicate duplicate = False table = get_table(parts) + if table == None: + try: + unknown_folders[text_id] += 1 + except KeyError: + unknown_folders[text_id] = 1 + continue prev_row = db.one(""" SELECT mtime, dossier, cid FROM {0} @@ -270,11 +303,6 @@ def count_one(k): skipped += 1 continue - xml.feed(b''.join(entry.get_blocks())) - root = xml.close() - tag = root.tag - meta = root.find('META') - # Check the ID if tag == 'SECTION_TA': assert root.find('ID').text == text_id @@ -323,6 +351,9 @@ def count_one(k): ] elif tag == 'TEXTELR': assert table == 'textes_structs' + meta_spec = meta.find('META_SPEC') + meta_chronicle = meta_spec.find('META_TEXTE_CHRONICLE') + assert meta_chronicle.find('CID').text == text_cid scrape_tags(attrs, root, TEXTELR_TAGS) sommaires = [ { @@ -454,7 +485,7 @@ def count_one(k): print("skipped", x, "files in unknown folder `%s`" % d) if liste_suppression: - suppress(get_table, db, liste_suppression) + suppress(base, get_table, db, liste_suppression) def main(): @@ -467,6 +498,7 @@ def main(): p.add_argument('--pragma', action='append', default=[], help="Doc: https://www.sqlite.org/pragma.html | Example: journal_mode=WAL") p.add_argument('--raw', default=False, action='store_true') + 
p.add_argument('--base') p.add_argument('--skip-links', default=False, action='store_true', help="if set, all link metadata will be ignored (the `liens` table will be empty)") args = p.parse_args() @@ -475,7 +507,14 @@ def main(): os.mkdir(args.anomalies_dir) db = connect_db(args.db, pragmas=args.pragma) + base = db.one("SELECT value FROM db_meta WHERE key = 'base'") last_update = db.one("SELECT value FROM db_meta WHERE key = 'last_update'") + if not base: + base = args.base.upper() if args.base and not last_update else 'LEGI' + db.insert('db_meta', dict(key='base', value=base)) + if args.base and base != args.base.upper(): + print('!> Wrong database: requested '+args.base.upper()+' but existing database is '+base+'.') + raise SystemExit(1) # Check and record the data mode db_meta_raw = db.one("SELECT value FROM db_meta WHERE key = 'raw'") @@ -499,12 +538,12 @@ def main(): # Look for new archives in the given directory print("> last_update is", last_update) - archive_re = re.compile(r'(.+_)?legi(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) + archive_re = re.compile(r'(.+_)?'+base.lower()+r'(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) skipped = 0 archives = sorted([ (m.group('date'), bool(m.group('global')), m.group(0)) for m in [ archive_re.match(fn) for fn in os.listdir(args.directory) - if fnmatch(fn.lower(), '*legi_*.tar.*') + if fnmatch(fn.lower(), '*'+base.lower()+'_*.tar.*') ] ]) most_recent_global = [t[0] for t in archives if t[1]][-1] @@ -532,13 +571,13 @@ def main(): print('last_update is now set to', last_update) # Detect anomalies if requested - if args.anomalies: + if args.anomalies and base == 'LEGI': fpath = args.anomalies_dir + '/anomalies-' + last_update + '.txt' with open(fpath, 'w') as f: n_anomalies = detect_anomalies(db, f) print("logged", n_anomalies, "anomalies in", fpath) - if not args.raw: + if not args.raw and base == 'LEGI': from .normalize import normalize_text_titles normalize_text_titles(db) from 
.factorize import main as factorize From e37be83498cad4d806f120a43b5fc045551e3584 Mon Sep 17 00:00:00 2001 From: Seb35 Date: Sun, 27 Jan 2019 18:48:26 +0100 Subject: [PATCH 2/7] Fix PEP warning --- legi/tar2sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index 878a6a6..d5cf55f 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -241,7 +241,7 @@ def count_one(k): # Skip the file if it hasn't changed, store it if it's a duplicate duplicate = False table = get_table(parts) - if table == None: + if table is None: try: unknown_folders[text_id] += 1 except KeyError: From 5d2a5d57d966c331985bf4c8eeefba0aa8ea2593 Mon Sep 17 00:00:00 2001 From: Seb35 Date: Sun, 27 Jan 2019 19:07:14 +0100 Subject: [PATCH 3/7] Fix PEP warnings --- legi/tar2sqlite.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index d5cf55f..b73528b 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -170,7 +170,7 @@ def process_archive(db, archive_path, process_links=True): insert = db.insert update = db.update - def get_table( parts): + def get_table(parts): if parts[-1][4:8] not in TABLES_MAP: return None table = TABLES_MAP[parts[-1][4:8]] @@ -188,7 +188,7 @@ def count_one(k): except KeyError: counts[k] = 1 - base = db.one("SELECT value FROM db_meta WHERE key = 'base'") or LEGI + base = db.one("SELECT value FROM db_meta WHERE key = 'base'") or 'LEGI' skipped = 0 unknown_folders = {} @@ -207,8 +207,8 @@ def count_one(k): path = path[len(parts[0])+1:] parts = parts[1:] if parts[0] not in ['legi', 'jorf'] or \ - ( parts[0] == 'legi' and not parts[2].startswith('code_et_TNC_') ) or \ - ( parts[0] == 'jorf' and parts[2] not in ['article', 'section_ta', 'texte'] ): + (parts[0] == 'legi' and not parts[2].startswith('code_et_TNC_')) or \ + (parts[0] == 'jorf' and parts[2] not in ['article', 'section_ta', 'texte']): # https://github.com/Legilibre/legi.py/issues/23 
try: unknown_folders[parts[2]] += 1 From 669c37a9bb5dc9e9a1ef727fa7a1f392eba9555e Mon Sep 17 00:00:00 2001 From: Adrien Di Pasquale Date: Mon, 4 Feb 2019 17:31:52 +0100 Subject: [PATCH 4/7] minor improvements on JORF support --- .gitignore | 2 +- legi/sql/schema.sql | 12 ++++++------ legi/tar2sqlite.py | 38 +++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 10aaf4a..0e031c6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,5 @@ .coverage *.pyc .tox/ -legi.sqlite* +*.sqlite* /tarballs/ diff --git a/legi/sql/schema.sql b/legi/sql/schema.sql index f8853c8..6342b3c 100644 --- a/legi/sql/schema.sql +++ b/legi/sql/schema.sql @@ -18,7 +18,7 @@ CREATE TABLE textes CREATE TABLE textes_structs ( id char(20) unique not null , versions text -, dossier text not null +, dossier text , cid char(20) not null , mtime int not null ); @@ -49,7 +49,7 @@ CREATE TABLE textes_versions , nota text , abro text , rect text -, dossier text not null +, dossier text , cid char(20) not null , mtime int not null , texte_id int references textes @@ -63,7 +63,7 @@ CREATE TABLE sections , titre_ta text , commentaire text , parent char(20) -- REFERENCES sections(id) -, dossier text not null +, dossier text , cid char(20) not null , mtime int not null ); @@ -114,11 +114,11 @@ CREATE TABLE duplicate_files ( id char(20) not null , sous_dossier text not null , cid char(20) not null -, dossier text not null +, dossier text , mtime int not null , data text not null , other_cid char(20) not null -, other_dossier text not null +, other_dossier text , other_mtime int not null , UNIQUE (id, sous_dossier, cid, dossier) ); @@ -132,7 +132,7 @@ CREATE TABLE textes_versions_brutes , autorite text , num text , date_texte day -, dossier text not null +, dossier text , cid char(20) not null , mtime int not null ); diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index b73528b..2384876 100755 --- a/legi/tar2sqlite.py +++ 
b/legi/tar2sqlite.py @@ -215,8 +215,8 @@ def count_one(k): except KeyError: unknown_folders[parts[2]] = 1 continue - dossier = parts[3] if parts[0] == 'legi' else 'jorf' - text_cid = parts[11] if parts[0] == 'legi' else None + dossier = parts[3] if base == 'LEGI' else None + text_cid = parts[11] if base == 'LEGI' else None text_id = parts[-1][:-4] mtime = entry.mtime @@ -498,22 +498,34 @@ def main(): p.add_argument('--pragma', action='append', default=[], help="Doc: https://www.sqlite.org/pragma.html | Example: journal_mode=WAL") p.add_argument('--raw', default=False, action='store_true') - p.add_argument('--base') + p.add_argument('--base', default="LEGI", choices=["LEGI", "JORF", "KALI"]) p.add_argument('--skip-links', default=False, action='store_true', help="if set, all link metadata will be ignored (the `liens` table will be empty)") args = p.parse_args() + if args.base != 'LEGI' and not args.raw: + print("!> You need to use the --raw option when working with bases other than LEGI.") + raise SystemExit(1) + + if args.base != 'LEGI' and args.anomalies: + print("!> The --anomalies option can only be used with the LEGI base") + raise SystemExit(1) + if not os.path.isdir(args.anomalies_dir): os.mkdir(args.anomalies_dir) db = connect_db(args.db, pragmas=args.pragma) - base = db.one("SELECT value FROM db_meta WHERE key = 'base'") + base_meta = db.one("SELECT value FROM db_meta WHERE key = 'base'") last_update = db.one("SELECT value FROM db_meta WHERE key = 'last_update'") - if not base: - base = args.base.upper() if args.base and not last_update else 'LEGI' - db.insert('db_meta', dict(key='base', value=base)) - if args.base and base != args.base.upper(): - print('!> Wrong database: requested '+args.base.upper()+' but existing database is '+base+'.') + if not base_meta: + if last_update: + # for backwards compatibility, this defaults to LEGI + base_meta = "LEGI" + else: + base_meta = args.base + db.insert('db_meta', dict(key='base', value=base_meta)) + if args.base 
and base_meta != args.base: + print('!> Wrong database: requested '+args.base+' but existing database is '+base_meta+'.') raise SystemExit(1) # Check and record the data mode @@ -538,12 +550,12 @@ def main(): # Look for new archives in the given directory print("> last_update is", last_update) - archive_re = re.compile(r'(.+_)?'+base.lower()+r'(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) + archive_re = re.compile(r'(.+_)?'+args.base.lower()+r'(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) skipped = 0 archives = sorted([ (m.group('date'), bool(m.group('global')), m.group(0)) for m in [ archive_re.match(fn) for fn in os.listdir(args.directory) - if fnmatch(fn.lower(), '*'+base.lower()+'_*.tar.*') + if fnmatch(fn.lower(), '*'+args.base.lower()+'_*.tar.*') ] ]) most_recent_global = [t[0] for t in archives if t[1]][-1] @@ -571,13 +583,13 @@ def main(): print('last_update is now set to', last_update) # Detect anomalies if requested - if args.anomalies and base == 'LEGI': + if args.anomalies: fpath = args.anomalies_dir + '/anomalies-' + last_update + '.txt' with open(fpath, 'w') as f: n_anomalies = detect_anomalies(db, f) print("logged", n_anomalies, "anomalies in", fpath) - if not args.raw and base == 'LEGI': + if not args.raw: from .normalize import normalize_text_titles normalize_text_titles(db) from .factorize import main as factorize From 4f152f1984a76a4c7428b72879c18aa17136484c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Beyou?= Date: Sat, 9 Feb 2019 11:23:15 +0100 Subject: [PATCH 5/7] Partial revert to keep optional --base argument when update --- legi/tar2sqlite.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index 2384876..b7475ab 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -498,34 +498,30 @@ def main(): p.add_argument('--pragma', action='append', default=[], help="Doc: 
https://www.sqlite.org/pragma.html | Example: journal_mode=WAL") p.add_argument('--raw', default=False, action='store_true') - p.add_argument('--base', default="LEGI", choices=["LEGI", "JORF", "KALI"]) + p.add_argument('--base', choices=["LEGI", "JORF"]) p.add_argument('--skip-links', default=False, action='store_true', help="if set, all link metadata will be ignored (the `liens` table will be empty)") args = p.parse_args() - if args.base != 'LEGI' and not args.raw: - print("!> You need to use the --raw option when working with bases other than LEGI.") - raise SystemExit(1) - - if args.base != 'LEGI' and args.anomalies: - print("!> The --anomalies option can only be used with the LEGI base") - raise SystemExit(1) - if not os.path.isdir(args.anomalies_dir): os.mkdir(args.anomalies_dir) db = connect_db(args.db, pragmas=args.pragma) - base_meta = db.one("SELECT value FROM db_meta WHERE key = 'base'") + base = db.one("SELECT value FROM db_meta WHERE key = 'base'") last_update = db.one("SELECT value FROM db_meta WHERE key = 'last_update'") - if not base_meta: - if last_update: - # for backwards compatibility, this defaults to LEGI - base_meta = "LEGI" - else: - base_meta = args.base - db.insert('db_meta', dict(key='base', value=base_meta)) - if args.base and base_meta != args.base: - print('!> Wrong database: requested '+args.base+' but existing database is '+base_meta+'.') + if not base: + base = args.base.upper() if args.base and not last_update else 'LEGI' + db.insert('db_meta', dict(key='base', value=base)) + if args.base and base != args.base: + print('!> Wrong database: requested '+base.upper()+' but existing database is '+base+'.') + raise SystemExit(1) + + if base != 'LEGI' and not args.raw: + print("!> You need to use the --raw option when working with bases other than LEGI.") + raise SystemExit(1) + + if base != 'LEGI' and args.anomalies: + print("!> The --anomalies option can only be used with the LEGI base") raise SystemExit(1) # Check and record the data 
mode @@ -550,12 +546,12 @@ def main(): # Look for new archives in the given directory print("> last_update is", last_update) - archive_re = re.compile(r'(.+_)?'+args.base.lower()+r'(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) + archive_re = re.compile(r'(.+_)?'+base.lower()+r'(?P_global)?_(?P[0-9]{8}-[0-9]{6})\..+', flags=re.IGNORECASE) skipped = 0 archives = sorted([ (m.group('date'), bool(m.group('global')), m.group(0)) for m in [ archive_re.match(fn) for fn in os.listdir(args.directory) - if fnmatch(fn.lower(), '*'+args.base.lower()+'_*.tar.*') + if fnmatch(fn.lower(), '*'+base.lower()+'_*.tar.*') ] ]) most_recent_global = [t[0] for t in archives if t[1]][-1] From f5043c6625e2e85eb3cea4d9da2401105ba9a911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Beyou?= Date: Sat, 9 Feb 2019 11:26:40 +0100 Subject: [PATCH 6/7] Partial revert to keep optional --base argument when update (bis) --- legi/tar2sqlite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index b7475ab..1aa157d 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -512,8 +512,8 @@ def main(): if not base: base = args.base.upper() if args.base and not last_update else 'LEGI' db.insert('db_meta', dict(key='base', value=base)) - if args.base and base != args.base: - print('!> Wrong database: requested '+base.upper()+' but existing database is '+base+'.') + if args.base and base != args.base.upper(): + print('!> Wrong database: requested '+args.base.upper()+' but existing database is '+base+'.') raise SystemExit(1) if base != 'LEGI' and not args.raw: From 532d1132a724c497f0d624c936326a1ba2a87794 Mon Sep 17 00:00:00 2001 From: Seb35 Date: Sat, 9 Feb 2019 14:05:07 +0100 Subject: [PATCH 7/7] Ignore strange value in liste_suppression, update README, fix SQL constraint --- README.md | 6 ++++++ legi/sql/schema.sql | 2 +- legi/tar2sqlite.py | 10 ++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git 
a/README.md b/README.md index 3b651ba..b50744c 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,12 @@ exemple avec [cron][cron] : (`chronic` fait partie des [`moreutils`](http://joeyh.name/code/moreutils/).) L'option `--base JORF` permet de créer une base JORF au lieu d'une base LEGI. +Noter que l'option `--raw` est obligatoire pour les bases autres que LEGI. + +Une fois la base créée, l'option `--base` n'est plus nécessaire car sa +valeur est enregistrée dans les métadonnées de la base et est utilisée comme +valeur par défaut. Toutefois, il peut être vérifié que la base à mettre à +jour est du bon type en donnant ce paramètre `--base`. ## Fonctionnalités diff --git a/legi/sql/schema.sql b/legi/sql/schema.sql index 6342b3c..70cedb7 100644 --- a/legi/sql/schema.sql +++ b/legi/sql/schema.sql @@ -78,7 +78,7 @@ CREATE TABLE articles , type text , nota text , bloc_textuel text -, dossier text not null +, dossier text , cid char(20) not null , mtime int not null ); diff --git a/legi/tar2sqlite.py b/legi/tar2sqlite.py index 1aa157d..ffe9248 100755 --- a/legi/tar2sqlite.py +++ b/legi/tar2sqlite.py @@ -46,6 +46,8 @@ def suppress(base, get_table, db, liste_suppression): counts = {} for path in liste_suppression: parts = path.split('/') + if parts[0] == 'null': + continue assert parts[0] == base.lower() text_id = parts[-1] text_cid = parts[11] if base == 'LEGI' else text_id @@ -516,10 +518,6 @@ def main(): print('!> Wrong database: requested '+args.base.upper()+' but existing database is '+base+'.') raise SystemExit(1) - if base != 'LEGI' and not args.raw: - print("!> You need to use the --raw option when working with bases other than LEGI.") - raise SystemExit(1) - if base != 'LEGI' and args.anomalies: print("!> The --anomalies option can only be used with the LEGI base") raise SystemExit(1) @@ -535,6 +533,10 @@ def main(): if db_meta_raw != args.raw: db.insert('db_meta', dict(key='raw', value=args.raw), replace=True) + if base != 'LEGI' and not args.raw: + 
print("!> You need to use the --raw option when working with bases other than LEGI.") + raise SystemExit(1) + # Handle the --skip-links option has_links = bool(db.one("SELECT 1 FROM liens LIMIT 1")) if not args.skip_links and not has_links and last_update is not None: