#!/bin/bash

#
# Prints a YYYYMMDD date of the latest available date on
# https://wikidata.aerotechnet.com/enwiki/
# We do some additional checks if the dumps are complete, too
#

@@ -12,7 +12,6 @@ debug() {
1212 echo -n ' '
1313}
1414
15-
# Global result variable: the resolved dump date (YYYYMMDD).
# Initialized empty here; filled in by set_date_to_first_of_month.
DATE=''

# Sets $DATE to the first of the month (YYYYMMDD). If given a numeric
# parameter, go back that many months (default 0 = current month).
# Globals: DATE (written)
# Arguments: $1 - number of months to go back (optional)
set_date_to_first_of_month() {
  MINUS_NUM_MONTHS=${1:-0}

  if [[ "$(uname)" == "Darwin" ]]; then
    # BSD date (macOS): -v adjusts the clock value; "-<n>m" goes back n months.
    DATE=$(date -v "-${MINUS_NUM_MONTHS}m" +%Y%m01)
  else
    # GNU date: relative "--date" expression.
    DATE=$(date --date="-$MINUS_NUM_MONTHS month" +%Y%m01)
  fi
}
2928
30-
3129check_all_files_ready () {
3230 CHECK_DATE=$1
3331 debug " check_all_files_ready for $CHECK_DATE "
@@ -55,14 +53,13 @@ check_all_files_ready() {
5553
5654 ANY_FILE_MISSING=0
5755
58-
5956 # #
6057 # # 1. Chinese (ZH) Wikipedia
6158 # # usually the last to be dumped
6259 # #
6360 # from wikipedia_download.sh
6461 WIKIPEDIA_REQUIRED_FILES=" page pagelinks langlinks linktarget redirect"
65- DUMP_RUN_INFO_URL=" https://mirror.clarkson.edu/wikimedia /zhwiki/$CHECK_DATE /dumpruninfo.json"
62+ DUMP_RUN_INFO_URL=" https://wikidata.aerotechnet.com /zhwiki/$CHECK_DATE /dumpruninfo.json"
6663 debug $DUMP_RUN_INFO_URL
6764 DUMP_RUN_INFO=$( curl -s --fail " $DUMP_RUN_INFO_URL " )
6865
@@ -71,7 +68,6 @@ check_all_files_ready() {
7168 return 1
7269 fi
7370
74-
7571 for FN in $WIKIPEDIA_REQUIRED_FILES ; do
7672 TABLENAME=${FN// _/ } table # redirect => redirecttable
7773 debug " checking status for table $TABLENAME "
@@ -85,15 +81,13 @@ check_all_files_ready() {
8581 fi
8682 done
8783
88-
89-
9084 # #
9185 # # 2. Wikidata
9286 # #
9387 # from wikidata_download.sh
9488 WIKIDATA_REQUIRED_FILES=" geo_tags page wb_items_per_site"
9589
96- DUMP_RUN_INFO_URL=" https://mirror.clarkson.edu/wikimedia /wikidatawiki/$CHECK_DATE /dumpruninfo.json"
90+ DUMP_RUN_INFO_URL=" https://wikidata.aerotechnet.com /wikidatawiki/$CHECK_DATE /dumpruninfo.json"
9791 debug $DUMP_RUN_INFO_URL
9892 DUMP_RUN_INFO=$( curl -s --fail " $DUMP_RUN_INFO_URL " )
9993
@@ -118,17 +112,15 @@ check_all_files_ready() {
118112 return $ANY_FILE_MISSING
119113}
120114
121-
122-
#
# Usually you might try to get a list of dates from
# https://wikidata.aerotechnet.com/enwiki/ and then sort them, then look at status.html
# inside the directories.
#
# We want to avoid parsing HTML.
#
# Previous version of this script then looked at index.json
# (https://wikidata.aerotechnet.com/index.json) but the file is written at beginning
# of the export so first of month it would list files that don't exist yet.
#