|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""archivebot.py - discussion page archiving bot. |
| 2 | +"""archivebot.py - Discussion page archiving bot. |
3 | 3 |
|
4 | 4 | usage: |
5 | 5 |
|
6 | 6 | python pwb.py archivebot [OPTIONS] [TEMPLATE_PAGE] |
7 | 7 |
|
8 | | -Several TEMPLATE_PAGE templates can be given at once. Default is |
9 | | -`User:MiszaBot/config`. Bot examines backlinks (Special:WhatLinksHere) |
10 | | -to all TEMPLATE_PAGE templates. Then goes through all pages (unless a |
11 | | -specific page specified using options) and archives old discussions. |
12 | | -This is done by breaking a page into threads, then scanning each thread |
13 | | -for timestamps. Threads older than a specified threshold are then moved |
14 | | -to another page (the archive), which can be named either basing on the |
15 | | -thread's name or then name can contain a counter which will be |
16 | | -incremented when the archive reaches a certain size. |
| 8 | +Multiple TEMPLATE_PAGE templates can be given in a single command. The |
| 9 | +default is ``User:MiszaBot/config``. The bot examines backlinks (i.e. |
| 10 | +Special:WhatLinksHere) to all given TEMPLATE_PAGE templates. It then |
| 11 | +processes those pages (unless a specific page is specified via options) |
| 12 | +and archives old discussions. |
17 | 13 |
|
18 | | -Transcluded template may contain the following parameters: |
| 14 | +This is done by splitting each page into threads and scanning them for |
| 15 | +timestamps. Threads older than a configured threshold are moved to an |
| 16 | +archive page. The archive page name can be based on the thread's title, |
| 17 | +or include a counter that increments when the archive reaches a |
| 18 | +configured size. |
| 19 | +
|
| 20 | +The transcluded configuration template may include the following |
| 21 | +parameters: |
19 | 22 |
|
20 | 23 | .. code:: wikitext |
21 | 24 |
|
|
30 | 33 | |key = |
31 | 34 | }} |
32 | 35 |
|
33 | | -Meanings of parameters are: |
| 36 | +**Parameter meanings:** |
34 | 37 |
|
35 | 38 | archive |
36 | | - Name of the page to which archived threads will be put. Must be a |
37 | | - subpage of the current page. Variables are supported. |
| 39 | + Name of the archive page where threads will be moved. Must be a |
| 40 | + subpage of the current page, unless a valid ``key`` is provided. |
| 41 | + Supports variables. |
| 42 | +
|
38 | 43 | algo |
39 | | - Specifies the maximum age of a thread. Must be in the form |
40 | | - :code:`old(<delay>)` where ``<delay>`` specifies the age in |
41 | | - seconds (s), hours (h), days (d), weeks (w), or years (y) like ``24h`` |
42 | | - or ``5d``. Default is :code:`old(24h)`. |
| 44 | + Specifies the maximum age of a thread using the syntax: |
| 45 | + :code:`old(<delay>)`, where ``<delay>`` can be in seconds (s), hours (h), |
| 46 | + days (d), weeks (w), or years (y). For example: ``24h`` or ``5d``. |
| 47 | + Default: :code:`old(24h)`. |
| 48 | +
|
43 | 49 | counter |
44 | | - The current value of a counter which could be assigned as variable. |
45 | | - Will be updated by bot. Initial value is 1. |
| 50 | + The current value of the archive counter used in archive page naming. |
| 51 | + Will be updated automatically by the bot. Default: 1. |
| 52 | +
|
46 | 53 | maxarchivesize |
47 | | - The maximum archive size before incrementing the counter. Value can |
48 | | - be given with appending letter like ``K`` or ``M`` which indicates |
49 | | - KByte or MByte. Default value is ``200K``. |
| 54 | + The maximum size of an archive page before incrementing the counter. |
| 55 | + A suffix of ``K`` or ``M`` may be used for kilobytes or megabytes. |
| 56 | + Default: ``200K``. |
| 57 | +
|
50 | 58 | minthreadsleft |
51 | | - Minimum number of threads that should be left on a page. Default |
52 | | - value is 5. |
| 59 | + Minimum number of threads that must remain on the main page after |
| 60 | + archiving. Default: 5. |
| 61 | +
|
53 | 62 | minthreadstoarchive |
54 | | - The minimum number of threads to archive at once. Default value is 2. |
| 63 | + Minimum number of threads that must be eligible for archiving before |
| 64 | + any are moved. Default: 2. |
| 65 | +
|
55 | 66 | archiveheader |
56 | | - Content that will be put on new archive pages as the header. This |
57 | | - parameter supports the use of variables. Default value is |
58 | | - ``{{talkarchive}}``. |
| 67 | + Content placed at the top of each newly created archive page. |
| 68 | + Supports variables. If not set explicitly, a localized default will |
| 69 | + be retrieved from Wikidata using known archive header templates. If |
| 70 | + no localized template is found, the fallback ``{{talkarchive}}`` is |
| 71 | + used. |
| 72 | +
|
| 73 | + .. note:: |
| 74 | + If no ``archiveheader`` is set and no localized template can be |
| 75 | + retrieved from Wikidata, the fallback ``{{talkarchive}}`` is used. |
| 76 | + This generic fallback may not be appropriate for all wikis, so it |
| 77 | + is recommended to set ``archiveheader`` explicitly in such cases. |
| 78 | +
|
59 | 79 | key |
60 | | - A secret key that (if valid) allows archives not to be subpages of |
61 | | - the page being archived. |
| 80 | + A secret key that, if valid, allows archive pages to exist outside |
| 81 | + of the subpage structure of the current page. |
62 | 82 |
|
63 | | -Variables below can be used in the value for "archive" in the template |
64 | | -above; numbers are **ascii** digits. Alternatively you may use |
65 | | -**localized** digits. This is only available for a few site languages. |
66 | | -Refer :attr:`NON_ASCII_DIGITS |
67 | | -<userinterfaces.transliteration.NON_ASCII_DIGITS>` whether there is a |
68 | | -localized one. |
| 83 | +Variables below can be used in the value of the "archive" parameter in |
| 84 | +the template above. Numbers are represented as **ASCII** digits by |
| 85 | +default; alternatively, **localized** digits may be used. Localized |
| 86 | +digits are only available for a few site languages. Please refer to |
| 87 | +:attr:`NON_ASCII_DIGITS <userinterfaces.transliteration.NON_ASCII_DIGITS>` |
| 88 | +to check if a localized version is available. |
69 | 89 |
|
70 | 90 | .. list-table:: |
71 | 91 | :header-rows: 1 |
|
104 | 124 | - %(localweek)s |
105 | 125 | - week number of the thread being archived |
106 | 126 |
|
107 | | -The ISO calendar starts with the Monday of the week which has at least |
108 | | -four days in the new Gregorian calendar. If January 1st is between |
109 | | -Monday and Thursday (including), the first week of that year started the |
110 | | -Monday of that week, which is in the year before if January 1st is not a |
111 | | -Monday. If it's between Friday or Sunday (including) the following week |
112 | | -is then the first week of the year. So up to three days are still |
113 | | -counted as the year before. |
| 127 | +The ISO calendar defines the first week of the year as the week |
| 128 | +containing the first Thursday of the Gregorian calendar year. This means: |
| 129 | +
|
| 130 | +- If January 1st falls on a Monday, Tuesday, Wednesday, or Thursday, then |
| 131 | + the week containing January 1st is considered the first week of the year. |
| 132 | +
|
| 133 | +- If January 1st falls on a Friday, Saturday, or Sunday, then the first ISO |
| 134 | + week starts on the following Monday. |
| 135 | +
|
| 136 | +Because of this, up to three days at the start of January can belong to the |
| 137 | +last week of the previous year according to the ISO calendar. |
114 | 138 |
|
115 | 139 | .. seealso:: Python :python:`datetime.date.isocalendar |
116 | 140 | <library/datetime.html#datetime.date.isocalendar>`, |
117 | 141 | https://webspace.science.uu.nl/~gent0113/calendar/isocalendar.htm |
118 | 142 |
|
119 | 143 | Options (may be omitted): |
120 | 144 |
|
121 | | --help show this help message and exit |
| 145 | +-help Show this help message and exit. |
122 | 146 |
|
123 | | --calc:PAGE calculate key for PAGE and exit |
| 147 | +-calc:PAGE Calculate key for PAGE and exit. |
124 | 148 |
|
125 | | --file:FILE load list of pages from FILE |
| 149 | +-file:FILE Load list of pages from FILE. |
126 | 150 |
|
127 | | --force override security options |
| 151 | +-force Override security options. |
128 | 152 |
|
129 | | --locale:LOCALE switch to locale LOCALE |
| 153 | +-locale:LOCALE Switch to locale LOCALE. |
130 | 154 |
|
131 | | --namespace:NS only archive pages from a given namespace |
| 155 | +-namespace:NS Only archive pages from the given namespace. |
132 | 156 |
|
133 | | --page:PAGE archive a single PAGE, default ns is a user talk page |
| 157 | +-page:PAGE Archive a single PAGE. Default namespace is a user talk |
| 158 | + page. |
134 | 159 |
|
135 | | --salt:SALT specify salt |
| 160 | +-salt:SALT Specify salt. |
136 | 161 |
|
137 | 162 | -keep Preserve thread order in archive even if threads are |
138 | | - archived later |
139 | | --sort Sort archive by timestamp; should not be used with `keep` |
| 163 | + archived later. |
| 164 | +
|
| 165 | +-sort Sort archive by timestamp; should not be used with ``-keep``. |
140 | 166 |
|
141 | 167 | -async Run the bot in parallel tasks. |
142 | 168 |
|
| 169 | +Version history: |
| 170 | +
|
143 | 171 | .. versionchanged:: 7.6 |
144 | | - Localized variables for "archive" template parameter are supported. |
145 | | - `User:MiszaBot/config` is the default template. `-keep` option was |
146 | | - added. |
| 172 | + Localized variables for the ``archive`` parameter are supported. |
| 173 | + ``User:MiszaBot/config`` is the default template. The ``-keep`` option |
| 174 | + was added. |
| 175 | +
|
147 | 176 | .. versionchanged:: 7.7 |
148 | 177 | ``-sort`` and ``-async`` options were added. |
| 178 | +
|
149 | 179 | .. versionchanged:: 8.2 |
150 | | - KeyboardInterrupt was enabled with ``-async`` option. |
| 180 | + KeyboardInterrupt support added when using the ``-async`` option. |
| 181 | +
|
| 182 | +.. versionchanged:: 10.3 |
| 183 | + If ``archiveheader`` is not set, the bot now attempts to retrieve a |
| 184 | + localized template from Wikidata (based on known item IDs). If none is |
| 185 | + found, ``{{talkarchive}}`` is used as fallback. |
151 | 186 | """ |
152 | 187 | # |
153 | 188 | # (C) Pywikibot team, 2006-2025 |
@@ -395,19 +430,34 @@ def max( |
395 | 430 | return max(ts1, ts2) |
396 | 431 |
|
397 | 432 | def get_header_template(self) -> str: |
398 | | - """Get localized archive header template. |
| 433 | + """Return a localized archive header template from Wikibase. |
| 434 | +
|
| 435 | + This method looks up a localized archive header template by |
| 436 | + checking a predefined list of Wikidata item IDs that correspond |
| 437 | + to commonly used archive header templates. It returns the first |
| 438 | + matching template found on the local wiki via the site's |
| 439 | + Wikibase repository. |
| 440 | +
|
| 441 | + If no such localized template is found, it falls back to the |
| 442 | + default ``{{talkarchive}}`` template. |
399 | 443 |
|
400 | 444 | .. versionadded:: 10.2 |
401 | 445 |
|
402 | | - :raises NotImplementedError: Archive header is not localized |
| 446 | + .. versionchanged:: 10.3 |
| 447 | + Returns ``{{talkarchive}}`` by default if no localized |
| 448 | + template is found. |
| 449 | +
|
| 450 | + .. caution:: |
| 451 | + The default should be avoided where possible. It is |
| 452 | + recommended to explicitly set the ``archiveheader`` parameter |
| 453 | + in the bot's configuration template instead. |
403 | 454 | """ |
404 | 455 | for item in ARCHIVE_HEADER: |
405 | 456 | tpl = self.site.page_from_repository(item) |
406 | 457 | if tpl: |
407 | 458 | return f'{{{{{tpl.title(with_ns=False)}}}}}' |
408 | 459 |
|
409 | | - raise NotImplementedError( |
410 | | - 'Archive header is not localized on your site') |
| 460 | + return '{{talkarchive}}' |
411 | 461 |
|
412 | 462 | def load_page(self) -> None: |
413 | 463 | """Load the page to be archived and break it up into threads. |
|
0 commit comments