-
Notifications
You must be signed in to change notification settings - Fork 13
新ゲーム (word square) #1161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
platypus999
wants to merge
2
commits into
master
Choose a base branch
from
platypus999/word-square
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
新ゲーム (word square) #1161
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| import argparse | ||
| import json | ||
| import os | ||
| import re | ||
| import sqlite3 | ||
| import sys | ||
| from typing import Iterable, List, Set | ||
|
|
||
|
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the definitions-database builder.

    Returns:
        Namespace carrying ``stages``, ``lexicon``, ``output``, ``reset`` and
        ``batch_size``, parsed from ``sys.argv``. ``--lexicon`` is mandatory.
    """
    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output lists them in the same sequence.
    option_specs = [
        ('--stages', {
            'default': 'word-square/stages.sqlite3',
            'help': 'Stages SQLite path. Default: word-square/stages.sqlite3',
        }),
        ('--lexicon', {
            'required': True,
            'help': 'Source lexicon SQLite path. Example: ./CSW24.db',
        }),
        ('--output', {
            'default': 'word-square/definitions.sqlite3',
            'help': 'Output SQLite path. Default: word-square/definitions.sqlite3',
        }),
        ('--reset', {
            'action': 'store_true',
            'help': 'Delete output DB if it exists before writing.',
        }),
        ('--batch-size', {
            'type': int,
            'default': 900,
            'help': 'Number of words per IN() query. Default: 900',
        }),
    ]
    cli = argparse.ArgumentParser(description='Build word-square definitions SQLite database.')
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
|
||
|
|
||
def iter_stage_words(conn: sqlite3.Connection) -> Iterable[str]:
    """Yield every row word and column word stored in the ``stages`` table.

    Each record's ``rows`` and ``cols`` columns are decoded as JSON arrays;
    the row words of a record are yielded first, then its column words.
    Words are yielded as stored, duplicates included.

    Args:
        conn: Open connection to a database containing a ``stages`` table.
    """
    stage_cursor = conn.cursor()
    stage_cursor.execute('SELECT rows, cols FROM stages')
    # iter() with a sentinel keeps pulling 1000-record batches until
    # fetchmany returns an empty list.
    for batch in iter(lambda: stage_cursor.fetchmany(1000), []):
        for encoded_rows, encoded_cols in batch:
            yield from json.loads(encoded_rows)
            yield from json.loads(encoded_cols)
|
|
||
|
|
||
def chunked(items: List[str], size: int) -> Iterable[List[str]]:
    """Split ``items`` into consecutive slices of at most ``size`` elements.

    Args:
        items: The list to split; it is sliced, never mutated.
        size: Maximum number of elements per slice; must be at least 1.

    Yields:
        Slices of ``items`` in order. Only the last slice may be shorter
        than ``size``. Nothing is yielded for an empty list.

    Raises:
        ValueError: If ``size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject non-positive sizes explicitly.
    if size < 1:
        raise ValueError(f'size must be a positive integer, got {size}')
    for start in range(0, len(items), size):
        yield items[start:start + size]
|
|
||
|
|
||
def censor_definition(definition: str) -> str:
    """Return ``definition`` with its all-caps words masked by question marks.

    A run of two or more ASCII capital letters standing as a whole word is
    replaced by the same number of ``?`` characters, unless it directly
    follows a hyphen. Single capital letters and mixed-case words are kept.
    """
    def mask(match):
        # Keep the length so the mask still hints at the word's size.
        hidden = match.group()
        return '?' * len(hidden)

    return re.sub(r'(?<!-)\b[A-Z]{2,}\b', mask, definition)
|
|
||
|
|
||
def main() -> int:
    """Build the definitions database for every word used by the stored stages.

    Collects the distinct, upper-cased words from the stages database, looks
    them up in the lexicon's ``words`` table in batches, and writes each hit
    (with a censored copy of its definition) into the ``definitions`` table
    of the output database. A summary is printed at the end.

    Returns:
        0 on success, 1 when the stages database contains no words.
    """
    args = parse_args()
    stages_path = os.path.expanduser(args.stages)
    lexicon_path = os.path.expanduser(args.lexicon)
    out_path = os.path.expanduser(args.output)
    # Clamp so a zero or negative --batch-size cannot break the batching.
    batch_size = max(1, args.batch_size)

    if args.reset and os.path.exists(out_path):
        os.remove(out_path)

    # dirname is '' when --output is a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    stages_conn = sqlite3.connect(stages_path)
    try:
        words: Set[str] = {
            word.upper() for word in iter_stage_words(stages_conn) if word
        }
    finally:
        stages_conn.close()

    if not words:
        print('No words found in stages database. Nothing to do.')
        return 1

    insert_sql = (
        'INSERT OR REPLACE INTO definitions'
        ' (word, definition, definition_censored, probability_order)'
        ' VALUES (?, ?, ?, ?)'
    )

    found_total = 0
    missing_total = 0
    lex_conn = sqlite3.connect(lexicon_path)
    try:
        out_conn = sqlite3.connect(out_path)
        # Nested try blocks: each connection is closed even if opening or
        # preparing the other one fails.
        try:
            out_conn.execute('PRAGMA journal_mode=OFF')
            out_conn.execute('PRAGMA synchronous=OFF')
            out_conn.execute(
                'CREATE TABLE IF NOT EXISTS definitions ('
                'word TEXT PRIMARY KEY,'
                'definition TEXT NOT NULL,'
                'definition_censored TEXT NOT NULL,'
                'probability_order INTEGER'
                ')'
            )

            out_conn.execute('BEGIN')
            cur = lex_conn.cursor()
            # Sorted input makes the batches deterministic between runs.
            for chunk in chunked(sorted(words), batch_size):
                placeholders = ','.join('?' for _ in chunk)
                # NOTE(review): 'probability_order0' is taken to be the
                # lexicon's column name; confirm against the lexicon schema.
                query = (
                    'SELECT word, definition, probability_order0'
                    f' FROM words WHERE word IN ({placeholders})'
                )
                cur.execute(query, chunk)
                rows = cur.fetchall()
                if rows:
                    out_conn.executemany(
                        insert_sql,
                        [
                            (word, defn, censor_definition(defn), prob)
                            for word, defn, prob in rows
                        ],
                    )
                # Count distinct words so repeated lexicon rows for one word
                # are not counted twice.
                found = len({row[0] for row in rows})
                found_total += found
                missing_total += len(chunk) - found
            out_conn.commit()
        finally:
            out_conn.close()
    finally:
        lex_conn.close()

    print(f'Unique stage words: {len(words)}')
    print(f'Found definitions: {found_total}')
    print(f'Missing definitions: {missing_total}')
    return 0
|
|
||
|
|
||
# Run the builder only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| import argparse | ||
| import glob | ||
| import json | ||
| import os | ||
| import re | ||
| import sqlite3 | ||
| import sys | ||
| from typing import Iterable, List | ||
|
|
||
# A board row: a line consisting of exactly seven upper-case ASCII letters.
ROW_RE = re.compile(r'^[A-Z]{7}$')
# Header line that opens a solution block, e.g. "Solution #12:".
SOLUTION_RE = re.compile(r'^Solution\s+#\d+:$')
|
|
||
|
|
||
def iter_input_files(patterns: List[str]) -> Iterable[str]:
    """Yield the paths matched by each glob pattern, pattern by pattern.

    ``~`` is expanded in every pattern before globbing. Matches of a single
    pattern are yielded in sorted order; patterns are handled in the order
    given, and a path matched by several patterns is yielded once per match.

    Args:
        patterns: Glob patterns, e.g. ``['./*.log']``.
    """
    for raw_pattern in patterns:
        matches = glob.glob(os.path.expanduser(raw_pattern))
        matches.sort()
        yield from matches
|
|
||
|
|
||
def compute_cols(rows: List[str]) -> List[str]:
    """Return the seven column words of a board given as its row words.

    Column ``i`` is the concatenation of the ``i``-th letter of every row,
    top to bottom. Exactly seven columns are always produced, so every row
    must have at least seven characters.
    """
    columns: List[str] = []
    for col_index in range(7):
        letters = [row_word[col_index] for row_word in rows]
        columns.append(''.join(letters))
    return columns
|
|
||
|
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the stages-database builder.

    Returns:
        Namespace carrying ``input`` (a list of one or more globs),
        ``output``, ``commit_every``, ``max_boards`` and ``reset``, parsed
        from ``sys.argv``. ``--input`` is mandatory.
    """
    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output lists them in the same sequence.
    option_specs = [
        ('--input', {
            'nargs': '+',
            'required': True,
            'help': 'Input log glob(s). Example: ./*.log',
        }),
        ('--output', {
            'default': 'word-square/stages.sqlite3',
            'help': 'Output SQLite path. Default: word-square/stages.sqlite3',
        }),
        ('--commit-every', {
            'type': int,
            'default': 1000,
            'help': 'Commit every N inserts. Default: 1000',
        }),
        ('--max-boards', {
            'type': int,
            'default': 0,
            'help': 'Stop after inserting N boards (0 means no limit).',
        }),
        ('--reset', {
            'action': 'store_true',
            'help': 'Delete output DB if it exists before writing.',
        }),
    ]
    cli = argparse.ArgumentParser(description='Build word-square stages SQLite database.')
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
|
||
|
|
||
def _iter_solution_boards(lines: Iterable[str]) -> Iterable[List[str]]:
    """Yield the seven rows of each complete solution block in ``lines``.

    A block starts at a line matching ``SOLUTION_RE`` and consists of seven
    lines matching ``ROW_RE``. Blank lines are ignored. Any other line inside
    a block discards the rows collected so far.
    """
    collecting = False
    rows: List[str] = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if SOLUTION_RE.match(line):
            # A new header always restarts collection.
            collecting = True
            rows = []
            continue
        if not collecting:
            continue
        if ROW_RE.match(line):
            rows.append(line)
            if len(rows) == 7:
                yield rows
                collecting = False
                rows = []
            continue
        # Unexpected line inside a solution block. Reset.
        collecting = False
        rows = []


def main() -> int:
    """Parse solver logs and store every distinct board in the stages database.

    Each complete board found in the input files is inserted into the
    ``stages`` table with its rows, columns, number of distinct words and a
    symmetry flag. Boards already present are counted as duplicates. A
    summary is printed at the end.

    Returns:
        0 on success, 1 when no input file matched the ``--input`` globs.
    """
    args = parse_args()
    out_path = os.path.expanduser(args.output)

    if args.reset and os.path.exists(out_path):
        os.remove(out_path)

    # dirname is '' when --output is a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    insert_sql = 'INSERT OR IGNORE INTO stages (board, rows, cols, unique_words, is_symmetric) VALUES (?, ?, ?, ?, ?)'

    inserted = 0
    dupes = 0
    total_solutions = 0
    commit_every = max(1, args.commit_every)

    conn = sqlite3.connect(out_path)
    try:
        conn.execute('PRAGMA journal_mode=OFF')
        conn.execute('PRAGMA synchronous=OFF')
        conn.execute(
            'CREATE TABLE IF NOT EXISTS stages ('
            'id INTEGER PRIMARY KEY,'
            'board TEXT NOT NULL UNIQUE,'
            'rows TEXT NOT NULL,'
            'cols TEXT NOT NULL,'
            'unique_words INTEGER NOT NULL,'
            'is_symmetric INTEGER NOT NULL'
            ')'
        )
        conn.execute('CREATE INDEX IF NOT EXISTS stages_board_idx ON stages(board)')
        conn.execute('CREATE INDEX IF NOT EXISTS stages_is_symmetric_idx ON stages(is_symmetric)')

        conn.execute('BEGIN')
        paths = list(iter_input_files(args.input))
        if not paths:
            print('No input files matched. Please pass --input with a valid glob.')
            return 1

        limit_reached = False
        for path in paths:
            with open(path, 'r', encoding='utf-8', errors='replace') as handle:
                for rows in _iter_solution_boards(handle):
                    board = ''.join(rows)
                    cols = compute_cols(rows)
                    # 14 distinct words means no word repeats among the
                    # seven rows and seven columns.
                    unique_words = len(set(rows + cols))
                    is_symmetric = int(rows == cols)
                    # INSERT OR IGNORE reports no change for a known board.
                    before = conn.total_changes
                    conn.execute(
                        insert_sql,
                        (board, json.dumps(rows), json.dumps(cols), unique_words, is_symmetric),
                    )
                    total_solutions += 1
                    if conn.total_changes > before:
                        inserted += 1
                        # Checkpoint only after a real insert; otherwise a run
                        # of duplicates would commit on every row while
                        # `inserted` sits at a multiple of commit_every.
                        if inserted % commit_every == 0:
                            conn.commit()
                            conn.execute('BEGIN')
                    else:
                        dupes += 1
                    if args.max_boards and inserted >= args.max_boards:
                        limit_reached = True
                        break
            if limit_reached:
                break
    finally:
        conn.commit()
        conn.close()

    print(f'Parsed solutions: {total_solutions}')
    print(f'Inserted boards: {inserted}')
    print(f'Duplicates skipped: {dupes}')
    return 0
|
|
||
|
|
||
# Run the builder only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| *.sqlite3 | ||
| *.sqlite3-* |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| import * as sqlite from 'sqlite'; | ||
| import sqlite3 from 'sqlite3'; | ||
| import path from 'path'; | ||
|
|
||
/** One clue of a word square: a row or column word with its definition data. */
export interface WordSquareClue {
	/** The answer word as stored in the stage. */
	word: string;
	/** Definition text, or '(no definition)' when the word has no entry. */
	definition: string;
	/** Censored variant of the definition, or '(no definition)' when the word has no entry. */
	definitionCensored: string;
	/** Value of the `probability_order` column, or null when unavailable. */
	probabilityOrder: number | null;
	/** Zero-based position of the word within its rows or cols list. */
	index: number;
}
|
|
||
/** A generated word-square puzzle. */
export interface WordSquare {
	/** The board split into single characters (49 entries for a valid stage). */
	board: string[];
	/** Clues for the row words, in stored order. */
	rows: WordSquareClue[];
	/** Clues for the column words, in stored order. */
	cols: WordSquareClue[];
}
|
|
||
/**
 * Pick one random stage from stages.sqlite3.
 *
 * @param symmetric When true, pick a stage flagged `is_symmetric = 1`;
 *   otherwise pick a stage with `unique_words = 14`.
 * @returns The stage's board, rows and cols columns, or null when no stage matches.
 */
const loadStage = async (symmetric: boolean) => {
	const db = await sqlite.open({
		filename: path.join(__dirname, 'stages.sqlite3'),
		driver: sqlite3.Database,
	});
	try {
		// unique_words is the number of distinct words among the 7 rows and
		// 7 columns (as computed by the stage build script), so 14 means
		// every row and column is a different word.
		const stage = await db.get<{board: string; rows: string; cols: string}>(
			symmetric
				? 'SELECT board, rows, cols FROM stages WHERE is_symmetric = 1 ORDER BY RANDOM() LIMIT 1'
				: 'SELECT board, rows, cols FROM stages WHERE unique_words = 14 ORDER BY RANDOM() LIMIT 1',
		);
		return stage ?? null;
	} finally {
		// Close the handle on every path so repeated calls do not leak
		// open database connections.
		await db.close();
	}
};
|
|
||
/** Shape of one record selected from the `definitions` table. */
interface DefinitionRow {
	/** The word the definition belongs to; used as the lookup key. */
	word: string;
	/** Definition text. */
	definition: string;
	/** Censored variant of the definition text. */
	definition_censored: string;
	/** Value of the `probability_order` column; may be null. */
	probability_order: number | null;
}
|
|
||
/**
 * Look up the given words in definitions.sqlite3.
 *
 * @param words Words to look up; duplicates are removed before querying.
 * @returns Map from word to its definition record. Words without an entry
 *   are simply absent from the map.
 */
const loadDefinitions = async (words: string[]) => {
	const definitions = new Map<string, DefinitionRow>();
	const uniqueWords = Array.from(new Set(words));
	// Nothing to look up: return before opening the database at all.
	if (uniqueWords.length === 0) {
		return definitions;
	}
	const db = await sqlite.open({
		filename: path.join(__dirname, 'definitions.sqlite3'),
		driver: sqlite3.Database,
	});
	try {
		// One bound parameter per word keeps the query parameterized.
		const placeholders = uniqueWords.map(() => '?').join(',');
		const rows = await db.all<DefinitionRow[]>(
			`SELECT word, definition, definition_censored, probability_order FROM definitions WHERE word IN (${placeholders})`,
			uniqueWords,
		);
		for (const row of rows) {
			definitions.set(row.word, row);
		}
		return definitions;
	} finally {
		// Close the handle on every path so repeated calls do not leak
		// open database connections.
		await db.close();
	}
};
|
|
||
/**
 * Build a word-square puzzle from a randomly chosen stage.
 *
 * @param symmetric Forwarded to the stage loader to choose the kind of stage.
 * @returns The puzzle, or null when no stage is available or the stored
 *   stage does not have 7 rows, 7 columns and 49 board cells.
 */
const generateWordSquare = async (symmetric: boolean = false): Promise<WordSquare | null> => {
	const stage = await loadStage(symmetric);
	if (!stage) {
		return null;
	}

	const rowWords: string[] = JSON.parse(stage.rows);
	const colWords: string[] = JSON.parse(stage.cols);
	const cells = stage.board.split('');

	const wellFormed = rowWords.length === 7 && colWords.length === 7 && cells.length === 49;
	if (!wellFormed) {
		return null;
	}

	const lookup = await loadDefinitions(rowWords.concat(colWords));

	// Words missing from the definitions database get placeholder text.
	const buildClue = (word: string, index: number): WordSquareClue => {
		const entry = lookup.get(word);
		const definition = entry?.definition ?? '(no definition)';
		const definitionCensored = entry?.definition_censored ?? '(no definition)';
		const probabilityOrder = entry?.probability_order ?? null;
		return {word, definition, definitionCensored, probabilityOrder, index};
	};

	const rows = rowWords.map((word, index) => buildClue(word, index));
	const cols = colWords.map((word, index) => buildClue(word, index));
	return {board: cells, rows, cols};
};
|
|
||
| export default generateWordSquare; | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 うにゃ?この
unique_words = 14って数字は、どういう意味があるのかにゃ?うなにはちょっとよくわからないにゃ。コメントで説明してほしいにゃ!