Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions bin/word-square-build-defs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import argparse
import json
import os
import re
import sqlite3
import sys
from typing import Iterable, List, Set


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the definitions builder.

    Returns:
        The ``argparse`` namespace built from ``sys.argv``, carrying the
        attributes ``stages``, ``lexicon``, ``output``, ``reset`` and
        ``batch_size``.
    """
    # One (flag, add_argument keyword arguments) entry per option, in the
    # order they should appear in ``--help``.
    option_table = [
        ('--stages', {
            'default': 'word-square/stages.sqlite3',
            'help': 'Stages SQLite path. Default: word-square/stages.sqlite3',
        }),
        ('--lexicon', {
            'required': True,
            'help': 'Source lexicon SQLite path. Example: ./CSW24.db',
        }),
        ('--output', {
            'default': 'word-square/definitions.sqlite3',
            'help': 'Output SQLite path. Default: word-square/definitions.sqlite3',
        }),
        ('--reset', {
            'action': 'store_true',
            'help': 'Delete output DB if it exists before writing.',
        }),
        ('--batch-size', {
            'type': int,
            'default': 900,
            'help': 'Number of words per IN() query. Default: 900',
        }),
    ]

    cli = argparse.ArgumentParser(description='Build word-square definitions SQLite database.')
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()


def iter_stage_words(conn: sqlite3.Connection) -> Iterable[str]:
    """Yield every row word and column word stored in the ``stages`` table.

    The ``rows`` and ``cols`` columns each hold a JSON array of words. For
    each stage the row words are yielded first, then the column words.
    Duplicates are not filtered out.

    Args:
        conn: Open connection to a database containing a ``stages`` table.

    Yields:
        Each word exactly as stored in the JSON arrays.
    """
    cursor = conn.cursor()
    cursor.execute('SELECT rows, cols FROM stages')
    # fetchmany() returns an empty list once the result set is exhausted,
    # which serves as the sentinel that stops the batch iterator.
    for batch in iter(lambda: cursor.fetchmany(1000), []):
        for encoded_rows, encoded_cols in batch:
            yield from json.loads(encoded_rows)
            yield from json.loads(encoded_cols)


def chunked(items: List[str], size: int) -> Iterable[List[str]]:
    """Split ``items`` into consecutive slices of at most ``size`` elements.

    Args:
        items: The list to split.
        size: Step between slice starts, and the maximum slice length.

    Yields:
        Successive slices of ``items``; only the last may be shorter.
    """
    total = len(items)
    for start in range(0, total, size):
        stop = start + size
        yield items[start:stop]


def censor_definition(definition: str) -> str:
    """Mask the all-caps words of a definition with question marks.

    Every whole word made of two or more uppercase ASCII letters is replaced
    by the same number of ``?`` characters, unless the word is immediately
    preceded by a hyphen (for example ``-ING`` is left untouched).

    Args:
        definition: The definition text to censor.

    Returns:
        The definition with matching words masked; its length is unchanged.
    """
    def mask(match):
        # Keep the masked word as long as the original.
        return '?' * (match.end() - match.start())

    shouted_word = re.compile(r'(?<!-)\b[A-Z]{2,}\b')
    return shouted_word.sub(mask, definition)


def main() -> int:
    """Build the definitions database for every word used by the stages.

    Collects the distinct (upper-cased) words from the stages database, looks
    them up in the lexicon's ``words`` table in batches, and writes one row
    per found word into the ``definitions`` table of the output database,
    together with a censored copy of the definition.

    Returns:
        ``0`` on success, ``1`` when the stages database contains no words.
    """
    args = parse_args()
    stages_path = os.path.expanduser(args.stages)
    lexicon_path = os.path.expanduser(args.lexicon)
    out_path = os.path.expanduser(args.output)
    batch_size = max(1, args.batch_size)

    # Read the stage words before touching the output, so that an empty or
    # unreadable stages database never costs us an existing output file.
    stages_conn = sqlite3.connect(stages_path)
    try:
        words: Set[str] = {
            word.upper() for word in iter_stage_words(stages_conn) if word
        }
    finally:
        stages_conn.close()

    if not words:
        print('No words found in stages database. Nothing to do.')
        return 1

    if args.reset and os.path.exists(out_path):
        os.remove(out_path)

    # dirname() is '' for a bare filename, and os.makedirs('') raises.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    insert_sql = (
        'INSERT OR REPLACE INTO definitions'
        ' (word, definition, definition_censored, probability_order)'
        ' VALUES (?, ?, ?, ?)'
    )

    found_total = 0
    missing_total = 0
    lex_conn = sqlite3.connect(lexicon_path)
    try:
        out_conn = sqlite3.connect(out_path)
        try:
            out_conn.execute('PRAGMA journal_mode=OFF')
            out_conn.execute('PRAGMA synchronous=OFF')
            out_conn.execute(
                'CREATE TABLE IF NOT EXISTS definitions ('
                'word TEXT PRIMARY KEY,'
                'definition TEXT NOT NULL,'
                'definition_censored TEXT NOT NULL,'
                'probability_order INTEGER'
                ')'
            )
            # A single transaction for the whole import.
            out_conn.execute('BEGIN')
            cur = lex_conn.cursor()
            for chunk in chunked(sorted(words), batch_size):
                placeholders = ','.join('?' for _ in chunk)
                query = (
                    'SELECT word, definition, probability_order0'
                    f' FROM words WHERE word IN ({placeholders})'
                )
                cur.execute(query, chunk)
                rows = cur.fetchall()
                if rows:
                    out_conn.executemany(
                        insert_sql,
                        [
                            (word, defn, censor_definition(defn), prob)
                            for word, defn, prob in rows
                        ],
                    )
                # Count distinct words in case the lexicon returns a word
                # more than once.
                found_in_chunk = len({row[0] for row in rows})
                found_total += found_in_chunk
                missing_total += len(chunk) - found_in_chunk
            out_conn.commit()
        finally:
            out_conn.close()
    finally:
        # Closed even when opening or preparing the output database fails.
        lex_conn.close()

    print(f'Unique stage words: {len(words)}')
    print(f'Found definitions: {found_total}')
    print(f'Missing definitions: {missing_total}')
    return 0


# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
146 changes: 146 additions & 0 deletions bin/word-square-build-stages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import argparse
import glob
import json
import os
import re
import sqlite3
import sys
from typing import Iterable, List

# A board row: a line consisting of exactly seven uppercase ASCII letters.
ROW_RE = re.compile(r'^[A-Z]{7}$')
# Header line that opens a solution block, e.g. "Solution #12:".
SOLUTION_RE = re.compile(r'^Solution\s+#\d+:$')


def iter_input_files(patterns: List[str]) -> Iterable[str]:
    """Yield the paths matched by each glob pattern.

    Patterns are processed in the order given. ``~`` is expanded in each
    pattern, and the matches of a single pattern are yielded in sorted order.
    A path matched by several patterns is yielded once per pattern.

    Args:
        patterns: Glob patterns to expand.

    Yields:
        Matching file-system paths.
    """
    for raw_pattern in patterns:
        matches = glob.glob(os.path.expanduser(raw_pattern))
        yield from sorted(matches)


def compute_cols(rows: List[str]) -> List[str]:
    """Read the seven column words off a board given as row words.

    Args:
        rows: Row words; each must have at least seven characters.

    Returns:
        Seven strings, the i-th built from character ``i`` of every row in
        order.
    """
    columns: List[str] = []
    for col_index in range(7):
        letters = [row[col_index] for row in rows]
        columns.append(''.join(letters))
    return columns


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the stages builder.

    Returns:
        The ``argparse`` namespace built from ``sys.argv``, carrying the
        attributes ``input``, ``output``, ``commit_every``, ``max_boards``
        and ``reset``.
    """
    # One (flag, add_argument keyword arguments) entry per option, in the
    # order they should appear in ``--help``.
    option_table = [
        ('--input', {
            'nargs': '+',
            'required': True,
            'help': 'Input log glob(s). Example: ./*.log',
        }),
        ('--output', {
            'default': 'word-square/stages.sqlite3',
            'help': 'Output SQLite path. Default: word-square/stages.sqlite3',
        }),
        ('--commit-every', {
            'type': int,
            'default': 1000,
            'help': 'Commit every N inserts. Default: 1000',
        }),
        ('--max-boards', {
            'type': int,
            'default': 0,
            'help': 'Stop after inserting N boards (0 means no limit).',
        }),
        ('--reset', {
            'action': 'store_true',
            'help': 'Delete output DB if it exists before writing.',
        }),
    ]

    cli = argparse.ArgumentParser(description='Build word-square stages SQLite database.')
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()


def _iter_solution_rows(lines: Iterable[str]) -> Iterable[List[str]]:
    """Yield the seven row words of every complete solution block in a log.

    A block starts at a line matching ``SOLUTION_RE`` and is complete once
    seven lines matching ``ROW_RE`` have followed it. Blank lines are
    ignored; any other line inside a block discards the partial board.
    """
    collecting = False
    rows: List[str] = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if SOLUTION_RE.match(line):
            collecting = True
            rows = []
            continue
        if not collecting:
            continue
        if ROW_RE.match(line):
            rows.append(line)
            if len(rows) == 7:
                yield rows
                collecting = False
                rows = []
            continue
        # Unexpected line inside a solution block. Reset.
        collecting = False
        rows = []


def main() -> int:
    """Import solved boards from solver logs into the stages database.

    Every complete solution found in the input logs is inserted into the
    ``stages`` table with its row words, column words, number of distinct
    words and a symmetry flag. Boards already present are skipped and
    counted as duplicates.

    Returns:
        ``0`` on success, ``1`` when no input file matched.
    """
    args = parse_args()
    out_path = os.path.expanduser(args.output)
    commit_every = max(1, args.commit_every)

    # Resolve the inputs before touching the output, so that a mistyped glob
    # combined with --reset does not delete an existing database.
    paths = list(iter_input_files(args.input))
    if not paths:
        print('No input files matched. Please pass --input with a valid glob.')
        return 1

    if args.reset and os.path.exists(out_path):
        os.remove(out_path)

    # dirname() is '' for a bare filename, and os.makedirs('') raises.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    insert_sql = 'INSERT OR IGNORE INTO stages (board, rows, cols, unique_words, is_symmetric) VALUES (?, ?, ?, ?, ?)'

    inserted = 0
    dupes = 0
    total_solutions = 0
    limit_reached = False

    conn = sqlite3.connect(out_path)
    try:
        conn.execute('PRAGMA journal_mode=OFF')
        conn.execute('PRAGMA synchronous=OFF')
        conn.execute(
            'CREATE TABLE IF NOT EXISTS stages ('
            'id INTEGER PRIMARY KEY,'
            'board TEXT NOT NULL UNIQUE,'
            'rows TEXT NOT NULL,'
            'cols TEXT NOT NULL,'
            'unique_words INTEGER NOT NULL,'
            'is_symmetric INTEGER NOT NULL'
            ')'
        )
        conn.execute('CREATE INDEX IF NOT EXISTS stages_board_idx ON stages(board)')
        conn.execute('CREATE INDEX IF NOT EXISTS stages_is_symmetric_idx ON stages(is_symmetric)')

        conn.execute('BEGIN')
        for path in paths:
            with open(path, 'r', encoding='utf-8', errors='replace') as handle:
                for rows in _iter_solution_rows(handle):
                    cols = compute_cols(rows)
                    params = (
                        ''.join(rows),
                        json.dumps(rows),
                        json.dumps(cols),
                        len(set(rows + cols)),
                        int(rows == cols),
                    )
                    # INSERT OR IGNORE changes nothing for a known board, so
                    # an unchanged counter identifies a duplicate.
                    before = conn.total_changes
                    conn.execute(insert_sql, params)
                    total_solutions += 1
                    if conn.total_changes == before:
                        dupes += 1
                        continue
                    inserted += 1
                    # Only a real insert can complete a batch; checking on
                    # duplicates would commit once per duplicate row.
                    if inserted % commit_every == 0:
                        conn.commit()
                        conn.execute('BEGIN')
                    if args.max_boards and inserted >= args.max_boards:
                        limit_reached = True
                        break
            if limit_reached:
                break
    finally:
        try:
            conn.commit()
        finally:
            conn.close()

    print(f'Parsed solutions: {total_solutions}')
    print(f'Inserted boards: {inserted}')
    print(f'Duplicates skipped: {dupes}')
    return 0


# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
1 change: 1 addition & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ const productionBots = [
'mail-hook',
'wordhero',
'wordhero/crossword',
'word-square',
'oauth',
'tunnel',
'voiperrobot',
Expand Down
2 changes: 2 additions & 0 deletions word-square/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.sqlite3
*.sqlite3-*
91 changes: 91 additions & 0 deletions word-square/generateWordSquare.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import * as sqlite from 'sqlite';
import sqlite3 from 'sqlite3';
import path from 'path';

/** One row or column word of a word square together with its clue data. */
export interface WordSquareClue {
  /** The answer word. */
  word: string;
  /** Definition text, or '(no definition)' when none was found. */
  definition: string;
  /** The `definition_censored` value for the word, or '(no definition)' when none was found. */
  definitionCensored: string;
  /** The `probability_order` value for the word, or null when unavailable. */
  probabilityOrder: number | null;
  /** Zero-based position of the word within its rows or cols list. */
  index: number;
}

/** A generated word square: the board cells plus a clue per row and column. */
export interface WordSquare {
  /** The board string split into single-character cells. */
  board: string[];
  /** Clues for the row words, in board order. */
  rows: WordSquareClue[];
  /** Clues for the column words, in board order. */
  cols: WordSquareClue[];
}

/**
 * Pick one random stage from `stages.sqlite3`, located next to this module.
 *
 * @param symmetric When true, pick among stages flagged `is_symmetric = 1`;
 *   otherwise pick among stages whose `unique_words` is 14.
 * @returns The stage's `board`, `rows` and `cols` columns (`rows` and `cols`
 *   are JSON-encoded arrays), or null when no stage matches.
 */
const loadStage = async (symmetric: boolean) => {
  const db = await sqlite.open({
    filename: path.join(__dirname, 'stages.sqlite3'),
    driver: sqlite3.Database,
  });
  try {
    // A board has 7 row words and 7 column words, and `unique_words` is the
    // number of distinct words among them. `unique_words = 14` therefore
    // selects boards on which all 14 words differ.
    const stage = await db.get<{board: string; rows: string; cols: string}>(
      symmetric
        ? 'SELECT board, rows, cols FROM stages WHERE is_symmetric = 1 ORDER BY RANDOM() LIMIT 1'
        : 'SELECT board, rows, cols FROM stages WHERE unique_words = 14 ORDER BY RANDOM() LIMIT 1',
    );
    return stage ?? null;
  } finally {
    // A new handle is opened on every call, so release it on every path.
    await db.close();
  }
};

/** Shape of a row read from the `definitions` table; field names are the column names. */
interface DefinitionRow {
  word: string;
  definition: string;
  definition_censored: string;
  // Nullable: the consumer falls back to null when it is missing.
  probability_order: number | null;
}

/**
 * Look up the given words in `definitions.sqlite3`, located next to this
 * module.
 *
 * @param words Words to look up; duplicates are queried only once.
 * @returns A map from word to its `definitions` row. Words without a row are
 *   absent from the map.
 */
const loadDefinitions = async (words: string[]) => {
  const definitions = new Map<string, DefinitionRow>();
  const uniqueWords = Array.from(new Set(words));
  // Nothing to look up: skip opening the database altogether.
  if (uniqueWords.length === 0) {
    return definitions;
  }

  const db = await sqlite.open({
    filename: path.join(__dirname, 'definitions.sqlite3'),
    driver: sqlite3.Database,
  });
  try {
    // One bound parameter per word; the words never enter the SQL text.
    const placeholders = uniqueWords.map(() => '?').join(',');
    const rows = await db.all<DefinitionRow[]>(
      `SELECT word, definition, definition_censored, probability_order FROM definitions WHERE word IN (${placeholders})`,
      uniqueWords,
    );
    for (const row of rows) {
      definitions.set(row.word, row);
    }
    return definitions;
  } finally {
    // A new handle is opened on every call, so release it on every path.
    await db.close();
  }
};

/**
 * Build a word square puzzle from a randomly chosen stage.
 *
 * @param symmetric Forwarded to `loadStage` to choose which kind of stage is
 *   picked. Defaults to false.
 * @returns The board cells with one clue per row and column, or null when no
 *   stage is available or the stage is not a 7x7 board.
 */
const generateWordSquare = async (symmetric: boolean = false): Promise<WordSquare | null> => {
  const stage = await loadStage(symmetric);
  if (!stage) {
    return null;
  }

  const rowWords: string[] = JSON.parse(stage.rows);
  const colWords: string[] = JSON.parse(stage.cols);
  const cells = stage.board.split('');

  // Only 7x7 boards are supported: 7 row words, 7 column words, 49 cells.
  const shapeIsValid = rowWords.length === 7 && colWords.length === 7 && cells.length === 49;
  if (!shapeIsValid) {
    return null;
  }

  const lookup = await loadDefinitions(rowWords.concat(colWords));

  // Turn a list of words into clues, falling back to placeholders for words
  // that have no definition row.
  const buildClues = (words: string[]): WordSquareClue[] => words.map((word, index) => {
    const entry = lookup.get(word);
    return {
      word,
      definition: entry?.definition ?? '(no definition)',
      definitionCensored: entry?.definition_censored ?? '(no definition)',
      probabilityOrder: entry?.probability_order ?? null,
      index,
    };
  });

  return {
    board: cells,
    rows: buildClues(rowWords),
    cols: buildClues(colWords),
  };
};

export default generateWordSquare;
Loading
Loading