|
7 | 7 | from python.utils.lib import data, log |
8 | 8 |
|
9 | 9 | prompts_csv_url = 'https://huggingface.co/datasets/fka/prompts.chat/raw/main/prompts.csv' |
10 | | -output_path = Path(__file__).parent.parent.parent / 'data/ai-personas.json' |
| 10 | +output_path = Path(__file__).parent.parent / 'data/ai-personas.json' |
11 | 11 |
|
12 | 12 | log.info(f'Downloading {prompts_csv_url}...') |
13 | 13 | csv.field_size_limit(10**9) # to accommodate longass prompts |
|
22 | 22 | row for row in prompt_rows |
23 | 23 | if row.get('type') == 'TEXT' |
24 | 24 | and not prompt.looks_like_img_type(row.get('prompt', '')) |
| 25 | + and not prompt.looks_like_vid_type(row.get('prompt', '')) |
25 | 26 | and row.get('act', '').strip().lower() != 'test' |
26 | 27 | and (row_lower := row['act'].strip().lower()) not in seen_personas |
27 | 28 | and not seen_personas.add(row_lower) |
28 | 29 | ] |
29 | 30 | log.success(f'{len(text_prompt_rows):,} text prompts found!') |
30 | 31 |
|
31 | 32 | log.info(f'Reading {output_path}...') |
32 | | -personas = data.json.read(output_path) if output_path.exists() else {} |
| 33 | +if not output_path.exists(): |
| 34 | + log.error(f'Output path does not exist: {output_path}') |
| 35 | + raise SystemExit(1) |
| 36 | +personas = data.json.read(output_path) |
33 | 37 | log.success(f'{len(personas):,} previous personas loaded!') |
34 | 38 |
|
35 | 39 | log.info('Adding new personas...') |
36 | 40 | added_cnt = 0 |
| 41 | +emoji_re = re.compile( |
| 42 | + '[' |
| 43 | + '\U0001F300-\U0001FAFF' # symbols/emoji |
| 44 | + '\U00002700-\U000027BF' # dingbats |
| 45 | + '\U0001F1E0-\U0001F1FF' # flags |
| 46 | + ']+', |
| 47 | + flags=re.UNICODE |
| 48 | +) |
37 | 49 | for row in text_prompt_rows: |
38 | | - role = re.sub(r'^# |["“”‘’]', '', row['act']).strip() |
| 50 | + role = re.sub(r'^# |["“”‘’]', '', emoji_re.sub('', row['act'])).strip() |
39 | 51 | persona = {'prompt': row['prompt'].strip()} |
40 | 52 | if row.get('for_devs', '').strip().upper() == 'TRUE': |
41 | 53 | persona['targetAudience'] = ['devs'] |
|
0 commit comments