5
5
import math
6
6
import random
7
7
import re
8
- from typing import List , Literal , Union
8
+ from typing import List , Literal , Union , Optional
9
9
10
10
logger = logging .getLogger (__name__ )
11
11
@@ -82,7 +82,13 @@ def get_random_indices(*, start: int, size: int, percentage: int) -> List[int]:
82
82
83
83
84
84
def select_word_indices (
85
- words : List [str ], mode : Literal ["all" , "custom" , "keywords" , "random" , "regex" ], ** kwargs
85
+ words : List [str ],
86
+ mode : Literal ["all" , "custom" , "keywords" , "random" , "regex" ],
87
+ * ,
88
+ indices : Optional [List [int ]] = None ,
89
+ keywords : Optional [List [str ]] = None ,
90
+ percentage : Optional [int ] = None ,
91
+ regex : Optional [Union [str , re .Pattern ]] = None ,
86
92
) -> List [int ]:
87
93
"""
88
94
Select indices from a list of words based on specified selection mode.
@@ -97,12 +103,10 @@ def select_word_indices(
97
103
Args:
98
104
words (List[str]): A list of words to select from.
99
105
mode (str): Selection mode; one of "all", "custom", "keywords", "random", or "regex". Required (no default).
100
-
101
- Keyword Arguments:
102
- indices (List[int]): Custom indices to select (for "custom" mode).
103
- keywords (List[str]): List of keywords to match (for "keywords" mode).
104
- percentage (int): Percentage of indices to select (for "random" mode).
105
- regex (str or Pattern): Regular expression pattern to match (for "regex" mode).
106
+ indices (List[int], optional): Custom indices to select (for "custom" mode).
107
+ keywords (List[str], optional): List of keywords to match (for "keywords" mode).
108
+ percentage (int, optional): Percentage of indices to select (for "random" mode). Defaults to None; None (or 0) falls back to 50.
109
+ regex (str or Pattern, optional): Regular expression pattern to match (for "regex" mode).
106
110
107
111
Returns:
108
112
List[int]: Indices of selected words.
@@ -119,19 +123,19 @@ def select_word_indices(
119
123
return list (range (len (words )))
120
124
121
125
case "keywords" :
122
- word_list = kwargs . get ( " keywords" , [])
126
+ word_list = keywords or []
123
127
return [i for i , word in enumerate (words ) if word in word_list ]
124
128
125
129
case "random" :
126
- percentage = kwargs . get ( " percentage" , 50 )
127
- return get_random_indices (0 , len (words ), percentage )
130
+ percentage = percentage or 50
131
+ return get_random_indices (start = 0 , size = len (words ), percentage = percentage )
128
132
129
133
case "regex" :
130
- regex = kwargs . get ( " regex" , r"." )
131
- return [i for i , word in enumerate (words ) if re .search (regex , word )]
134
+ pattern = regex or r"."
135
+ return [i for i , word in enumerate (words ) if re .search (pattern , word )]
132
136
133
137
case "custom" :
134
- custom_indices = kwargs . get ( " indices" , [])
138
+ custom_indices = indices or []
135
139
valid_indices = [i for i in custom_indices if 0 <= i < len (words )]
136
140
invalid_indices = [i for i in custom_indices if i < 0 or i >= len (words )]
137
141
if invalid_indices :
0 commit comments