3333import enum
3434import logging
3535import re
36- from typing import Any , Callable , Dict , List , Optional , Tuple
36+ from typing import Any , Callable , Dict , List , Optional , Pattern , Tuple
3737from util import promo_text_remover as promo_text_remover_lib
3838from flask import current_app
3939
# Module-level configuration slots. Each starts out as None; the code that
# populates them is not visible in this section.
# NOTE(review): presumably filled in via _get_required_configs() - confirm.
TITLE_WORD_ORDER_CONFIG = None
BLOCKLIST_CONFIG = None
TITLE_WORD_ORDER_OPTIONS_CONFIG = None

# Hook for a caller-supplied tokenizer (its call site is not visible here).
# The callable takes the same argument types as _tokenize_text:
# (text, language, regex_dictionary_terms), where the dictionary is keyed by
# compiled regex patterns, and returns the list of words. The annotation is
# Optional because the default value is None.
CUSTOM_TEXT_TOKENIZER: Optional[Callable[[str, str, Dict[Pattern[str], str]],
                                         List[str]]] = None
6667
6768
6869def _get_required_configs ():
@@ -179,6 +180,12 @@ def _optimize(
179180
180181 optimization_level = self ._get_optimization_level ()
181182
183+ title_word_order_dictionary = title_word_order_config .get (
184+ _PHRASE_DICTIONARY_CONFIG_KEY , {})
185+
186+ regex_dictionary_terms = regex_util .generate_regex_term_dict (
187+ title_word_order_dictionary )
188+
182189 for entry in product_batch ['entries' ]:
183190
184191 if optimization_util .optimization_exclusion_specified (
@@ -212,8 +219,6 @@ def _optimize(
212219 keyword_weights_mapping = title_word_order_config .get (
213220 _KEYWORD_WEIGHTS_MAPPING_CONFIG_KEY , {})
214221 keywords_for_gpc = keyword_weights_mapping .get (str (gpc_id ), [])
215- title_word_order_dictionary = title_word_order_config .get (
216- _PHRASE_DICTIONARY_CONFIG_KEY , {})
217222
218223 allowed_keywords_for_gpc = _remove_keywords_in_blocklist (
219224 keywords_for_gpc , keyword_blocklist )
@@ -223,8 +228,7 @@ def _optimize(
223228 allowed_keywords_for_gpc )
224229
225230 title_to_process = original_title
226- regex_dictionary_terms = regex_util .generate_regex_term_dict (
227- title_word_order_dictionary )
231+
228232 title_words = _tokenize_text (title_to_process , language ,
229233 regex_dictionary_terms )
230234 description_words = _tokenize_text (
@@ -383,8 +387,9 @@ def _remove_keywords_in_blocklist(
383387 return allowed_keywords
384388
385389
386- def _tokenize_text (text : str , language : str ,
387- regex_dictionary_terms : Dict [str , str ]) -> List [str ]:
390+ def _tokenize_text (
391+ text : str , language : str , regex_dictionary_terms : Dict [Pattern [str ],
392+ str ]) -> List [str ]:
388393 """Splits text into individual words using the correct method for the given language.
389394
390395 Args:
@@ -405,7 +410,7 @@ def _tokenize_text(text: str, language: str,
405410
406411
407412def _split_words_in_japanese (
408- text : str , regex_dictionary_terms : Dict [str , str ]) -> List [str ]:
413+ text : str , regex_dictionary_terms : Dict [Pattern [ str ] , str ]) -> List [str ]:
409414 """Splits Japanese text into words by using MeCab.
410415
411416 If a group of words in the text match a regex in the
@@ -437,7 +442,7 @@ def _split_words_in_japanese(
437442
438443
439444def _split_words_in_western_languages (
440- text : str , regex_dictionary_terms : Dict [str , str ]) -> List [str ]:
445+ text : str , regex_dictionary_terms : Dict [Pattern [ str ] , str ]) -> List [str ]:
441446 """Splits western text into words.
442447
443448 If a group of words in the text match a regex in the
0 commit comments