-
Notifications
You must be signed in to change notification settings - Fork 854
Optimize complexity of filtering out unwanted recognizers from O(n*m) to O(n) #1523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 13 commits
7aa0721
4fc942a
35cc00f
6f2edfb
f9641a7
a4109d3
38a3d65
d982e49
d03f6c0
a06ebb3
0b59b40
be62a2d
fa16ffd
6e8d0b1
0888cc1
017da22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -170,23 +170,25 @@ def get_recognizers( | |
| if language == rec.supported_language | ||
| ] | ||
| else: | ||
| all_entity_recognizers = dict() | ||
| for rec in all_possible_recognizers: | ||
| if language == rec.supported_language: | ||
| if type(rec.supported_entities) == list and len(rec.supported_entities) > 0: | ||
| for supported_entity in rec.supported_entities: | ||
| self.add_recognizer_map(all_entity_recognizers, supported_entity, rec) | ||
| elif type(rec.supported_entities) == str: | ||
| self.add_recognizer_map(all_entity_recognizers, supported_entity, rec) | ||
|
|
||
| for entity in entities: | ||
| subset = [ | ||
| rec | ||
| for rec in all_possible_recognizers | ||
| if entity in rec.supported_entities | ||
| and language == rec.supported_language | ||
| ] | ||
|
|
||
| if not subset: | ||
| if entity not in all_entity_recognizers: | ||
| logger.warning( | ||
| "Entity %s doesn't have the corresponding" | ||
| " recognizer in language : %s", | ||
| entity, | ||
| language, | ||
| ) | ||
| else: | ||
| to_return.update(set(subset)) | ||
| to_return.update(all_entity_recognizers[entity]) | ||
|
|
||
| logger.debug( | ||
| "Returning a total of %s recognizers", | ||
|
|
@@ -198,6 +200,12 @@ def get_recognizers( | |
|
|
||
| return list(to_return) | ||
|
|
||
| def add_recognizer_map(self, all_entity_recognizers, supported_entity, rec): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please make this private and add type hints?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Furthermore, there are a few linting issues, as can be seen here: https://dev.azure.com/csedevil/Presidio/_build/results?buildId=16970&view=logs&jobId=248a81d7-74ff-5424-a6bb-5f1f442ace41&j=248a81d7-74ff-5424-a6bb-5f1f442ace41&t=b4a88934-08b9-5ac5-8cd9-057bdd5531eb Consider using ruff to test.
||
| if supported_entity in all_entity_recognizers: | ||
| all_entity_recognizers[supported_entity].add(rec) | ||
| else: | ||
| all_entity_recognizers[supported_entity] = {rec} | ||
|
|
||
| def add_recognizer(self, recognizer: EntityRecognizer) -> None: | ||
| """ | ||
| Add a new recognizer to the list of recognizers. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.