-
Notifications
You must be signed in to change notification settings - Fork 321
feat(Python-client):add scan filter supported #2305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,7 @@ | |
| from pypegasus.replication.ttypes import query_cfg_request | ||
| from pypegasus.rrdb import * | ||
| from pypegasus.rrdb.ttypes import scan_request, get_scanner_request, update_request, key_value, multi_put_request, \ | ||
| multi_get_request, multi_remove_request | ||
| multi_get_request, multi_remove_request, filter_type | ||
| from pypegasus.transport.protocol import * | ||
| from pypegasus.utils.tools import restore_key, get_ttl, bytes_cmp, ScanOptions | ||
|
|
||
|
|
@@ -496,6 +496,10 @@ def start_scan(self): | |
| request.stop_inclusive = self._scan_options.stop_inclusive | ||
| request.batch_size = self._scan_options.batch_size | ||
| request.need_check_hash = self._check_hash | ||
| request.sort_key_filter_type = self._scan_options.sortkey_filter_type | ||
| request.sort_key_filter_pattern = blob(self._scan_options.sortkey_filter_pattern) | ||
| request.hash_key_filter_type = self._scan_options.hashkey_filter_type | ||
| request.hash_key_filter_pattern = blob(self._scan_options.hashkey_filter_pattern) | ||
|
|
||
| op = RrdbGetScannerOperator(self._gpid, request, self._partition_hash) | ||
| session = self._table.get_session(self._gpid) | ||
|
|
@@ -597,6 +601,9 @@ def generate_key(cls, hash_key, sort_key): | |
| hash_key_len = len(hash_key) | ||
| sort_key_len = len(sort_key) | ||
|
|
||
| if hash_key_len >= 0xFFFF: | ||
| raise ValueError("hash_key length must be less than 65535") | ||
|
|
||
| if sort_key_len > 0: | ||
| values = (hash_key_len, hash_key, sort_key) | ||
| s = struct.Struct('>H'+str(hash_key_len)+'s'+str(sort_key_len)+'s') | ||
|
|
@@ -611,24 +618,50 @@ def generate_key(cls, hash_key, sort_key): | |
|
|
||
| @classmethod | ||
| def generate_next_bytes(cls, buff): | ||
| pos = len(buff) - 1 | ||
| """ | ||
| Increment the last non-0xFF byte in the buffer. | ||
|
|
||
| If `buff` is a string, it is assumed to be encoded with 'latin-1' to ensure | ||
| a 1:1 mapping between characters and bytes. Unicode strings with characters | ||
| outside the 0-255 range will raise a UnicodeEncodeError. | ||
| """ | ||
| is_str = isinstance(buff, str) | ||
| is_ba = isinstance(buff, bytearray) | ||
|
|
||
| if is_str: | ||
| arr = bytearray(buff.encode('latin-1')) | ||
WJSGDBZ marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| elif is_ba: | ||
| arr = buff | ||
| else: | ||
| arr = bytearray(buff) | ||
| pos = len(arr) - 1 | ||
| found = False | ||
| while pos >= 0: | ||
| if ord(buff[pos]) != 0xFF: | ||
| buff[pos] += 1 | ||
| if arr[pos] != 0xFF: | ||
| arr[pos] += 1 | ||
| found = True | ||
| break | ||
| if found: | ||
| return buff | ||
| pos -= 1 | ||
| if not found: | ||
| arr += b'\x00' | ||
| if is_str: | ||
| return arr.decode('latin-1') | ||
WJSGDBZ marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| elif is_ba: | ||
| return arr | ||
| else: | ||
| return buff + chr(0) | ||
| return bytes(arr) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is it converted to bytes here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To avoid unexpected change: |
||
|
|
||
| @classmethod | ||
| def generate_next_key(cls, hash_key, stop_sort_key): | ||
| key = cls.generate_key(hash_key, stop_sort_key) | ||
| return blob(cls.generate_next_bytes(key.raw())) | ||
|
|
||
| @classmethod | ||
| def generate_stop_key(cls, hash_key, stop_sort_key): | ||
| if stop_sort_key: | ||
| return cls.generate_key(hash_key, stop_sort_key), True | ||
| else: | ||
| return cls.generate_next_bytes(hash_key), False | ||
| return blob(cls.generate_next_bytes(hash_key)), False | ||
|
|
||
| def __init__(self, meta_addrs=None, table_name='', | ||
| timeout=DEFAULT_TIMEOUT): | ||
|
|
@@ -1004,6 +1037,24 @@ def get_scanner(self, hash_key, | |
| stop_key, stop_inclusive = self.generate_stop_key(hash_key, stop_sort_key) | ||
| if not stop_inclusive: | ||
| scan_options.stop_inclusive = stop_inclusive | ||
|
|
||
| # limit key range by prefix filter | ||
| if scan_options.sortkey_filter_type == filter_type.FT_MATCH_PREFIX and \ | ||
| len(scan_options.sortkey_filter_pattern) > 0: | ||
| prefix_start = self.generate_key(hash_key, scan_options.sortkey_filter_pattern) | ||
| # If the prefix start is after the current start_key, move the scan start to the prefix. | ||
| if bytes_cmp(prefix_start.data, start_key.data) > 0: | ||
| start_key = prefix_start | ||
| scan_options.start_inclusive = True | ||
|
|
||
| prefix_stop = self.generate_next_key(hash_key, scan_options.sortkey_filter_pattern) | ||
| # If the prefix stop is before or equal to the current stop_key, move the scan stop to the prefix stop. | ||
| # The prefix stop represents the next key after hash_key and sortkey_filter_pattern, | ||
| # so stop_inclusive should be False. | ||
| if bytes_cmp(prefix_stop.data, stop_key.data) <= 0: | ||
| stop_key = prefix_stop | ||
| scan_options.stop_inclusive = False | ||
|
|
||
| gpid_list = [] | ||
| hash_list = [] | ||
| r = bytes_cmp(start_key.data, stop_key.data) | ||
|
|
@@ -1033,10 +1084,6 @@ def get_unordered_scanners(self, max_split_count, scan_options): | |
| size = count // split | ||
| more = count % split | ||
|
|
||
| opt = ScanOptions() | ||
| opt.timeout_millis = scan_options.timeout_millis | ||
| opt.batch_size = scan_options.batch_size | ||
| opt.snapshot = scan_options.snapshot | ||
| scanner_list = [] | ||
| for i in range(split): | ||
| gpid_list = [] | ||
|
|
@@ -1048,6 +1095,6 @@ def get_unordered_scanners(self, max_split_count, scan_options): | |
| gpid_list.append(all_gpid_list[count]) | ||
| hash_list.append(int(count)) | ||
|
|
||
| scanner_list.append(PegasusScanner(self.table, gpid_list, opt, hash_list, True)) | ||
| scanner_list.append(PegasusScanner(self.table, gpid_list, scan_options, hash_list, True)) | ||
|
|
||
| return scanner_list | ||
Uh oh!
There was an error while loading. Please reload this page.