|
| 1 | +################################################################################ |
| 2 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 3 | +# or more contributor license agreements. See the NOTICE file |
| 4 | +# distributed with this work for additional information |
| 5 | +# regarding copyright ownership. The ASF licenses this file |
| 6 | +# to you under the Apache License, Version 2.0 (the |
| 7 | +# "License"); you may not use this file except in compliance |
| 8 | +# with the License. You may obtain a copy of the License at |
| 9 | +# |
| 10 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +# |
| 12 | +# Unless required by applicable law or agreed to in writing, software |
| 13 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +# See the License for the specific language governing permissions and |
| 16 | +# limitations under the License. |
| 17 | +################################################################################ |
| 18 | + |
| 19 | +import os |
| 20 | +from typing import Dict, Optional, Tuple |
| 21 | +from urllib.parse import urlparse |
| 22 | + |
| 23 | +from pypaimon.common.file_io import FileIO |
| 24 | +from pypaimon.common.options.config import OssOptions, S3Options |
| 25 | + |
| 26 | + |
def _strip_url_scheme(endpoint: str) -> str:
    """Return *endpoint* without a leading ``http://`` or ``https://``."""
    for prefix in ('https://', 'http://'):
        if endpoint.startswith(prefix):
            return endpoint[len(prefix):]
    return endpoint


def _pick_configured(properties, kwarg_options) -> Dict[str, str]:
    """Map each kwarg name to its option value, keeping only options that are set.

    ``kwarg_options`` is an iterable of ``(kwarg_name, option)`` pairs; the
    resulting dict preserves that order.
    """
    return {
        kwarg: properties.get(option)
        for kwarg, option in kwarg_options
        if properties.contains(option)
    }


def to_vortex_specified(file_io: FileIO, file_path: str) -> Tuple[str, Optional[Dict[str, str]]]:
    """Convert a path and extract storage options for Vortex ``store.from_url()``.

    Args:
        file_io: The file IO used to interpret ``file_path``. If it exposes a
            ``file_io()`` method, the object returned by that method is used
            instead. Options are read from ``get_merged_properties()`` when
            available, otherwise from a truthy ``properties`` attribute.
        file_path: Local path or URI of the file to open.

    Returns:
        A ``(url, store_kwargs)`` tuple where ``store_kwargs`` can be passed as
        keyword arguments to ``vortex.store.from_url(url, **store_kwargs)``.

        * Local paths (no scheme or ``file``): an absolute filesystem path and
          ``None``.
        * ``s3`` / ``s3a`` / ``s3n``: the original URI plus whichever of
          ``region``, ``access_key_id``, ``secret_access_key``,
          ``session_token`` and ``endpoint`` are configured.
        * ``oss``: the URI with its scheme rewritten to ``s3://``, the same
          credential keys, and an ``endpoint`` of the form
          ``https://<bucket>.<endpoint-host>``.
        * Any remote scheme without properties, or any other scheme: the
          original URI and ``None``.
    """
    # Unwrap wrappers that expose the underlying file IO via ``file_io()``.
    if hasattr(file_io, 'file_io'):
        file_io = file_io.file_io()

    if hasattr(file_io, 'get_merged_properties'):
        properties = file_io.get_merged_properties()
    else:
        # Missing or empty properties are both treated as "no options".
        properties = getattr(file_io, 'properties', None) or None

    scheme, _, _ = file_io.parse_location(file_path)

    if not scheme or scheme == 'file':
        # Only local paths need the filesystem form; remote URIs are passed
        # through so that vortex can parse them itself.
        local_path = file_io.to_filesystem_path(file_path)
        if not os.path.isabs(local_path):
            local_path = os.path.abspath(local_path)
        return local_path, None

    if not properties:
        return file_path, None

    if scheme in {'s3', 's3a', 's3n'}:
        store_kwargs = _pick_configured(properties, (
            ('region', S3Options.S3_REGION),
            ('access_key_id', S3Options.S3_ACCESS_KEY_ID),
            ('secret_access_key', S3Options.S3_ACCESS_KEY_SECRET),
            ('session_token', S3Options.S3_SECURITY_TOKEN),
            ('endpoint', S3Options.S3_ENDPOINT),
        ))
        return file_path, store_kwargs

    if scheme == 'oss':
        bucket = urlparse(file_path).netloc
        store_kwargs = _pick_configured(properties, (
            ('region', OssOptions.OSS_REGION),
            ('access_key_id', OssOptions.OSS_ACCESS_KEY_ID),
            ('secret_access_key', OssOptions.OSS_ACCESS_KEY_SECRET),
            ('session_token', OssOptions.OSS_SECURITY_TOKEN),
        ))
        if properties.contains(OssOptions.OSS_ENDPOINT):
            # The endpoint is rebuilt with the bucket as a host prefix and is
            # always emitted with an https:// scheme.
            host = _strip_url_scheme(properties.get(OssOptions.OSS_ENDPOINT))
            store_kwargs['endpoint'] = f"https://{bucket}.{host}"

        # Rewrite only the leading scheme; a plain str.replace() would also
        # touch any later 'oss://' occurrence inside the object key.
        oss_prefix = 'oss://'
        if file_path[:len(oss_prefix)].lower() == oss_prefix:
            return 's3://' + file_path[len(oss_prefix):], store_kwargs
        return file_path, store_kwargs

    return file_path, None
0 commit comments