-
Notifications
You must be signed in to change notification settings - Fork 73
Expand file tree
/
Copy pathpyconnection.hpp
More file actions
393 lines (311 loc) · 14.1 KB
/
pyconnection.hpp
File metadata and controls
393 lines (311 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/pyconnection/pyconnection.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb_python/arrow/arrow_array_stream.hpp"
#include "duckdb.hpp"
#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/unordered_map.hpp"
#include "duckdb_python/import_cache/python_import_cache.hpp"
#include "duckdb_python/numpy/numpy_type.hpp"
#include "duckdb_python/pyrelation.hpp"
#include "duckdb_python/pytype.hpp"
#include "duckdb_python/path_like.hpp"
#include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
#include "duckdb_python/pyfilesystem.hpp"
#include "duckdb_python/pybind11/registered_py_object.hpp"
#include "duckdb_python/python_dependency.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb_python/pybind11/conversions/exception_handling_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
#include "duckdb/common/shared_ptr.hpp"
namespace duckdb {
//! Forward declaration; used further down as the mapped type of the map returned by
//! DuckDBPyConnection::TransformPythonParamDict
struct BoundParameterData;
//! Kinds of Python environment; DuckDBPyConnection stores one value of this type in its static
//! `environment` member
enum class PythonEnvironmentType { NORMAL, INTERACTIVE, JUPYTER };
//! Forward declaration; ConnectionGuard and DuckDBPyConnection below refer to it through unique_ptr
struct DuckDBPyRelation;
//! RegisteredObject specialisation that additionally owns a PythonTableArrowArrayStreamFactory
class RegisteredArrow : public RegisteredObject {
public:
	//! Arrow stream factory handed over at construction time
	unique_ptr<PythonTableArrowArrayStreamFactory> arrow_factory;

public:
	//! Moves `obj_p` into the RegisteredObject base and takes ownership of `arrow_factory_p`
	RegisteredArrow(unique_ptr<PythonTableArrowArrayStreamFactory> arrow_factory_p, py::object obj_p)
	    : RegisteredObject(std::move(obj_p)), arrow_factory(std::move(arrow_factory_p)) {
	}
};
//! Holds a single shared DuckDBPyConnection together with a mutex.
//! The type can be neither copied nor moved; Get/Set are defined out of line.
//! NOTE(review): the mutex presumably serialises Get/Set - confirm in the implementation file.
struct DefaultConnectionHolder {
public:
	DefaultConnectionHolder() {
	}
	~DefaultConnectionHolder() {
	}

	//! Copy and move operations are disabled
	DefaultConnectionHolder(DefaultConnectionHolder &&other) = delete;
	DefaultConnectionHolder(const DefaultConnectionHolder &other) = delete;
	DefaultConnectionHolder &operator=(DefaultConnectionHolder &&other) = delete;
	DefaultConnectionHolder &operator=(const DefaultConnectionHolder &other) = delete;

	//! Replace / retrieve the held connection
	void Set(shared_ptr<DuckDBPyConnection> conn);
	shared_ptr<DuckDBPyConnection> Get();

private:
	//! The held connection
	shared_ptr<DuckDBPyConnection> connection;
	//! Mutex stored next to `connection`
	mutex l;
};
//! Groups a database handle, a connection handle and a result relation.
//! Each Get* accessor throws a ConnectionException ("Connection already closed!") when the
//! member it would dereference is not set.
struct ConnectionGuard {
public:
	ConnectionGuard() {
	}
	~ConnectionGuard() {
	}

public:
	//! True when no connection is currently set
	bool ConnectionIsClosed() const {
		return connection == nullptr;
	}
	//! True when a result relation is currently set
	bool HasResult() const {
		return result != nullptr;
	}

public:
	//! Database access (mutable / const); throws if no database is set
	DuckDB &GetDatabase() {
		if (!database) {
			ThrowConnectionException();
		}
		return *database;
	}
	const DuckDB &GetDatabase() const {
		if (!database) {
			ThrowConnectionException();
		}
		return *database;
	}

	//! Connection access (mutable / const); throws if no connection is set
	Connection &GetConnection() {
		if (ConnectionIsClosed()) {
			ThrowConnectionException();
		}
		return *connection;
	}
	const Connection &GetConnection() const {
		if (ConnectionIsClosed()) {
			ThrowConnectionException();
		}
		return *connection;
	}
	//! Hands out a copy of the shared connection pointer (may be empty, never throws here)
	shared_ptr<Connection> GetSharedConnection() {
		return connection;
	}

	//! Result access (mutable / const); throws if no result is set
	DuckDBPyRelation &GetResult() {
		if (!HasResult()) {
			ThrowConnectionException();
		}
		return *result;
	}
	const DuckDBPyRelation &GetResult() const {
		if (!HasResult()) {
			ThrowConnectionException();
		}
		return *result;
	}

public:
	//! Adopt the given database handle
	void SetDatabase(shared_ptr<DuckDB> db) {
		database = std::move(db);
	}
	//! Share the database of another guard; throws if that guard has no database
	void SetDatabase(ConnectionGuard &con) {
		if (!con.database) {
			ThrowConnectionException();
		}
		database = con.database;
	}
	//! Take over a uniquely owned connection
	void SetConnection(unique_ptr<Connection> con) {
		connection = std::move(con);
	}
	//! Adopt an already shared connection
	void ShareConnection(shared_ptr<Connection> con) {
		connection = std::move(con);
	}
	//! Replace the stored result relation
	void SetResult(unique_ptr<DuckDBPyRelation> res) {
		result = std::move(res);
	}

private:
	//! Single place that raises the "closed" error used by all checked accessors
	void ThrowConnectionException() const {
		throw ConnectionException("Connection already closed!");
	}

private:
	shared_ptr<DuckDB> database;
	shared_ptr<Connection> connection;
	unique_ptr<DuckDBPyRelation> result;
};
//! Connection object of the Python package.
//! Owns a ConnectionGuard (`con`), a list of cursors, the locks and the registries declared
//! below. All member functions are only declared in this header.
struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
private:
	//! Keeps weak references to DuckDBPyConnection objects next to a mutex.
	//! AddCursor / ClearCursors are defined out of line.
	class Cursors {
	public:
		Cursors() {
		}

	public:
		void AddCursor(shared_ptr<DuckDBPyConnection> conn);
		void ClearCursors();

	private:
		mutex lock;
		vector<weak_ptr<DuckDBPyConnection>> cursors;
	};

public:
	//! Database / connection / result handles of this connection
	ConnectionGuard con;
	//! Cursor bookkeeping (see Cursors)
	Cursors cursors;
	//! Lock owned by this object; default target of py_connection_lock
	std::mutex owned_py_connection_lock;
	//! Points to owned_py_connection_lock by default, or to parent's lock for subcursors
	std::mutex *py_connection_lock = &owned_py_connection_lock;
	//! Whether this is a subcursor (shares connection with parent)
	bool is_subcursor = false;
	//! MemoryFileSystem used to temporarily store file-like objects for reading
	shared_ptr<ModifiedMemoryFileSystem> internal_object_filesystem;
	//! Dependencies keyed by case-insensitive function name
	case_insensitive_map_t<unique_ptr<ExternalDependency>> registered_functions;
	//! Case-insensitive set of registered object names
	case_insensitive_set_t registered_objects;

public:
	explicit DuckDBPyConnection() {
	}
	~DuckDBPyConnection();

public:
	static void Initialize(py::handle &m);
	static void Cleanup();

	//! Enter / Exit take the arguments of Python's context-manager protocol
	shared_ptr<DuckDBPyConnection> Enter();

	static void Exit(DuckDBPyConnection &self, const py::object &exc_type, const py::object &exc,
	                 const py::object &traceback);

	static bool DetectAndGetEnvironment();
	static bool IsJupyter();
	static std::string FormattedPythonVersion();
	static shared_ptr<DuckDBPyConnection> DefaultConnection();
	static void SetDefaultConnection(shared_ptr<DuckDBPyConnection> conn);
	static PythonImportCache *ImportCache();
	static bool IsInteractive();

	unique_ptr<DuckDBPyRelation> ReadCSV(const py::object &name, py::kwargs &kwargs);

	py::list ExtractStatements(const string &query);

	//! Every option after `name` defaults to py::none()
	unique_ptr<DuckDBPyRelation> ReadJSON(
	    const py::object &name, const Optional<py::object> &columns = py::none(),
	    const Optional<py::object> &sample_size = py::none(), const Optional<py::object> &maximum_depth = py::none(),
	    const Optional<py::str> &records = py::none(), const Optional<py::str> &format = py::none(),
	    const Optional<py::object> &date_format = py::none(), const Optional<py::object> &timestamp_format = py::none(),
	    const Optional<py::object> &compression = py::none(),
	    const Optional<py::object> &maximum_object_size = py::none(),
	    const Optional<py::object> &ignore_errors = py::none(),
	    const Optional<py::object> &convert_strings_to_integers = py::none(),
	    const Optional<py::object> &field_appearance_threshold = py::none(),
	    const Optional<py::object> &map_inference_threshold = py::none(),
	    const Optional<py::object> &maximum_sample_files = py::none(),
	    const Optional<py::object> &filename = py::none(), const Optional<py::object> &hive_partitioning = py::none(),
	    const Optional<py::object> &union_by_name = py::none(), const Optional<py::object> &hive_types = py::none(),
	    const Optional<py::object> &hive_types_autocast = py::none());

	//! Type constructors; each returns a shared DuckDBPyType
	shared_ptr<DuckDBPyType> MapType(const shared_ptr<DuckDBPyType> &key_type,
	                                 const shared_ptr<DuckDBPyType> &value_type);
	shared_ptr<DuckDBPyType> StructType(const py::object &fields);
	shared_ptr<DuckDBPyType> ListType(const shared_ptr<DuckDBPyType> &type);
	shared_ptr<DuckDBPyType> ArrayType(const shared_ptr<DuckDBPyType> &type, idx_t size);
	shared_ptr<DuckDBPyType> UnionType(const py::object &members);
	shared_ptr<DuckDBPyType> EnumType(const string &name, const shared_ptr<DuckDBPyType> &type,
	                                  const py::list &values_p);
	shared_ptr<DuckDBPyType> DecimalType(int width, int scale);
	shared_ptr<DuckDBPyType> StringType(const string &collation = string());
	shared_ptr<DuckDBPyType> Type(const string &type_str);

	//! Scalar UDF registration; returns a shared pointer to a DuckDBPyConnection
	shared_ptr<DuckDBPyConnection>
	RegisterScalarUDF(const string &name, const py::function &udf, const py::object &arguments = py::none(),
	                  const shared_ptr<DuckDBPyType> &return_type = nullptr, PythonUDFType type = PythonUDFType::NATIVE,
	                  FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING,
	                  PythonExceptionHandling exception_handling = PythonExceptionHandling::FORWARD_ERROR,
	                  bool side_effects = false);

	shared_ptr<DuckDBPyConnection> UnregisterUDF(const string &name);

	//! Statement execution; `params` defaults to an empty Python list where it is optional
	shared_ptr<DuckDBPyConnection> ExecuteMany(const py::object &query, py::object params = py::list());

	void ExecuteImmediately(vector<unique_ptr<SQLStatement>> statements);
	unique_ptr<PreparedStatement> PrepareQuery(unique_ptr<SQLStatement> statement);
	unique_ptr<QueryResult> ExecuteInternal(PreparedStatement &prep, py::object params = py::list());
	unique_ptr<QueryResult> PrepareAndExecuteInternal(unique_ptr<SQLStatement> statement,
	                                                  py::object params = py::list());

	shared_ptr<DuckDBPyConnection> Execute(const py::object &query, py::object params = py::list());
	shared_ptr<DuckDBPyConnection> ExecuteFromString(const string &query);

	shared_ptr<DuckDBPyConnection> Append(const string &name, const PandasDataFrame &value, bool by_name);

	shared_ptr<DuckDBPyConnection> RegisterPythonObject(const string &name, const py::object &python_object);

	void InstallExtension(const string &extension, bool force_install = false,
	                      const py::object &repository = py::none(), const py::object &repository_url = py::none(),
	                      const py::object &version = py::none());

	void LoadExtension(const string &extension);

	//! Relation producers; each returns a uniquely owned DuckDBPyRelation
	unique_ptr<DuckDBPyRelation> RunQuery(const py::object &query, string alias = "", py::object params = py::list());

	unique_ptr<DuckDBPyRelation> Table(const string &tname);

	unique_ptr<DuckDBPyRelation> Values(const py::args &params);

	unique_ptr<DuckDBPyRelation> View(const string &vname);

	unique_ptr<DuckDBPyRelation> TableFunction(const string &fname, py::object params = py::list());

	unique_ptr<DuckDBPyRelation> FromDF(const PandasDataFrame &value);

	unique_ptr<DuckDBPyRelation> FromParquet(const string &file_glob, bool binary_as_string, bool file_row_number,
	                                         bool filename, bool hive_partitioning, bool union_by_name,
	                                         const py::object &compression = py::none());

	unique_ptr<DuckDBPyRelation> FromParquets(const vector<string> &file_globs, bool binary_as_string,
	                                          bool file_row_number, bool filename, bool hive_partitioning,
	                                          bool union_by_name, const py::object &compression = py::none());

	unique_ptr<DuckDBPyRelation> FromParquetInternal(Value &&file_param, bool binary_as_string, bool file_row_number,
	                                                 bool filename, bool hive_partitioning, bool union_by_name,
	                                                 const py::object &compression = py::none());

	unique_ptr<DuckDBPyRelation> FromArrow(py::object &arrow_object);

	unordered_set<string> GetTableNames(const string &query, bool qualified);

	shared_ptr<DuckDBPyConnection> UnregisterPythonObject(const string &name);

	shared_ptr<DuckDBPyConnection> Begin();

	shared_ptr<DuckDBPyConnection> Commit();

	shared_ptr<DuckDBPyConnection> Rollback();

	shared_ptr<DuckDBPyConnection> Checkpoint();

	void Close();

	void Interrupt();

	double QueryProgress();

	ModifiedMemoryFileSystem &GetObjectFileSystem();

	// cursor() is stupid
	shared_ptr<DuckDBPyConnection> Cursor();

	//! Create a subcursor that shares the same connection and transaction.
	//! This enables interleaved streaming + DML within a single transaction.
	shared_ptr<DuckDBPyConnection> Subcursor();

	Optional<py::list> GetDescription();

	int GetRowcount();

	// these should be functions on the result but well
	Optional<py::tuple> FetchOne();

	py::list FetchMany(idx_t size);

	py::list FetchAll();

	py::dict FetchNumpy();
	PandasDataFrame FetchDF(bool date_as_object);
	PandasDataFrame FetchDFChunk(const idx_t vectors_per_chunk = 1, bool date_as_object = false);

	duckdb::pyarrow::Table FetchArrow(idx_t rows_per_batch);
	PolarsDataFrame FetchPolars(idx_t rows_per_batch, bool lazy);

	py::dict FetchPyTorch();

	py::dict FetchTF();

	duckdb::pyarrow::RecordBatchReader FetchRecordBatchReader(const idx_t rows_per_batch);

	static shared_ptr<DuckDBPyConnection> Connect(const py::object &database, bool read_only, const py::dict &config);

	//! Convert Python-side parameters into a Value list / a case-insensitive map of BoundParameterData
	static vector<Value> TransformPythonParamList(const py::handle &params);
	static case_insensitive_map_t<BoundParameterData> TransformPythonParamDict(const py::dict &params);

	void RegisterFilesystem(AbstractFileSystem filesystem);
	void UnregisterFilesystem(const py::str &name);
	py::list ListFilesystems();
	bool FileSystemIsRegistered(const string &name);

	// Profiling info
	py::str GetProfilingInformation(const py::str &format = "json");
	void EnableProfiling();
	void DisableProfiling();

	//! Default connection to an in-memory database
	static DefaultConnectionHolder default_connection;
	//! Caches and provides an interface to get frequently used modules+subtypes
	static shared_ptr<PythonImportCache> import_cache;

	static bool IsPandasDataframe(const py::object &object);
	static PyArrowObjectType GetArrowType(const py::handle &obj);
	static bool IsAcceptedArrowObject(const py::object &object);
	static NumpyObjectType IsAcceptedNumpyObject(const py::object &object);

	static unique_ptr<QueryResult> CompletePendingQuery(PendingQueryResult &pending_query);

private:
	unique_ptr<DuckDBPyRelation> CreateRelation(shared_ptr<Relation> rel);
	unique_ptr<DuckDBPyRelation> CreateRelation(shared_ptr<DuckDBPyResult> result);
	PathLike GetPathLike(const py::object &object);
	ScalarFunction CreateScalarUDF(const string &name, const py::function &udf, const py::object &parameters,
	                               const shared_ptr<DuckDBPyType> &return_type, bool vectorized,
	                               FunctionNullHandling null_handling, PythonExceptionHandling exception_handling,
	                               bool side_effects);
	void RegisterArrowObject(const py::object &arrow_object, const string &name);
	vector<unique_ptr<SQLStatement>> GetStatements(const py::object &query);

	//! Static state behind the environment / version queries declared above
	static PythonEnvironmentType environment;
	static std::string formatted_python_version;
	static void DetectEnvironment();
};
//! Reports whether `T::Name` is currently a key of Python's `sys.modules`
template <typename T>
static bool ModuleIsLoaded() {
	return pybind11::module_::import("sys").attr("modules").contains(py::str(T::Name));
}
} // namespace duckdb