Skip to content

Commit a0b3602

Browse files
authored
Merge pull request #417 from Distributive-Network/caleb/fix/stringdata
caleb/fix/stringdata
2 parents cfdde8f + 1951efc commit a0b3602

9 files changed

+101
-55
lines changed

.git-blame-ignore-revs

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,10 @@ aae30e864449442cf0b04e94f8a242b1b667de9a
55
16dc3153b3cb684ca72445ed058babc8f5d97f42
66

77
# chore(linting): lint all C++ files
8-
58cd4b45777b046f03a63255c1d93e289e1cab5e
8+
58cd4b45777b046f03a63255c1d93e289e1cab5e
9+
10+
# chore(linting): lint PyBytesProxyHandler.cc
11+
d540ed6e0edfe9538dc726cf587dfb2cc76dde34
12+
13+
# chore(linting): lint PyObjectProxyHandler.cc
14+
1d45ea98e42294cce16deec5454725d4de36f59f

include/JSStringProxy.hh

+10-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
#include <Python.h>
1717

18+
#include <unordered_set>
19+
1820
/**
1921
* @brief The typedef for the backing store that will be used by JSStringProxy objects. All it contains is a pointer to the JSString
2022
*
@@ -24,6 +26,8 @@ typedef struct {
2426
JS::PersistentRootedValue *jsString;
2527
} JSStringProxy;
2628

29+
extern std::unordered_set<JSStringProxy *> jsStringProxies; // a collection of all JSStringProxy objects, used during a GCCallback to ensure they continue to point to the correct char buffer
30+
2731
/**
2832
* @brief This struct is a bundle of methods used by the JSStringProxy type
2933
*
@@ -37,7 +41,7 @@ public:
3741
*/
3842
static void JSStringProxy_dealloc(JSStringProxy *self);
3943

40-
/**
44+
/**
4145
* @brief copy protocol method for both copy and deepcopy
4246
*
4347
* @param self - The JSObjectProxy
@@ -49,13 +53,13 @@ public:
4953
// docs for methods, copied from cpython
5054
PyDoc_STRVAR(stringproxy_deepcopy__doc__,
5155
"__deepcopy__($self, memo, /)\n"
52-
"--\n"
53-
"\n");
56+
"--\n"
57+
"\n");
5458

5559
PyDoc_STRVAR(stringproxy_copy__doc__,
56-
"__copy__($self, /)\n"
57-
"--\n"
58-
"\n");
60+
"__copy__($self, /)\n"
61+
"--\n"
62+
"\n");
5963

6064
/**
6165
* @brief Struct for the other methods

src/JSStringProxy.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@
1212

1313
#include "include/StrType.hh"
1414

15-
15+
std::unordered_set<JSStringProxy *> jsStringProxies;
1616
extern JSContext *GLOBAL_CX;
1717

1818

1919
void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self)
2020
{
21+
jsStringProxies.erase(self);
2122
delete self->jsString;
2223
}
2324

src/PyBytesProxyHandler.cc

+22-22
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,30 @@ static bool array_valueOf(JSContext *cx, unsigned argc, JS::Value *vp) {
2626
return false;
2727
}
2828

29-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
29+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
3030
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());
3131

3232
auto byteLength = JS::GetArrayBufferByteLength(rootedArrayBuffer);
3333

34-
bool isSharedMemory;
34+
bool isSharedMemory;
3535
JS::AutoCheckCannotGC autoNoGC(cx);
3636
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);
37-
37+
3838
size_t numberOfDigits = 0;
3939
for (size_t i = 0; i < byteLength; i++) {
40-
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
40+
numberOfDigits += data[i] < 10 ? 1 : data[i] < 100 ? 2 : 3;
4141
}
4242
const size_t STRING_LENGTH = byteLength + numberOfDigits;
43-
JS::Latin1Char* buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);
44-
43+
JS::Latin1Char *buffer = (JS::Latin1Char *)malloc(sizeof(JS::Latin1Char) * STRING_LENGTH);
44+
4545
size_t charIndex = 0;
46-
sprintf((char*)&buffer[charIndex], "%d", data[0]);
46+
snprintf((char *)&buffer[charIndex], 4, "%d", data[0]);
4747
charIndex += data[0] < 10 ? 1 : data[0] < 100 ? 2 : 3;
4848

4949
for (size_t dataIndex = 1; dataIndex < byteLength; dataIndex++) {
5050
buffer[charIndex] = ',';
5151
charIndex++;
52-
sprintf((char*)&buffer[charIndex], "%d", data[dataIndex]);
52+
snprintf((char *)&buffer[charIndex], 4, "%d", data[dataIndex]);
5353
charIndex += data[dataIndex] < 10 ? 1 : data[dataIndex] < 100 ? 2 : 3;
5454
}
5555

@@ -84,7 +84,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
8484
JS::RootedObject thisObj(cx);
8585
if (!args.computeThis(cx, &thisObj)) return false;
8686

87-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
87+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(thisObj, BytesIteratorSlotIteratedObject);
8888
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());
8989

9090
JS::RootedValue rootedNextIndex(cx, JS::GetReservedSlot(thisObj, BytesIteratorSlotNextIndex));
@@ -112,7 +112,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
112112
if (!JS_SetProperty(cx, result, "done", done)) return false;
113113

114114
if (itemKind == ITEM_KIND_VALUE) {
115-
bool isSharedMemory;
115+
bool isSharedMemory;
116116
JS::AutoCheckCannotGC autoNoGC(cx);
117117
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);
118118

@@ -125,7 +125,7 @@ static bool iterator_next(JSContext *cx, unsigned argc, JS::Value *vp) {
125125
JS::RootedValue rootedNextIndex(cx, JS::Int32Value(nextIndex));
126126
items[0].set(rootedNextIndex);
127127

128-
bool isSharedMemory;
128+
bool isSharedMemory;
129129
JS::AutoCheckCannotGC autoNoGC(cx);
130130
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);
131131

@@ -216,8 +216,8 @@ static bool array_iterator_func(JSContext *cx, unsigned argc, JS::Value *vp, int
216216
if (!JS::Construct(cx, constructor_val, JS::HandleValueArray::empty(), &obj)) return false;
217217
if (!obj) return false;
218218

219-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
220-
219+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
220+
221221
JS::SetReservedSlot(obj, BytesIteratorSlotIteratedObject, JS::PrivateValue(arrayBuffer));
222222
JS::SetReservedSlot(obj, BytesIteratorSlotNextIndex, JS::Int32Value(0));
223223
JS::SetReservedSlot(obj, BytesIteratorSlotItemKind, JS::Int32Value(itemKind));
@@ -253,13 +253,13 @@ bool PyBytesProxyHandler::set(JSContext *cx, JS::HandleObject proxy, JS::HandleI
253253
JS::HandleValue v, JS::HandleValue receiver,
254254
JS::ObjectOpResult &result) const {
255255

256-
// block all modifications
257-
256+
// block all modifications
257+
258258
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
259259

260260
PyErr_Format(PyExc_TypeError,
261-
"'%.100s' object has only read-only attributes",
262-
Py_TYPE(self)->tp_name);
261+
"'%.100s' object has only read-only attributes",
262+
Py_TYPE(self)->tp_name);
263263

264264
return result.failReadOnly();
265265
}
@@ -298,7 +298,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
298298

299299
// "length" and "byteLength" properties have the same value
300300
if ((JS_StringEqualsLiteral(cx, idString, "length", &isProperty) && isProperty) || (JS_StringEqualsLiteral(cx, id.toString(), "byteLength", &isProperty) && isProperty)) {
301-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
301+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
302302

303303
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());
304304

@@ -314,7 +314,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
314314

315315
// "buffer" property
316316
if (JS_StringEqualsLiteral(cx, idString, "buffer", &isProperty) && isProperty) {
317-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
317+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
318318

319319
desc.set(mozilla::Some(
320320
JS::PropertyDescriptor::Data(
@@ -392,10 +392,10 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
392392
// item
393393
Py_ssize_t index;
394394
if (idToIndex(cx, id, &index)) {
395-
JS::PersistentRootedObject* arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
395+
JS::PersistentRootedObject *arrayBuffer = JS::GetMaybePtrFromReservedSlot<JS::PersistentRootedObject>(proxy, OtherSlot);
396396
JS::RootedObject rootedArrayBuffer(cx, arrayBuffer->get());
397397

398-
bool isSharedMemory;
398+
bool isSharedMemory;
399399
JS::AutoCheckCannotGC autoNoGC(cx);
400400
uint8_t *data = JS::GetArrayBufferData(rootedArrayBuffer, &isSharedMemory, autoNoGC);
401401

@@ -406,7 +406,7 @@ bool PyBytesProxyHandler::getOwnPropertyDescriptor(
406406
));
407407

408408
return true;
409-
}
409+
}
410410

411411
PyObject *attrName = idToKey(cx, id);
412412
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);

src/PyListProxyHandler.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -2099,8 +2099,8 @@ void PyListProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
20992099
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
21002100
// Then, when shutting down, there is only on reference left, and we don't need
21012101
// to free the object since the entire process memory is being released.
2102-
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
2103-
if (Py_REFCNT(self) > 1) {
2102+
if (!_Py_IsFinalizing()) {
2103+
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
21042104
Py_DECREF(self);
21052105
}
21062106
}

src/PyObjectProxyHandler.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ void PyObjectProxyHandler::finalize(JS::GCContext *gcx, JSObject *proxy) const {
8787
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
8888
// Then, when shutting down, there is only on reference left, and we don't need
8989
// to free the object since the entire process memory is being released.
90-
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
91-
if (Py_REFCNT(self) > 1) {
90+
if (!_Py_IsFinalizing()) {
91+
PyObject *self = JS::GetMaybePtrFromReservedSlot<PyObject>(proxy, PyObjectSlot);
9292
Py_DECREF(self);
9393
}
9494
}
@@ -109,12 +109,12 @@ bool PyObjectProxyHandler::ownPropertyKeys(JSContext *cx, JS::HandleObject proxy
109109
}
110110

111111
return handleOwnPropertyKeys(cx, nonDunderKeys, PyList_Size(nonDunderKeys), props);
112-
}
112+
}
113113
else {
114114
if (PyErr_Occurred()) {
115-
PyErr_Clear();
115+
PyErr_Clear();
116116
}
117-
117+
118118
return handleOwnPropertyKeys(cx, PyList_New(0), 0, props);
119119
}
120120
}

src/StrType.cc

+1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) {
112112
JS::RootedObject obj(cx);
113113
pyString->jsString = new JS::PersistentRootedValue(cx);
114114
pyString->jsString->setString((JSString *)lstr);
115+
jsStringProxies.insert(pyString);
115116

116117
// Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93)
117118
// see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245

src/jsTypeFactory.cc

+35-16
Original file line numberDiff line numberDiff line change
@@ -49,46 +49,64 @@ static PyObjectProxyHandler pyObjectProxyHandler;
4949
static PyListProxyHandler pyListProxyHandler;
5050
static PyIterableProxyHandler pyIterableProxyHandler;
5151

52-
std::unordered_map<const char16_t *, PyObject *> ucs2ToPyObjectMap; // a map of char16_t (UCS-2) buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
53-
std::unordered_map<const JS::Latin1Char *, PyObject *> latin1ToPyObjectMap; // a map of Latin-1 char buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
52+
std::unordered_map<PyObject *, size_t> externalStringObjToRefCountMap;// a map of python string objects to the number of JSExternalStrings that depend on it, used when finalizing JSExternalStrings
5453

5554
PyObject *PythonExternalString::getPyString(const char16_t *chars)
5655
{
57-
return ucs2ToPyObjectMap[chars];
56+
for (auto it: externalStringObjToRefCountMap) {
57+
if (PyUnicode_DATA(it.first) == (void *)chars) { // PyUnicode_<2/1>BYTE_DATA are just type casts of PyUnicode_DATA
58+
return it.first;
59+
}
60+
}
61+
62+
return NULL; // this shouldn't be reachable
5863
}
5964

6065
PyObject *PythonExternalString::getPyString(const JS::Latin1Char *chars)
6166
{
62-
return latin1ToPyObjectMap[chars];
67+
68+
return PythonExternalString::getPyString((const char16_t *)chars);
6369
}
6470

6571
void PythonExternalString::finalize(char16_t *chars) const
6672
{
6773
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
6874
// Then, when shutting down, there is only on reference left, and we don't need
6975
// to free the object since the entire process memory is being released.
70-
PyObject *object = ucs2ToPyObjectMap[chars];
71-
if (Py_REFCNT(object) > 1) {
72-
Py_DECREF(object);
76+
if (_Py_IsFinalizing()) { return; }
77+
78+
for (auto it = externalStringObjToRefCountMap.cbegin(), next_it = it; it != externalStringObjToRefCountMap.cend(); it = next_it) {
79+
next_it++;
80+
if (PyUnicode_DATA(it->first) == (void *)chars) {
81+
Py_DECREF(it->first);
82+
externalStringObjToRefCountMap[it->first] = externalStringObjToRefCountMap[it->first] - 1;
83+
84+
if (externalStringObjToRefCountMap[it->first] == 0) {
85+
externalStringObjToRefCountMap.erase(it);
86+
}
87+
}
7388
}
7489
}
7590

7691
void PythonExternalString::finalize(JS::Latin1Char *chars) const
7792
{
78-
PyObject *object = latin1ToPyObjectMap[chars];
79-
if (Py_REFCNT(object) > 1) {
80-
Py_DECREF(object);
81-
}
93+
PythonExternalString::finalize((char16_t *)chars);
8294
}
8395

8496
size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const
8597
{
86-
return PyUnicode_GetLength(ucs2ToPyObjectMap[chars]);
98+
for (auto it: externalStringObjToRefCountMap) {
99+
if (PyUnicode_DATA(it.first) == (void *)chars) {
100+
return PyUnicode_GetLength(it.first);
101+
}
102+
}
103+
104+
return 0; // // this shouldn't be reachable
87105
}
88106

89107
size_t PythonExternalString::sizeOfBuffer(const JS::Latin1Char *chars, mozilla::MallocSizeOf mallocSizeOf) const
90108
{
91-
return PyUnicode_GetLength(latin1ToPyObjectMap[chars]);
109+
return PythonExternalString::sizeOfBuffer((const char16_t *)chars, mallocSizeOf);
92110
}
93111

94112
PythonExternalString PythonExternalStringCallbacks = {};
@@ -151,21 +169,22 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) {
151169
break;
152170
}
153171
case (PyUnicode_2BYTE_KIND): {
154-
ucs2ToPyObjectMap[(char16_t *)PyUnicode_2BYTE_DATA(object)] = object;
172+
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
173+
Py_INCREF(object);
155174
JSString *str = JS_NewExternalUCString(cx, (char16_t *)PyUnicode_2BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
156175
returnType.setString(str);
157176
break;
158177
}
159178
case (PyUnicode_1BYTE_KIND): {
160-
latin1ToPyObjectMap[(JS::Latin1Char *)PyUnicode_1BYTE_DATA(object)] = object;
179+
externalStringObjToRefCountMap[object] = externalStringObjToRefCountMap[object] + 1;
180+
Py_INCREF(object);
161181
JSString *str = JS_NewExternalStringLatin1(cx, (JS::Latin1Char *)PyUnicode_1BYTE_DATA(object), PyUnicode_GET_LENGTH(object), &PythonExternalStringCallbacks);
162182
// JSExternalString can now be properly treated as either one-byte or two-byte strings when GCed
163183
// see https://hg.mozilla.org/releases/mozilla-esr128/file/tip/js/src/vm/StringType-inl.h#l785
164184
returnType.setString(str);
165185
break;
166186
}
167187
}
168-
Py_INCREF(object);
169188
}
170189
else if (PyMethod_Check(object) || PyFunction_Check(object) || PyCFunction_Check(object)) {
171190
// can't determine number of arguments for PyCFunctions, so just assume potentially unbounded

src/modules/pythonmonkey/pythonmonkey.cc

+17-2
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,25 @@
4848

4949
JS::PersistentRootedObject jsFunctionRegistry;
5050

51-
void finalizationRegistryGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
51+
void pythonmonkeyGCCallback(JSContext *cx, JSGCStatus status, JS::GCReason reason, void *data) {
5252
if (status == JSGCStatus::JSGC_END) {
5353
JS::ClearKeptObjects(GLOBAL_CX);
5454
while (JOB_QUEUE->runFinalizationRegistryCallbacks(GLOBAL_CX));
55+
56+
if (_Py_IsFinalizing()) {
57+
return; // do not move char pointers around if python is finalizing
58+
}
59+
60+
JS::AutoCheckCannotGC nogc;
61+
for (const JSStringProxy *jsStringProxy: jsStringProxies) { // char buffers may have moved, so we need to re-point our JSStringProxies
62+
JSLinearString *str = (JSLinearString *)(jsStringProxy->jsString->toString()); // jsString is guaranteed to be linear
63+
if (JS::LinearStringHasLatin1Chars(str)) {
64+
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetLatin1LinearStringChars(nogc, str);
65+
}
66+
else { // utf16 / ucs2 string
67+
(((PyUnicodeObject *)(jsStringProxy))->data.any) = (void *)JS::GetTwoByteLinearStringChars(nogc, str);
68+
}
69+
}
5570
}
5671
}
5772

@@ -547,7 +562,7 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void)
547562
return NULL;
548563
}
549564

550-
JS_SetGCCallback(GLOBAL_CX, finalizationRegistryGCCallback, NULL);
565+
JS_SetGCCallback(GLOBAL_CX, pythonmonkeyGCCallback, NULL);
551566

552567
JS::RealmCreationOptions creationOptions = JS::RealmCreationOptions();
553568
JS::RealmBehaviors behaviours = JS::RealmBehaviors();

0 commit comments

Comments
 (0)