Skip to content

Commit fb6c4e3

Browse files
authored
Use const char* for JSON key name (#60721)
1 parent a15a4b5 commit fb6c4e3

File tree

5 files changed

+62
-82
lines changed

5 files changed

+62
-82
lines changed

pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h

+2 -2
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
170170
typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
171171
typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
172172
typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
173-
typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
174-
size_t *outLen);
173+
typedef const char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
174+
size_t *outLen);
175175
typedef void *(*JSPFN_MALLOC)(size_t size);
176176
typedef void (*JSPFN_FREE)(void *pptr);
177177
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);

pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c

+1 -1
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,7 @@ Perhaps implement recursion detection */
920920
void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
921921
size_t cbName) {
922922
const char *value;
923-
char *objName;
923+
const char *objName;
924924
int count;
925925
JSOBJ iterObj;
926926
size_t szlen;

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+56 -79
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ Numeric decoder derived from TCL library
5353

5454
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
5555

56-
typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
57-
size_t *_outLen);
56+
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
57+
size_t *_outLen);
5858

5959
int object_is_decimal_type(PyObject *obj);
6060
int object_is_dataframe_type(PyObject *obj);
@@ -106,7 +106,7 @@ typedef struct __TypeContext {
106106
double doubleValue;
107107
JSINT64 longValue;
108108

109-
char *cStr;
109+
const char *cStr;
110110
NpyArrContext *npyarr;
111111
PdBlockContext *pdblock;
112112
int transpose;
@@ -301,14 +301,15 @@ static npy_float64 total_seconds(PyObject *td) {
301301
return double_val;
302302
}
303303

304-
static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
305-
size_t *_outLen) {
304+
static const char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
305+
size_t *_outLen) {
306306
PyObject *obj = (PyObject *)_obj;
307307
*_outLen = PyBytes_GET_SIZE(obj);
308308
return PyBytes_AS_STRING(obj);
309309
}
310310

311-
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
311+
static const char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
312+
size_t *_outLen) {
312313
char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
313314
if (encoded == NULL) {
314315
/* Something went wrong.
@@ -321,24 +322,24 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
321322
}
322323

323324
/* JSON callback. Returns a const char* and stores the string length in *len */
324-
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
325-
JSONTypeContext *tc, size_t *len) {
325+
static const char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
326+
JSONTypeContext *tc, size_t *len) {
326327
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
327328
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
328329
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
329330
return GET_TC(tc)->cStr;
330331
}
331332

332333
/* JSON callback. Returns a const char* and stores the string length in *len */
333-
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
334-
JSONTypeContext *tc, size_t *len) {
334+
static const char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
335+
JSONTypeContext *tc, size_t *len) {
335336
GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len);
336337
return GET_TC(tc)->cStr;
337338
}
338339

339340
/* JSON callback */
340-
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
341-
size_t *len) {
341+
static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
342+
size_t *len) {
342343
if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) {
343344
PyErr_SetString(PyExc_TypeError, "Expected date or datetime object");
344345
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
@@ -349,7 +350,8 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
349350
return PyDateTimeToIso(obj, base, len);
350351
}
351352

352-
static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
353+
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
354+
size_t *outLen) {
353355
PyObject *obj = (PyObject *)_obj;
354356
PyObject *str = PyObject_CallMethod(obj, "isoformat", NULL);
355357
if (str == NULL) {
@@ -373,8 +375,8 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
373375
return outValue;
374376
}
375377

376-
static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
377-
size_t *len) {
378+
static const char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
379+
size_t *len) {
378380
PyObject *obj = (PyObject *)_obj;
379381
PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1);
380382
PyObject *str = PyObject_Format(obj, format_spec);
@@ -558,10 +560,10 @@ static JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
558560
return GET_TC(tc)->itemValue;
559561
}
560562

561-
static char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
562-
size_t *outLen) {
563+
static const char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
564+
size_t *outLen) {
563565
NpyArrContext *npyarr = GET_TC(tc)->npyarr;
564-
char *cStr;
566+
const char *cStr;
565567

566568
if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
567569
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
@@ -609,11 +611,11 @@ static int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
609611
return NpyArr_iterNextItem(obj, tc);
610612
}
611613

612-
static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
613-
size_t *outLen) {
614+
static const char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj),
615+
JSONTypeContext *tc, size_t *outLen) {
614616
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
615617
NpyArrContext *npyarr = blkCtxt->npyCtxts[0];
616-
char *cStr;
618+
const char *cStr;
617619

618620
if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) {
619621
const npy_intp idx = blkCtxt->colIdx - 1;
@@ -631,12 +633,12 @@ static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
631633
return cStr;
632634
}
633635

634-
static char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
635-
JSONTypeContext *tc,
636-
size_t *outLen) {
636+
static const char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
637+
JSONTypeContext *tc,
638+
size_t *outLen) {
637639
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
638640
NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx];
639-
char *cStr;
641+
const char *cStr;
640642

641643
if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
642644
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
@@ -817,9 +819,9 @@ static JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
817819
return GET_TC(tc)->itemValue;
818820
}
819821

820-
static char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
821-
JSONTypeContext *Py_UNUSED(tc),
822-
size_t *Py_UNUSED(outLen)) {
822+
static const char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
823+
JSONTypeContext *Py_UNUSED(tc),
824+
size_t *Py_UNUSED(outLen)) {
823825
return NULL;
824826
}
825827

@@ -864,9 +866,9 @@ static JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
864866
return GET_TC(tc)->itemValue;
865867
}
866868

867-
static char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
868-
JSONTypeContext *Py_UNUSED(tc),
869-
size_t *Py_UNUSED(outLen)) {
869+
static const char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
870+
JSONTypeContext *Py_UNUSED(tc),
871+
size_t *Py_UNUSED(outLen)) {
870872
return NULL;
871873
}
872874

@@ -962,8 +964,8 @@ static JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
962964
return GET_TC(tc)->itemValue;
963965
}
964966

965-
static char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
966-
size_t *outLen) {
967+
static const char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
968+
size_t *outLen) {
967969
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
968970
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
969971
}
@@ -994,9 +996,9 @@ static JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
994996
return GET_TC(tc)->itemValue;
995997
}
996998

997-
static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
998-
JSONTypeContext *Py_UNUSED(tc),
999-
size_t *Py_UNUSED(outLen)) {
999+
static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
1000+
JSONTypeContext *Py_UNUSED(tc),
1001+
size_t *Py_UNUSED(outLen)) {
10001002
return NULL;
10011003
}
10021004

@@ -1005,24 +1007,16 @@ static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
10051007
//=============================================================================
10061008
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10071009
GET_TC(tc)->index = 0;
1008-
GET_TC(tc)->cStr = PyObject_Malloc(20);
1009-
if (!GET_TC(tc)->cStr) {
1010-
PyErr_NoMemory();
1011-
}
10121010
}
10131011

10141012
static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
1015-
if (!GET_TC(tc)->cStr) {
1016-
return 0;
1017-
}
1018-
10191013
const Py_ssize_t index = GET_TC(tc)->index;
10201014
Py_XDECREF(GET_TC(tc)->itemValue);
10211015
if (index == 0) {
1022-
memcpy(GET_TC(tc)->cStr, "name", 5);
1016+
GET_TC(tc)->cStr = "name";
10231017
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10241018
} else if (index == 1) {
1025-
memcpy(GET_TC(tc)->cStr, "data", 5);
1019+
GET_TC(tc)->cStr = "data";
10261020
GET_TC(tc)->itemValue = get_values(obj);
10271021
if (!GET_TC(tc)->itemValue) {
10281022
return 0;
@@ -1042,8 +1036,8 @@ static JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10421036
return GET_TC(tc)->itemValue;
10431037
}
10441038

1045-
static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1046-
size_t *outLen) {
1039+
static const char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1040+
size_t *outLen) {
10471041
*outLen = strlen(GET_TC(tc)->cStr);
10481042
return GET_TC(tc)->cStr;
10491043
}
@@ -1054,28 +1048,20 @@ static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
10541048
static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10551049
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10561050
GET_TC(tc)->index = 0;
1057-
GET_TC(tc)->cStr = PyObject_Malloc(20);
10581051
enc->outputFormat = VALUES; // for contained series
1059-
if (!GET_TC(tc)->cStr) {
1060-
PyErr_NoMemory();
1061-
}
10621052
}
10631053

10641054
static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
1065-
if (!GET_TC(tc)->cStr) {
1066-
return 0;
1067-
}
1068-
10691055
const Py_ssize_t index = GET_TC(tc)->index;
10701056
Py_XDECREF(GET_TC(tc)->itemValue);
10711057
if (index == 0) {
1072-
memcpy(GET_TC(tc)->cStr, "name", 5);
1058+
GET_TC(tc)->cStr = "name";
10731059
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10741060
} else if (index == 1) {
1075-
memcpy(GET_TC(tc)->cStr, "index", 6);
1061+
GET_TC(tc)->cStr = "index";
10761062
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
10771063
} else if (index == 2) {
1078-
memcpy(GET_TC(tc)->cStr, "data", 5);
1064+
GET_TC(tc)->cStr = "data";
10791065
GET_TC(tc)->itemValue = get_values(obj);
10801066
if (!GET_TC(tc)->itemValue) {
10811067
return 0;
@@ -1097,8 +1083,8 @@ static JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10971083
return GET_TC(tc)->itemValue;
10981084
}
10991085

1100-
static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1101-
size_t *outLen) {
1086+
static const char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1087+
size_t *outLen) {
11021088
*outLen = strlen(GET_TC(tc)->cStr);
11031089
return GET_TC(tc)->cStr;
11041090
}
@@ -1109,28 +1095,20 @@ static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
11091095
static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
11101096
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
11111097
GET_TC(tc)->index = 0;
1112-
GET_TC(tc)->cStr = PyObject_Malloc(20);
11131098
enc->outputFormat = VALUES; // for contained series & index
1114-
if (!GET_TC(tc)->cStr) {
1115-
PyErr_NoMemory();
1116-
}
11171099
}
11181100

11191101
static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
1120-
if (!GET_TC(tc)->cStr) {
1121-
return 0;
1122-
}
1123-
11241102
const Py_ssize_t index = GET_TC(tc)->index;
11251103
Py_XDECREF(GET_TC(tc)->itemValue);
11261104
if (index == 0) {
1127-
memcpy(GET_TC(tc)->cStr, "columns", 8);
1105+
GET_TC(tc)->cStr = "columns";
11281106
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
11291107
} else if (index == 1) {
1130-
memcpy(GET_TC(tc)->cStr, "index", 6);
1108+
GET_TC(tc)->cStr = "index";
11311109
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
11321110
} else if (index == 2) {
1133-
memcpy(GET_TC(tc)->cStr, "data", 5);
1111+
GET_TC(tc)->cStr = "data";
11341112
Py_INCREF(obj);
11351113
GET_TC(tc)->itemValue = obj;
11361114
} else {
@@ -1150,8 +1128,8 @@ static JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
11501128
return GET_TC(tc)->itemValue;
11511129
}
11521130

1153-
static char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1154-
size_t *outLen) {
1131+
static const char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj),
1132+
JSONTypeContext *tc, size_t *outLen) {
11551133
*outLen = strlen(GET_TC(tc)->cStr);
11561134
return GET_TC(tc)->cStr;
11571135
}
@@ -1201,8 +1179,8 @@ static JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
12011179
return GET_TC(tc)->itemValue;
12021180
}
12031181

1204-
static char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1205-
size_t *outLen) {
1182+
static const char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
1183+
size_t *outLen) {
12061184
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
12071185
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
12081186
}
@@ -1902,7 +1880,6 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
19021880
GET_TC(tc)->rowLabels = NULL;
19031881
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
19041882
GET_TC(tc)->columnLabels = NULL;
1905-
PyObject_Free(GET_TC(tc)->cStr);
19061883
GET_TC(tc)->cStr = NULL;
19071884
PyObject_Free(tc->prv);
19081885
tc->prv = NULL;
@@ -1953,8 +1930,8 @@ static JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {
19531930
return GET_TC(tc)->iterGetValue(obj, tc);
19541931
}
19551932

1956-
static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
1957-
size_t *outLen) {
1933+
static const char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
1934+
size_t *outLen) {
19581935
return GET_TC(tc)->iterGetName(obj, tc, outLen);
19591936
}
19601937

pandas/tests/io/json/test_compression.py

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def test_read_zipped_json(datapath):
4141

4242
@td.skip_if_not_us_locale
4343
@pytest.mark.single_cpu
44+
@pytest.mark.network
4445
def test_with_s3_url(compression, s3_public_bucket, s3so):
4546
# Bucket created in tests/io/conftest.py
4647
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

pandas/tests/io/json/test_pandas.py

+2
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,7 @@ def test_read_inline_jsonl(self):
14121412
tm.assert_frame_equal(result, expected)
14131413

14141414
@pytest.mark.single_cpu
1415+
@pytest.mark.network
14151416
@td.skip_if_not_us_locale
14161417
def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
14171418
# GH17200
@@ -2011,6 +2012,7 @@ def test_json_multiindex(self):
20112012
assert result == expected
20122013

20132014
@pytest.mark.single_cpu
2015+
@pytest.mark.network
20142016
def test_to_s3(self, s3_public_bucket, s3so):
20152017
# GH 28375
20162018
mock_bucket_name, target_file = s3_public_bucket.name, "test.json"

0 commit comments

Comments
 (0)