Skip to content

Commit e53008a

Browse files
authored
Merge pull request #204 from posit-dev/patch-http
Use `pyodide_http.patch_all()` and remove custom Seaborn data shim
2 parents 98aecb3 + 7cd18f0 commit e53008a

File tree

1 file changed: +4 additions, -161 deletions

src/hooks/usePyodide.tsx

Lines changed: 4 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -170,173 +170,13 @@ def _mock_ipykernel():
170170
m.CommManager = CommManager
171171
mods["ipykernel.comm"] = m
172172
173-
174-
# A shim for the seaborn.load_dataset() function to work in Pyodide. Normally
175-
# that function won't work in Pyodide because it uses the urllib module, which
176-
# doesn't work in Pyodide. This shim replaces the uses of urllib with a wrapper
177-
# function around JS's synchronous XMLHttpRequest.
178-
def _shim_seaborn_load_dataset():
179-
import importlib
180-
import importlib.abc
181-
import io
182-
import os
183-
import sys
184-
import pyodide.code
185-
186-
# ==========================================================================
187-
# Python wrapper to get a URL synchronously using a JS XMLHttpRequest.
188-
# ==========================================================================
189-
get_url = pyodide.code.run_js(
190-
"""
191-
(url) => {
192-
const request = new XMLHttpRequest();
193-
request.open("GET", url, false); // false for synchronous request
194-
request.send(null);
195-
if (request.status === 200) {
196-
return request.responseText;
197-
} else {
198-
return "";
199-
}
200-
}
201-
"""
202-
)
203-
204-
# ==========================================================================
205-
# Substitutes for functions from seaborn.utils
206-
# ==========================================================================
207-
DATASET_SOURCE = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master"
208-
DATASET_NAMES_URL = f"{DATASET_SOURCE}/dataset_names.txt"
209-
210-
def urlopen(url: str) -> io.BytesIO:
211-
res = get_url(url)
212-
return io.BytesIO(res.encode("utf-8"))
213-
214-
def urlretrieve(url: str, filename: str) -> None:
215-
res = get_url(url)
216-
with open(filename, "w") as file:
217-
file.write(res)
218-
219-
def get_dataset_names():
220-
with urlopen(DATASET_NAMES_URL) as resp:
221-
txt = resp.read()
222-
223-
dataset_names = [name.strip() for name in txt.decode().split("\\n")]
224-
return list(filter(None, dataset_names))
225-
226-
def load_dataset(name, cache=True, data_home=None, **kws):
227-
# Load these dynamically with importlib instead of with 'import pandas'
228-
# because if we do a normal import, then Pyodide will detect and
229-
# automatically download the files for these packages and all their
230-
# dependencies when it starts, which is slow.
231-
pd = importlib.import_module("pandas")
232-
seaborn = importlib.import_module("seaborn")
233-
234-
if isinstance(name, pd.DataFrame):
235-
err = (
236-
"This function accepts only strings (the name of an example dataset). "
237-
"You passed a pandas DataFrame. If you have your own dataset, "
238-
"it is not necessary to use this function before plotting."
239-
)
240-
raise TypeError(err)
241-
242-
url = f"{DATASET_SOURCE}/{name}.csv"
243-
244-
if cache:
245-
cache_path = os.path.join(
246-
seaborn.utils.get_data_home(data_home), os.path.basename(url)
247-
)
248-
if not os.path.exists(cache_path):
249-
if name not in get_dataset_names():
250-
raise ValueError(f"'{name}' is not one of the example datasets.")
251-
urlretrieve(url, cache_path)
252-
full_path = cache_path
253-
else:
254-
full_path = url
255-
256-
df = pd.read_csv(full_path, **kws)
257-
258-
if df.iloc[-1].isnull().all():
259-
df = df.iloc[:-1]
260-
261-
if name == "tips":
262-
df["day"] = pd.Categorical(df["day"], ["Thur", "Fri", "Sat", "Sun"])
263-
df["sex"] = pd.Categorical(df["sex"], ["Male", "Female"])
264-
df["time"] = pd.Categorical(df["time"], ["Lunch", "Dinner"])
265-
df["smoker"] = pd.Categorical(df["smoker"], ["Yes", "No"])
266-
267-
elif name == "flights":
268-
months = df["month"].str[:3]
269-
df["month"] = pd.Categorical(months, months.unique())
270-
271-
elif name == "exercise":
272-
df["time"] = pd.Categorical(df["time"], ["1 min", "15 min", "30 min"])
273-
df["kind"] = pd.Categorical(df["kind"], ["rest", "walking", "running"])
274-
df["diet"] = pd.Categorical(df["diet"], ["no fat", "low fat"])
275-
276-
elif name == "titanic":
277-
df["class"] = pd.Categorical(df["class"], ["First", "Second", "Third"])
278-
df["deck"] = pd.Categorical(df["deck"], list("ABCDEFG"))
279-
280-
elif name == "penguins":
281-
df["sex"] = df["sex"].str.title()
282-
283-
elif name == "diamonds":
284-
df["color"] = pd.Categorical(
285-
df["color"],
286-
["D", "E", "F", "G", "H", "I", "J"],
287-
)
288-
df["clarity"] = pd.Categorical(
289-
df["clarity"],
290-
["IF", "VVS1", "VVS2", "VS1", "VS2", "SI1", "SI2", "I1"],
291-
)
292-
df["cut"] = pd.Categorical(
293-
df["cut"],
294-
["Ideal", "Premium", "Very Good", "Good", "Fair"],
295-
)
296-
297-
elif name == "taxis":
298-
df["pickup"] = pd.to_datetime(df["pickup"])
299-
df["dropoff"] = pd.to_datetime(df["dropoff"])
300-
301-
elif name == "seaice":
302-
df["Date"] = pd.to_datetime(df["Date"])
303-
304-
elif name == "dowjones":
305-
df["Date"] = pd.to_datetime(df["Date"])
306-
307-
return df
308-
309-
# ======================================================================================
310-
# Import hook to inject shim when seaborn is loaded
311-
# ======================================================================================
312-
class PostImportFinder(importlib.abc.MetaPathFinder):
313-
def __init__(self):
314-
self.is_loading = False
315-
316-
def find_module(self, fullname, path=None):
317-
if fullname == "seaborn" and not self.is_loading:
318-
return self
319-
320-
def load_module(self, fullname):
321-
self.is_loading = True
322-
# Load the actual seaborn module
323-
seaborn = importlib.import_module("seaborn")
324-
325-
# Apply the shimmed version of load_dataset
326-
seaborn.utils.load_dataset = load_dataset
327-
seaborn.load_dataset = load_dataset
328-
329-
return seaborn
330-
331-
sys.meta_path.insert(0, PostImportFinder())
332-
333173
_mock_multiprocessing()
334174
_mock_ipykernel()
335-
_shim_seaborn_load_dataset()
336175
337176
def _pyodide_env_init():
338177
import os
339178
import sys
179+
import pyodide_http
340180
341181
# We don't use ssl in this function, but this is needed for Shiny to load.
342182
import ssl
@@ -348,6 +188,9 @@ def _pyodide_env_init():
348188
# Add current directory to Python path.
349189
sys.path.insert(0, "")
350190
191+
# Patch http libraries to work with Pyodide
192+
pyodide_http.patch_all()
193+
351194
_pyodide_env_init()
352195
353196
# Function for saving a set of files so we can load them as a module.

Comments (0)