
Commit 41321f8

Author: sam bacha

v1.8.0 (#10)

* feat(cli): python3 scripts pkg Signed-off-by: sam <[email protected]>
* chore(release): 1.8.0

1 parent ac9ab26, commit 41321f8

9 files changed: +7318 -6 lines

.clang-format

+1 -2
@@ -75,7 +75,7 @@ Standard: c++20
 SortIncludes: true
 IncludeBlocks: Preserve
 IncludeCategories:
-  # Relative Paths
+  # Relative Paths
   - Regex: '^"'
     Priority: 1
@@ -136,4 +136,3 @@ BreakAfterJavaFieldAnnotations: true
 Language: JavaScript
 JavaScriptQuotes: Double
 JavaScriptWrapImports: true
-...

CHANGELOG.md

+10 -3
@@ -2,10 +2,17 @@
 
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
-### [1.7.3](https://github.com/sambacha/dune-snippets/compare/v1.7.2...v1.7.3) (2021-04-10)
+## [1.8.0](https://github.com/sambacha/dune-snippets/compare/v1.7.3...v1.8.0) (2021-04-10)
+
+
+### Features
 
+* **add:** uniswap V2 ([#9](https://github.com/sambacha/dune-snippets/issues/9)) ([ac9ab26](https://github.com/sambacha/dune-snippets/commit/ac9ab2697e8dbb08cfb69a9cc7bdf69e6f05fd56))
+* **cli:** python3 scripts pkg ([364b651](https://github.com/sambacha/dune-snippets/commit/364b651939d4e4c10c2f67b64902fe832d264eeb))
+
+### [1.7.3](https://github.com/sambacha/dune-snippets/compare/v1.7.2...v1.7.3) (2021-04-10)
 
 ### Bug Fixes
 
-* **bug:** malformed artifact pollution ([fcb4f66](https://github.com/sambacha/dune-snippets/commit/fcb4f662e3a75434abec207a47f743fb749fad84)), closes [#8](https://github.com/sambacha/dune-snippets/issues/8)
-* **defect:** artifact ([b187b05](https://github.com/sambacha/dune-snippets/commit/b187b058ed500a7e0a8de33d2b917feb495003a9))
+- **bug:** malformed artifact pollution ([fcb4f66](https://github.com/sambacha/dune-snippets/commit/fcb4f662e3a75434abec207a47f743fb749fad84)), closes [#8](https://github.com/sambacha/dune-snippets/issues/8)
+- **defect:** artifact ([b187b05](https://github.com/sambacha/dune-snippets/commit/b187b058ed500a7e0a8de33d2b917feb495003a9))

package.json

+1 -1
@@ -1,6 +1,6 @@
 {
   "name": "dune-snippets",
-  "version": "1.7.3",
+  "version": "1.8.0",
   "description": "Dune Analytics Snippets",
   "main": "dune/index.json",
   "scripts": {

src/common.py

+320 (new file)
@@ -0,0 +1,320 @@

import pandas as pd
import json
import re
import numpy as np
import glob


class EmptySolutionError(Exception):
    pass


# NB: unused below (the "max_xrate" field that calls it is commented out);
# note the buy-order branch returns buy/sell, the inverse of the sell/buy
# convention used for "xrate" elsewhere in this module.
def get_max_xrate(o):
    if o["isSellOrder"]:
        return o["maxSellAmount"] / o["minBuyAmount"]
    else:
        return o["maxBuyAmount"] / o["maxSellAmount"]


def compute_avg_eth_price_usd(orders):
    return sum(
        [o["sellTokenDailyPriceUSD"] for o in orders if o["sellToken"] == "WETH"]
        + [o["buyTokenDailyPriceUSD"] for o in orders if o["buyToken"] == "WETH"]
    ) / len([o for o in orders if "WETH" in {o["sellToken"], o["buyToken"]}])


# O(n) iterator that for every element A in the first iterable returns
# the largest element B in the second iterable that satisfies test.
# Assumes that both lists are sorted.
# Example: for this input
#   a = 1 2 4 5 9
#   b = 1 3 6 10
#   test(a, b) = a >= b
# returns:
#   1 1 3 3 6
def get_largest_element_sequence(a, b, test):
    idx_b = 0
    for idx_a in range(len(a)):
        while idx_b < len(b) - 1 and test(a[idx_a], b[idx_b + 1]):
            idx_b += 1
        if not test(a[idx_a], b[idx_b]):
            raise ValueError("Found no element satisfying test.")
        yield b[idx_b]
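A quick sanity check of the generator on the example from the comment above (toy values, not repo data):

    >>> list(get_largest_element_sequence([1, 2, 4, 5, 9], [1, 3, 6, 10], lambda x, y: x >= y))
    [1, 1, 3, 3, 6]

The inner while-loop advances idx_b at most len(b) times over the whole pass, which is what makes the traversal O(n) rather than O(len(a) * len(b)).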
def load_block_data_file_to_df(fname):
    with open(fname, "r") as f:
        d = json.load(f)
    eth_price_usd = compute_avg_eth_price_usd(d["orders"])
    d = [
        {
            "block": o["uniswap"]["block"],
            "index": o["uniswap"]["index"],
            "sell_token": o["sellToken"],
            "buy_token": o["buyToken"],
            "max_buy_amount": o["maxBuyAmount"] if not o["isSellOrder"] else None,
            "max_sell_amount": o["maxSellAmount"] if o["isSellOrder"] else None,
            "sell_token_price_eth": o["sellTokenPriceETH"],
            "buy_token_price_eth": o["buyTokenPriceETH"],
            "sell_token_price_usd": o["sellTokenPriceETH"] * eth_price_usd,
            "buy_token_price_usd": o["buyTokenPriceETH"] * eth_price_usd,
            "timestamp": o["uniswap"]["timestamp"],
            "exec_sell_amount": o["uniswap"]["amounts"][0],
            "exec_buy_amount": o["uniswap"]["amounts"][-1],
            "nr_pools": len(o["uniswap"]["amounts"]) - 1,
            "is_sell_order": o["isSellOrder"],
            "address": o["address"],
            "sell_reserve": float(o["uniswap"]["balancesSellToken"][0]),
            "buy_reserve": float(o["uniswap"]["balancesBuyToken"][-1]),
            # 'max_xrate': get_max_xrate(o)
        }
        for o in d["orders"]
    ]
    df = pd.DataFrame.from_records(d)
    df["xrate"] = df.exec_sell_amount / df.exec_buy_amount
    df["block_index"] = df.apply(
        lambda r: "_".join(r[["block", "index"]].astype(str).values), axis=1
    )
    df["token_pair"] = df.apply(
        lambda r: "-".join(sorted([r["sell_token"], r["buy_token"]])), axis=1
    )
    df["exec_vol"] = df.exec_sell_amount * df.sell_token_price_usd
    df["max_vol_usd"] = df.apply(
        lambda r: r.max_sell_amount * r.sell_token_price_usd
        if r.is_sell_order
        else r.max_buy_amount * r.buy_token_price_usd,
        axis=1,
    )
    df["max_vol_eth"] = df.apply(
        lambda r: r.max_sell_amount * r.sell_token_price_eth
        if r.is_sell_order
        else r.max_buy_amount * r.buy_token_price_eth,
        axis=1,
    )

    return df.set_index("block_index")


def remove_most_active_users(df_exec, fraction_to_remove):
    # value_counts() sorts descending, so slicing off the first fraction
    # drops the most active addresses
    nr_addresses = df_exec.address.nunique()
    addresses = (
        df_exec.address.value_counts()
        .iloc[round(nr_addresses * fraction_to_remove) :]
        .index
    )
    return df_exec[df_exec.address.isin(addresses)]


def load_solver_solution(fname):
    with open(fname, "r") as f:
        d = json.load(f)
    d = [
        {
            "block": int(oid.split("-")[0]),
            "index": int(oid.split("-")[1]),
            "sell_token": o["sell_token"],
            "buy_token": o["buy_token"],
            # amounts appear to be 18-decimal fixed point; scale to token units
            "exec_sell_amount": int(o["exec_sell_amount"]) * 1e-18,
            "exec_buy_amount": int(o["exec_buy_amount"]) * 1e-18,
            "is_sell_order": o["is_sell_order"],
        }
        for oid, o in d["orders"].items()
    ]
    if len(d) == 0:
        raise EmptySolutionError()
    df = pd.DataFrame.from_records(d)
    df["xrate"] = df.exec_sell_amount / df.exec_buy_amount
    df["block_index"] = df.apply(
        lambda r: "_".join(r[["block", "index"]].astype(str).values), axis=1
    )
    return df.set_index("block_index")
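A note on the 1e-18 scaling above: it treats solver amounts as 18-decimal fixed-point integers (an assumption consistent with wei-style encodings) and converts them to floats, which rounds to roughly 15-16 significant digits. A minimal, purely illustrative sketch of an exact alternative using the standard library:

    from decimal import Decimal

    raw = 1234567890123456789012          # hypothetical 18-decimal integer amount
    as_float = raw * 1e-18                # 1234.5678901234568 (rounded to float64)
    as_exact = Decimal(raw) / 10**18      # Decimal('1234.567890123456789012')

Float precision is fine for the statistics computed here; Decimal would only matter for exact accounting.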
def merge_exec_and_solved(fname, df_exec, from_timestamp, to_timestamp):
    df_sol = load_solver_solution(fname)
    df = df_exec[
        (df_exec.timestamp >= from_timestamp) & (df_exec.timestamp <= to_timestamp)
    ].merge(
        df_sol[["exec_sell_amount", "exec_buy_amount", "xrate"]],
        how="inner",
        on="block_index",
        suffixes=("_uni", "_gp"),
    )
    df["batch_start_time"] = from_timestamp
    df["batch_end_time"] = to_timestamp
    df["surplus"] = df.xrate_uni / df.xrate_gp
    # store savings as columns (compute_savings_per_token below relies on them)
    df["savings_buy"] = df.exec_buy_amount_gp - df.exec_buy_amount_uni
    df["savings_sell"] = df.exec_sell_amount_uni - df.exec_sell_amount_gp
    df["savings_vol_usd"] = (
        df.savings_buy * df["buy_token_price_usd"]
        + df.savings_sell * df["sell_token_price_usd"]
    )
    return df


def create_batch_table(solution_fname, df_exec):
    m = re.search(r"_([0-9]+)\-([0-9]+)(\-[0-9]+)*\.json$", solution_fname)
    from_timestamp, to_timestamp = int(m[1]), int(m[2])
    return merge_exec_and_solved(solution_fname, df_exec, from_timestamp, to_timestamp)


def compute_savings_per_token(df):
    savings_buy_per_token = df.groupby("buy_token").savings_buy.sum()
    savings_sell_per_token = df.groupby("sell_token").savings_sell.sum()
    return savings_buy_per_token.add(savings_sell_per_token, fill_value=0)


def compute_mean_gp_rel_surplus(df):
    # geometric mean of xrate_uni / xrate_gp
    return np.exp(np.mean(np.log(df.xrate_uni) - np.log(df.xrate_gp)))
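compute_mean_gp_rel_surplus averages the surplus ratios multiplicatively: exp(mean(log(x))) is the geometric mean of x = xrate_uni / xrate_gp, so a batch where GP improves the effective rate by some factor exactly cancels one where it worsens it by the same factor. A toy check with invented ratios:

    import numpy as np

    ratios = np.array([1.10, 1 / 1.10])    # +10% and -10%, multiplicatively
    np.exp(np.mean(np.log(ratios)))        # 1.0 -- the two batches cancel
    ratios.mean()                          # ~1.0045 -- arithmetic mean is biased upward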
def create_batches_table(solution_dir, df_exec):
    dfs = []
    for fname in glob.glob(f"{solution_dir}/*.json"):
        try:
            dfs.append(create_batch_table(fname, df_exec))
        except EmptySolutionError:
            pass
    return pd.concat(dfs, axis=0).sort_index()


def compute_orig_batch(batchdf, df_exec):
    batch_start_time = batchdf.batch_start_time.iloc[0]
    batch_end_time = batchdf.batch_end_time.iloc[0]
    return df_exec[
        (df_exec.timestamp >= batch_start_time) & (df_exec.timestamp <= batch_end_time)
    ]


def compute_orig_batch_size(batchdf, df_exec):
    batch_start_time = batchdf.batch_start_time.iloc[0]
    batch_end_time = batchdf.batch_end_time.iloc[0]
    return (
        (df_exec.timestamp >= batch_start_time) & (df_exec.timestamp <= batch_end_time)
    ).sum()


def remove_batches_not_fully_executed(df_sol, df_exec):
    # keep only batches where the solver matched every order that was
    # originally executed in the batch window
    problem_batch_sizes = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch_size, df_exec=df_exec
    )
    solution_batch_sizes = (
        df_sol.groupby(["batch_start_time", "batch_end_time"]).count().block
    )
    batch_start_times = [
        b[0]
        for b in solution_batch_sizes[solution_batch_sizes == problem_batch_sizes].index
    ]
    return df_sol[df_sol.batch_start_time.isin(batch_start_times)]
def compute_orig_total_orders(df_sol, df_exec):
    df = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch, df_exec=df_exec
    )
    tokens = pd.concat([df_sol.sell_token, df_sol.buy_token], axis=0).unique()
    return (df.sell_token.isin(tokens) & df.buy_token.isin(tokens)).sum()


def compute_orig_total_users(df_sol, df_exec):
    df = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch, df_exec=df_exec
    )
    tokens = pd.concat([df_sol.sell_token, df_sol.buy_token], axis=0).unique()
    return df[df.sell_token.isin(tokens) & df.buy_token.isin(tokens)].address.nunique()


def filter_batches_with_large_liquidity_updates(df_sol):
    # remove batches for which there was a liquidity update to some used pool
    # that changed it by at least a CUTOFF fraction of its liquidity
    CUTOFF = 0.3

    def large_liquidity_update_occurred_in_batch(batch_df):
        def occurred_in_token_pair(batch_df):
            if batch_df.shape[0] == 1:
                return False

            def occurred_between_consecutive_trades(r):
                # r holds two consecutive trades side by side (duplicate column
                # names), so r.sell_reserve etc. are two-element Series: compare
                # the pool reserve after trade 0 with the reserve observed
                # before trade 1
                n1 = r.sell_reserve.iloc[0] + r.exec_sell_amount_uni.iloc[0]
                if r.sell_token.iloc[0] == r.sell_token.iloc[1]:
                    n2 = r.sell_reserve.iloc[1]
                else:
                    assert r.sell_token.iloc[0] == r.buy_token.iloc[1]
                    n2 = r.buy_reserve.iloc[1]
                return abs(n1 - n2) / max(n1, n2) >= CUTOFF

            df = pd.concat([batch_df, batch_df.shift(-1)], axis=1).iloc[:-1]
            return np.any(df.apply(occurred_between_consecutive_trades, axis=1))

        return np.any(batch_df.groupby("token_pair").apply(occurred_in_token_pair))

    df = df_sol[
        [
            "batch_start_time",
            "token_pair",
            "sell_token",
            "buy_token",
            "sell_reserve",
            "buy_reserve",
            "exec_sell_amount_uni",
            "exec_buy_amount_uni",
        ]
    ].groupby("batch_start_time")
    m = df.apply(large_liquidity_update_occurred_in_batch)
    bad_batches = m[m].index
    return df_sol[~df_sol.batch_start_time.isin(bad_batches)]
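To make the reserve comparison above concrete, a toy run with invented numbers: if trade 0 observed a sell-side reserve of 1000 and pushed 50 tokens into the pool, the expected reserve before trade 1 is n1 = 1050; if trade 1 instead observes n2 = 1500, then

    abs(1050 - 1500) / max(1050, 1500)    # = 450 / 1500 = 0.3 >= CUTOFF

and the whole batch is dropped as having had an external liquidity update.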
def get_dfs(
    instance_path, batch_duration, nr_tokens, user_frac, limit_xrate_relax_frac
):
    data_path = f"{instance_path}/s{batch_duration}-t{nr_tokens}-u{user_frac}-l{limit_xrate_relax_frac}/"
    df_exec = load_block_data_file_to_df(f"{data_path}/per_block.json")
    df_sol = create_batches_table(f"{data_path}/solutions/", df_exec)

    # remove batches where there were untouched orders
    df_sol = remove_batches_not_fully_executed(df_sol, df_exec)

    # remove outliers (bottom and top OUTLIER_FRAC quantile of surplus variable)
    # OUTLIER_FRAC = 0.01
    # not_outlier = (df_sol.surplus > df_sol.surplus.quantile(OUTLIER_FRAC)) & (df_sol.surplus < df_sol.surplus.quantile(1 - OUTLIER_FRAC))
    # df_sol = df_sol[not_outlier]

    # v = df_sol.max_vol_usd.quantile(.99)
    # df_sol = df_sol[df_sol.max_vol_usd <= v]

    df_sol = filter_batches_with_large_liquidity_updates(df_sol)

    # remove batches with weird results
    # df_sol = df_sol[~df_sol.batch_start_time.isin([1603206524])]

    return (df_sol, df_exec)


def get_block_data_file(
    instance_path, batch_duration, nr_tokens, user_frac, limit_xrate_relax_frac
):
    data_path = f"{instance_path}/s{batch_duration}-t{nr_tokens}-u{user_frac}-l{limit_xrate_relax_frac}/"
    return load_block_data_file_to_df(f"{data_path}/per_block.json")


def get_prices_at_blocks(data_path, blocks, tokens):
    with open(f"{data_path}/per_block.json", "r") as f:
        d = json.load(f)
    prices_in_file = {int(k): v for k, v in d["spot_prices"].items()}
    blocks_in_file = list(prices_in_file.keys())

    # pre-fill with None so every (block, token) key exists
    prices = {b: {t: None for t in tokens} for b in blocks}
    for t in tokens:
        blocks_with_prices_for_t = list(
            get_largest_element_sequence(
                blocks,
                blocks_in_file,
                lambda a, b: b <= a and t in prices_in_file[b].keys(),
            )
        )
        for bi in range(len(blocks)):
            prices[blocks[bi]][t] = prices_in_file[blocks_with_prices_for_t[bi]][t]
    # prices = {blocks[bi]: {t: prices[blocks_in_file[bi]][t]} for bi in range(len(blocks)) for t in prices[blocks_in_file[bi]].keys()}
    assert set(prices.keys()) == set(blocks)
    return prices
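A hypothetical usage sketch for the price lookup (the path and arguments are illustrative, not from the repo):

    prices = get_prices_at_blocks(
        "data/s30-t10-u0.1-l0.1",     # a data_path like the ones get_dfs builds
        blocks=[11000000, 11000100],  # ascending block numbers to query
        tokens=["WETH", "DAI"],
    )
    # prices[11000100]["WETH"] is the spot price at the latest block <= 11000100
    # for which per_block.json records a WETH entry, courtesy of
    # get_largest_element_sequence.

Note that blocks must be sorted ascending, and every queried token needs at least one recorded price at or before the first block, otherwise get_largest_element_sequence raises ValueError.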
