import pandas as pd
import json
import re
import numpy as np
import glob


class EmptySolutionError(Exception):
    pass


def get_max_xrate(o):
    # Limit exchange rate of the order, in the same convention used below:
    # xrate = sell_amount / buy_amount.
    if o["isSellOrder"]:
        return o["maxSellAmount"] / o["minBuyAmount"]
    else:
        return o["maxSellAmount"] / o["maxBuyAmount"]


def compute_avg_eth_price_usd(orders):
    # Average the daily USD price of WETH over all orders that have WETH on
    # either side of the trade.
    return sum(
        [o["sellTokenDailyPriceUSD"] for o in orders if o["sellToken"] == "WETH"]
        + [o["buyTokenDailyPriceUSD"] for o in orders if o["buyToken"] == "WETH"]
    ) / len([o for o in orders if "WETH" in {o["sellToken"], o["buyToken"]}])


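# A minimal sketch of the order shape assumed above (field values made up):
#
# >>> orders = [
# ...     {"sellToken": "WETH", "buyToken": "DAI",
# ...      "sellTokenDailyPriceUSD": 400.0, "buyTokenDailyPriceUSD": 1.0},
# ...     {"sellToken": "DAI", "buyToken": "WETH",
# ...      "sellTokenDailyPriceUSD": 1.0, "buyTokenDailyPriceUSD": 410.0},
# ... ]
# >>> compute_avg_eth_price_usd(orders)
# 405.0

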
# O(n) iterator that for every element A in the first iterable returns
# the largest element B in the second iterable that satisfies test.
# Assumes that both lists are sorted.
# Example: for this input
#   1 2 4 5 9
#   1 3 6 10
#   test(a,b) = a >= b
# returns:
#   1 1 3 3 6
def get_largest_element_sequence(a, b, test):
    idx_b = 0
    for idx_a in range(len(a)):
        while idx_b < len(b) - 1 and test(a[idx_a], b[idx_b + 1]):
            idx_b += 1
        if not test(a[idx_a], b[idx_b]):
            raise ValueError("Found no element satisfying test.")
        yield b[idx_b]


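# A doctest-style check of the iterator above, matching the worked example
# in its comment:
#
# >>> list(get_largest_element_sequence([1, 2, 4, 5, 9], [1, 3, 6, 10],
# ...                                   lambda a, b: a >= b))
# [1, 1, 3, 3, 6]

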
def load_block_data_file_to_df(fname):
    with open(fname, "r") as f:
        d = json.load(f)
    eth_price_usd = compute_avg_eth_price_usd(d["orders"])
    d = [
        {
            "block": o["uniswap"]["block"],
            "index": o["uniswap"]["index"],
            "sell_token": o["sellToken"],
            "buy_token": o["buyToken"],
            "max_buy_amount": o["maxBuyAmount"] if not o["isSellOrder"] else None,
            "max_sell_amount": o["maxSellAmount"] if o["isSellOrder"] else None,
            "sell_token_price_eth": o["sellTokenPriceETH"],
            "buy_token_price_eth": o["buyTokenPriceETH"],
            "sell_token_price_usd": o["sellTokenPriceETH"] * eth_price_usd,
            "buy_token_price_usd": o["buyTokenPriceETH"] * eth_price_usd,
            "timestamp": o["uniswap"]["timestamp"],
            # "amounts" holds the full swap path; the first entry is the
            # amount sold and the last entry the amount bought.
            "exec_sell_amount": o["uniswap"]["amounts"][0],
            "exec_buy_amount": o["uniswap"]["amounts"][-1],
            "nr_pools": len(o["uniswap"]["amounts"]) - 1,
            "is_sell_order": o["isSellOrder"],
            "address": o["address"],
            "sell_reserve": float(o["uniswap"]["balancesSellToken"][0]),
            "buy_reserve": float(o["uniswap"]["balancesBuyToken"][-1]),
            # 'max_xrate': get_max_xrate(o)
        }
        for o in d["orders"]
    ]
    df = pd.DataFrame.from_records(d)
    df["xrate"] = df.exec_sell_amount / df.exec_buy_amount
    df["block_index"] = df.apply(
        lambda r: "_".join(r[["block", "index"]].astype(str).values), axis=1
    )
    df["token_pair"] = df.apply(
        lambda r: "-".join(sorted([r["sell_token"], r["buy_token"]])), axis=1
    )
    df["exec_vol"] = df.exec_sell_amount * df.sell_token_price_usd
    df["max_vol_usd"] = df.apply(
        lambda r: r.max_sell_amount * r.sell_token_price_usd
        if r.is_sell_order
        else r.max_buy_amount * r.buy_token_price_usd,
        axis=1,
    )
    df["max_vol_eth"] = df.apply(
        lambda r: r.max_sell_amount * r.sell_token_price_eth
        if r.is_sell_order
        else r.max_buy_amount * r.buy_token_price_eth,
        axis=1,
    )

    return df.set_index("block_index")


def remove_most_active_users(df_exec, fraction_to_remove):
    # Drop all orders belonging to the top `fraction_to_remove` of addresses,
    # ranked by number of orders.
    nr_addresses = df_exec.address.nunique()
    addresses = (
        df_exec.address.value_counts()
        .iloc[round(nr_addresses * fraction_to_remove) :]
        .index
    )
    return df_exec[df_exec.address.isin(addresses)]


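# Illustration with made-up data: with fraction_to_remove=0.5 and two distinct
# addresses, the busier address is dropped and the quieter one kept.
#
# >>> df = pd.DataFrame({"address": ["0xa", "0xa", "0xa", "0xb"]})
# >>> remove_most_active_users(df, 0.5).address.unique().tolist()
# ['0xb']

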
def load_solver_solution(fname):
    with open(fname, "r") as f:
        d = json.load(f)
    d = [
        {
            "block": int(oid.split("-")[0]),
            "index": int(oid.split("-")[1]),
            "sell_token": o["sell_token"],
            "buy_token": o["buy_token"],
            # Amounts are given in base units; this assumes 18-decimal tokens.
            "exec_sell_amount": int(o["exec_sell_amount"]) * 1e-18,
            "exec_buy_amount": int(o["exec_buy_amount"]) * 1e-18,
            "is_sell_order": o["is_sell_order"],
        }
        for oid, o in d["orders"].items()
    ]
    if len(d) == 0:
        raise EmptySolutionError()
    df = pd.DataFrame.from_records(d)
    df["xrate"] = df.exec_sell_amount / df.exec_buy_amount
    df["block_index"] = df.apply(
        lambda r: "_".join(r[["block", "index"]].astype(str).values), axis=1
    )
    return df.set_index("block_index")


def merge_exec_and_solved(fname, df_exec, from_timestamp, to_timestamp):
    df_sol = load_solver_solution(fname)
    df = df_exec[
        (df_exec.timestamp >= from_timestamp) & (df_exec.timestamp <= to_timestamp)
    ].merge(
        df_sol[["exec_sell_amount", "exec_buy_amount", "xrate"]],
        how="inner",
        on="block_index",
        suffixes=("_uni", "_gp"),
    )
    df["batch_start_time"] = from_timestamp
    df["batch_end_time"] = to_timestamp
    df["surplus"] = df.xrate_uni / df.xrate_gp
    # Keep the per-order savings as columns so they can be aggregated later
    # (see compute_savings_per_token).
    df["savings_buy"] = df.exec_buy_amount_gp - df.exec_buy_amount_uni
    df["savings_sell"] = df.exec_sell_amount_uni - df.exec_sell_amount_gp
    df["savings_vol_usd"] = (
        df.savings_buy * df["buy_token_price_usd"]
        + df.savings_sell * df["sell_token_price_usd"]
    )
    return df


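# Worked example of the surplus measure above (made-up numbers): with
# xrate = sell_amount / buy_amount, an order that cost 101 units to fill on
# Uniswap but only 100 via the solver has xrate_uni / xrate_gp = 1.01,
# i.e. the solver improved the effective rate by 1%.

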
def create_batch_table(solution_fname, df_exec):
    # Solution filenames are expected to end in "_<from>-<to>[-<n>].json",
    # where <from> and <to> are the batch start/end timestamps.
    m = re.search(r"_([0-9]+)\-([0-9]+)(\-[0-9]+)*\.json$", solution_fname)
    from_timestamp, to_timestamp = int(m[1]), int(m[2])
    return merge_exec_and_solved(solution_fname, df_exec, from_timestamp, to_timestamp)


def compute_savings_per_token(df):
    # Aggregate savings per token, combining the buy side and the sell side.
    savings_buy_per_token = df.groupby("buy_token").savings_buy.sum()
    savings_sell_per_token = df.groupby("sell_token").savings_sell.sum()
    return savings_buy_per_token.add(savings_sell_per_token, fill_value=0)


def compute_mean_gp_rel_surplus(df):
    return np.exp(np.mean(np.log(df.xrate_uni) - np.log(df.xrate_gp)))


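# The function above is the geometric mean of the per-order ratio
# xrate_uni / xrate_gp, averaged in log space for numerical stability.
# Quick sanity check with made-up rates:
#
#   df = pd.DataFrame({"xrate_uni": [1.02, 0.99], "xrate_gp": [1.0, 1.0]})
#   compute_mean_gp_rel_surplus(df)  # ~1.004888, i.e. sqrt(1.02 * 0.99)

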
def create_batches_table(solution_dir, df_exec):
    dfs = []
    for fname in glob.glob(f"{solution_dir}/*.json"):
        try:
            dfs.append(create_batch_table(fname, df_exec))
        except EmptySolutionError:
            pass
    return pd.concat(dfs, axis=0).sort_index()


def compute_orig_batch(batchdf, df_exec):
    batch_start_time = batchdf.batch_start_time.iloc[0]
    batch_end_time = batchdf.batch_end_time.iloc[0]
    return df_exec[
        (df_exec.timestamp >= batch_start_time) & (df_exec.timestamp <= batch_end_time)
    ]


def compute_orig_batch_size(batchdf, df_exec):
    batch_start_time = batchdf.batch_start_time.iloc[0]
    batch_end_time = batchdf.batch_end_time.iloc[0]
    return (
        (df_exec.timestamp >= batch_start_time) & (df_exec.timestamp <= batch_end_time)
    ).sum()


def remove_batches_not_fully_executed(df_sol, df_exec):
    # Keep only batches in which the solver touched every order that was
    # executed on Uniswap during the batch window.
    problem_batch_sizes = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch_size, df_exec=df_exec
    )
    solution_batch_sizes = (
        df_sol.groupby(["batch_start_time", "batch_end_time"]).count().block
    )
    batch_start_times = [
        b[0]
        for b in solution_batch_sizes[solution_batch_sizes == problem_batch_sizes].index
    ]
    return df_sol[df_sol.batch_start_time.isin(batch_start_times)]


def compute_orig_total_orders(df_sol, df_exec):
    df = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch, df_exec=df_exec
    )
    tokens = pd.concat([df_sol.sell_token, df_sol.buy_token], axis=0).unique()
    return (df.sell_token.isin(tokens) & df.buy_token.isin(tokens)).sum()


def compute_orig_total_users(df_sol, df_exec):
    df = df_sol.groupby(["batch_start_time", "batch_end_time"]).apply(
        compute_orig_batch, df_exec=df_exec
    )
    tokens = pd.concat([df_sol.sell_token, df_sol.buy_token], axis=0).unique()
    return df[df.sell_token.isin(tokens) & df.buy_token.isin(tokens)].address.nunique()


def filter_batches_with_large_liquidity_updates(df_sol):
    # Remove batches in which some used pool received a liquidity update that
    # changed at least a CUTOFF fraction of its liquidity.
    CUTOFF = 0.3

    def large_liquidity_update_occurred_in_batch(batch_df):
        def occurred_in_token_pair(batch_df):
            if batch_df.shape[0] == 1:
                return False

            def occurred_between_consecutive_trades(r):
                # Compare the reserve right after the first trade with the
                # reserve observed at the second trade; a large difference
                # means an external liquidity update happened in between.
                n1 = r.sell_reserve.iloc[0] + r.exec_sell_amount_uni.iloc[0]
                if r.sell_token.iloc[0] == r.sell_token.iloc[1]:
                    n2 = r.sell_reserve.iloc[1]
                else:
                    assert r.sell_token.iloc[0] == r.buy_token.iloc[1]
                    n2 = r.buy_reserve.iloc[1]
                return abs(n1 - n2) / max(n1, n2) >= CUTOFF

            # Pair each trade with the next one by concatenating the frame
            # with a shifted copy of itself (duplicate column labels, hence
            # the .iloc[0]/.iloc[1] accesses above).
            df = pd.concat([batch_df, batch_df.shift(-1)], axis=1).iloc[:-1]
            return np.any(df.apply(occurred_between_consecutive_trades, axis=1))

        return np.any(batch_df.groupby("token_pair").apply(occurred_in_token_pair))

    df = df_sol[
        [
            "batch_start_time",
            "token_pair",
            "sell_token",
            "buy_token",
            "sell_reserve",
            "buy_reserve",
            "exec_sell_amount_uni",
            "exec_buy_amount_uni",
        ]
    ].groupby("batch_start_time")
    m = df.apply(large_liquidity_update_occurred_in_batch)
    bad_batches = m[m].index
    return df_sol[~df_sol.batch_start_time.isin(bad_batches)]


def get_dfs(
    instance_path, batch_duration, nr_tokens, user_frac, limit_xrate_relax_frac
):
    data_path = f"{instance_path}/s{batch_duration}-t{nr_tokens}-u{user_frac}-l{limit_xrate_relax_frac}/"
    df_exec = load_block_data_file_to_df(f"{data_path}/per_block.json")
    df_sol = create_batches_table(f"{data_path}/solutions/", df_exec)

    # remove batches where there were untouched orders
    df_sol = remove_batches_not_fully_executed(df_sol, df_exec)

    # remove outliers (bottom and top OUTLIER_FRAC quantile of surplus variable)
    # OUTLIER_FRAC = 0.01
    # not_outlier = (df_sol.surplus > df_sol.surplus.quantile(OUTLIER_FRAC)) & (df_sol.surplus < df_sol.surplus.quantile(1 - OUTLIER_FRAC))
    # df_sol = df_sol[not_outlier]

    # v = df_sol.max_vol_usd.quantile(.99)
    # df_sol = df_sol[df_sol.max_vol_usd <= v]

    df_sol = filter_batches_with_large_liquidity_updates(df_sol)

    # remove batches with weird results
    # df_sol = df_sol[~df_sol.batch_start_time.isin([1603206524])]

    return (df_sol, df_exec)


def get_block_data_file(
    instance_path, batch_duration, nr_tokens, user_frac, limit_xrate_relax_frac
):
    data_path = f"{instance_path}/s{batch_duration}-t{nr_tokens}-u{user_frac}-l{limit_xrate_relax_frac}/"
    return load_block_data_file_to_df(f"{data_path}/per_block.json")


def get_prices_at_blocks(data_path, blocks, tokens):
    with open(f"{data_path}/per_block.json", "r") as f:
        d = json.load(f)
    prices_in_file = {int(k): v for k, v in d["spot_prices"].items()}
    blocks_in_file = list(prices_in_file.keys())

    # For every requested block, use the price quoted at the most recent
    # earlier (or equal) block that has a price for the token. Both `blocks`
    # and the file's block keys are assumed to be sorted in ascending order,
    # as required by get_largest_element_sequence.
    prices = {b: {} for b in blocks}
    for t in tokens:
        blocks_with_prices_for_t = list(
            get_largest_element_sequence(
                blocks,
                blocks_in_file,
                lambda a, b: b <= a and t in prices_in_file[b].keys(),
            )
        )
        for bi in range(len(blocks)):
            prices[blocks[bi]][t] = prices_in_file[blocks_with_prices_for_t[bi]][t]
    assert set(prices.keys()) == set(blocks)
    return prices
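

# Usage sketch for get_prices_at_blocks (hypothetical path and values):
# per_block.json is expected to contain a "spot_prices" mapping of
# block number -> {token: price}.
#
#   prices = get_prices_at_blocks("instances/s300-t5-u1.0-l0.0",
#                                 [11000000], ["WETH"])
#   prices[11000000]["WETH"]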