feat: reordering slippage optimizations #4

Open · wants to merge 16 commits into main
2 changes: 1 addition & 1 deletion v3/helpers/data_update.py
@@ -6,7 +6,6 @@
from pathlib import Path
import json


# data updating
def checkPath(data_type, data_path):
"""
@@ -104,6 +103,7 @@ def findSegment(table, connector, max_block, min_block, pool, chain, tgt_max_row
return df.item() - 1



def readOVM(path, data_type):
"""
This is provided by the Optimism team to
142 changes: 135 additions & 7 deletions v3/helpers/pool_helpers.py
@@ -60,7 +60,116 @@ def dtToBN(dt, pool):
return bn_as_of


def createSwapDF(as_of, pool):
def slot0ToAsOf(entry):
bn = (entry["block_number"] + entry["transaction_index"] / 1e4).item()

return bn
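
For context, the as_of ordinal used across this PR packs a block number and a transaction index into one float. A minimal sketch of the round trip, with hypothetical helper names that are not part of this PR:

def encode_as_of(block_number: int, transaction_index: int) -> float:
    # hypothetical helper: mirrors the block + tx_index / 1e4 encoding above
    assert transaction_index < 10_000, "encoding supports <10k txs per block"
    return block_number + transaction_index / 1e4

def decode_as_of(as_of: float) -> tuple[int, int]:
    # hypothetical inverse; round() undoes float noise in the fraction
    block_number = int(as_of)
    return block_number, round((as_of - block_number) * 1e4)

# block 17_000_000, tx index 42 -> 17000000.0042
assert decode_as_of(encode_as_of(17_000_000, 42)) == (17_000_000, 42)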


# TODO
# once we can traverse this list we can increase
# n_saved to make this more valuable
def pull_block_segments(segment, as_of, n_saved=1_000):
    blocks = segment.sort(  # this segment is often ~5M rows
        by=pl.col("block_number")
    )  # sort by block only here; the full (block, tx) sort happens after truncation

prev_blocks = blocks.filter(pl.col("block_number") <= as_of).tail(n_saved)
next_blocks = blocks.filter(pl.col("block_number") >= as_of).head(n_saved)

del blocks # remove this huge dataset asap

    # we need to cut off the boundary block of each segment (the earliest
    # block of prev, the latest block of next) because we can't be sure we
    # pulled all of that block's rows: the windows were cut without sorting
    # on transaction_index, since that would be an extra sort. The full
    # (block, tx_index) sort happens below, on the truncated frames.
    min_block = prev_blocks.select(pl.col("block_number").min()).item()
    prev_blocks = prev_blocks.filter(pl.col("block_number") > min_block).sort(
        "block_number", "transaction_index", descending=[False, False]
    )

    max_block = next_blocks.select(pl.col("block_number").max()).item()
    next_blocks = next_blocks.filter(pl.col("block_number") < max_block).sort(
        "block_number", "transaction_index", descending=[False, False]
    )

return prev_blocks, next_blocks
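
A small usage sketch of pull_block_segments on a toy frame (illustrative data, assuming polars is imported as pl as elsewhere in this module); note how the boundary block of each window is dropped:

import polars as pl

segment = pl.DataFrame({
    "block_number":      [100, 100, 101, 102, 103, 104],
    "transaction_index": [  0,   7,   3,   1,   2,   5],
    "type_of_int":       ["swap", "mb", "swap", "swap", "mb", "swap"],
})

prev_blocks, next_blocks = pull_block_segments(segment, as_of=102, n_saved=3)

# the prev window pulled blocks {100, 101, 102}, then dropped block 100
# because the tail(n_saved) cut may have split it; the next window pulled
# {102, 103, 104} and dropped 104 for the symmetric reason
print(prev_blocks["block_number"].to_list())  # [101, 102]
print(next_blocks["block_number"].to_list())  # [102, 103]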


def initialize_blocks(pool, as_of):
    # TODO
    # bailing out on an empty cache skips the optimization entirely;
    # handle this case properly
if pool.cache["swaps"].is_empty():
return

segment = pl.concat(
[
(
pool.cache["swaps"]
.select([pl.col("block_number"), pl.col("transaction_index")])
.with_columns(type_of_int=pl.lit("swap"))
),
(
pool.cache["mb"]
.select([pl.col("block_number"), pl.col("transaction_index")])
.with_columns(type_of_int=pl.lit("mb"))
),
]
)

prev_blocks, next_blocks = pull_block_segments(segment, as_of)

# TODO
# utilize this list to optimally figure out how to apply deltas
pool.slot0["next_blocks"] = next_blocks
pool.slot0["prev_blocks"] = prev_blocks

pool.slot0["next_block"] = next_blocks.head(1)
pool.slot0["prev_block"] = prev_blocks.tail(1)

pool.slot0["initialized"] = True


def createValidAsOf(as_of, pool):
    # TODO
    # bailing out on an empty cache skips the optimization entirely;
    # handle this case properly
if pool.cache["swaps"].is_empty():
return

segment = pl.concat(
[
(
pool.cache["swaps"]
.select([pl.col("block_number"), pl.col("transaction_index")])
.with_columns(type_of_int=pl.lit("swap"))
),
(
pool.cache["mb"]
.select([pl.col("block_number"), pl.col("transaction_index")])
.with_columns(type_of_int=pl.lit("mb"))
),
]
)

prev_blocks, next_blocks = pull_block_segments(segment, as_of)

    # TODO
    # utilize these lists to figure out how to apply deltas optimally
    # instead of recalculating everything (which is very expensive);
    # this is mostly a liquidity optimization
pool.slot0["next_blocks"] = next_blocks
pool.slot0["prev_blocks"] = prev_blocks

pool.slot0["next_block"] = next_blocks.head(1)
pool.slot0["prev_block"] = prev_blocks.tail(1)

pool.slot0["initialized"] = True


def createSwapDF(as_of, pool, givenPrice=0, rotateValid=False):
"""
    This creates the swap dataframe that pre-computes most of the
    values needed to simulate a swap
@@ -71,11 +180,18 @@ def createSwapDF(as_of, pool):
it then pre-computes the amounts needed to escape out of the current
range as well
"""
price = pool.getPriceAt(as_of)
price = pool.getPriceAt(as_of) if givenPrice == 0 else givenPrice
    assert price is not None, "Pool not initialized"

tickFloor = priceX96ToTickFloor(price, pool.ts)
liq = createLiq(as_of, pool, "pool_mint_burn_events", pool.data_path)

    # we've already verified that the LP distribution is unchanged,
    # so we can reuse the cached liquidity and only re-derive the
    # price-dependent values
    if rotateValid:
        liq = pool.slot0["liquidity"]
    else:
        liq = createLiq(as_of, pool, "pool_mint_burn_events", pool.data_path)
pool.slot0["liquidity"] = liq

swap_df = (
liq.filter(pl.col("liquidity") > 0) # numerical error
@@ -116,6 +232,9 @@ def createSwapDF(as_of, pool):
inRange1 = get_amount1_delta(p_b, sqrt_P, liquidity)
inRangeToSwap1 = get_amount0_delta(p_b, sqrt_P, liquidity)

# fill slot0
createValidAsOf(as_of, pool)

return (
as_of,
swap_df,
@@ -131,14 +250,23 @@ def createSwapDF(as_of, pool):
)


def getPriceSeries(pool, start_time, frequency, gas=False):
def getPriceSeries(pool, start_time, end_time, frequency, gas=False):
# precompute a dataframe that has the latest block number

    # TODO
    # we don't always want to require an ending time;
    # handle this more cleanly
    if end_time is None:
end_filter = True
else:
end_filter = pl.col("block_timestamp") <= end_time.replace(tzinfo=timezone.utc)

bn_as_of = (
pl.scan_parquet(f"{pool.data_path}/pool_swap_events/*.parquet")
.filter(
(pl.col("chain_name") == pool.chain)
& (pl.col("block_timestamp") >= start_time.replace(tzinfo=timezone.utc))
)
& (end_filter))
.select(["block_timestamp", "block_number"])
.unique()
.sort("block_timestamp")
@@ -157,7 +285,7 @@ def getPriceSeries(pool, start_time, frequency, gas=False):
(pl.col("chain_name") == pool.chain)
& (pl.col("address") == pool.pool)
& (pl.col("block_timestamp") >= start_time.replace(tzinfo=timezone.utc))
)
& (end_filter))
.select(["block_timestamp", "tick", "gas_price", "gas_used"])
.unique()
.sort("block_timestamp")
@@ -187,7 +315,7 @@
(pl.col("chain_name") == pool.chain)
& (pl.col("address") == pool.pool)
& (pl.col("block_timestamp") >= start_time.replace(tzinfo=timezone.utc))
)
& (end_filter))
.select(["block_timestamp", "tick"])
.unique()
.sort("block_timestamp")
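The optional end bound above leans on polars coercing a plain True inside the & conjunction. An equivalent, slightly more explicit formulation, shown as a sketch (the end_time value is illustrative):

import polars as pl
from datetime import datetime, timezone

end_time = datetime(2024, 1, 1)  # or None for an open-ended series
end_filter = (
    pl.lit(True)  # no-op conjunct when there is no upper bound
    if end_time is None
    else pl.col("block_timestamp") <= end_time.replace(tzinfo=timezone.utc)
)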
14 changes: 10 additions & 4 deletions v3/helpers/swap.py
@@ -24,8 +24,9 @@ def parseCalldata(calldata):
swapIn = parseEntry(calldata, "swapIn")
findMax = parseEntry(calldata, "findMax", required=False)
fees = parseEntry(calldata, "fees", required=False)
    force = parseEntry(calldata, "forceHardShift", required=False)

return (as_of, tokenIn, swapIn, findMax, fees)
return (as_of, tokenIn, swapIn, findMax, fees, force)


def inRangeTesting(zeroForOne, inRange0, inRangeToSwap0, inRange1, inRangeToSwap1):
Expand All @@ -50,7 +51,10 @@ def swapIn(calldata, pool, warn=True):

amtIn, _ = swapIn(calldata, pool)
"""
(as_of, tokenIn, swapIn, findMax, fees) = parseCalldata(calldata)
(as_of, tokenIn, swapIn, findMax, fees, force) = parseCalldata(calldata)

    if force is None:
force = False

# there can be a desync between mints/burns and swap pulls
# which causes incorrect data
@@ -66,8 +70,10 @@
# stops us from hitting annoying bugs
assert swapIn != 0, "We do not support swaps of 0"

if as_of != pool.cache["as_of"]:
pool.calcSwapDF(as_of)
if as_of != pool.slot0["as_of"]:
        # calcSwapDF returns early if the cached state is still
        # valid for this as_of
pool.calcSwapDF(as_of, force)

swap_df, inRangeValues = pool.cache["swapDF"], pool.cache["inRangeValues"]

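For reference, a sketch of the calldata shape swapIn now accepts with the optional forceHardShift key; the field values and the pool.token0 attribute are illustrative assumptions, not taken from this PR:

calldata = {
    "as_of": 17_000_000.0042,   # block 17_000_000, tx index 42
    "tokenIn": pool.token0,     # assumed token identifier; adjust to your setup
    "swapIn": 10**18,           # raw input amount (illustrative)
    "forceHardShift": True,     # optional; True forces a full recalculation
}

amount_out, _ = swapIn(calldata, pool)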
89 changes: 79 additions & 10 deletions v3/state.py
@@ -43,9 +43,20 @@ def __init__(
# remove checksums
self.pool = pool.lower()

# state where simulations are cached
self.slot0 = {
"initialized": False,
"next_blocks": pl.DataFrame(),
"prev_blocks": pl.DataFrame(),
"next_block": pl.DataFrame(),
"prev_block": pl.DataFrame(),
"liquidity": pl.DataFrame(),
"as_of": -1,
}

# this is the cache where we store data if needed
# this is used to move disk -> memory -> optimized memory
self.cache = {}
self.cache["as_of"] = 0

# data checkers
self.path = str(Path(f"{PACKAGEDIR}/data").resolve())
@@ -129,7 +140,11 @@ def readFromMemoryOrDisk(self, data, data_path, save=False):
& (pl.col("chain_name") == self.chain)
)
.with_columns(
as_of=pl.col("block_number") + pl.col("transaction_index") / 1e4
                # TODO
                # this encoding breaks if a block has more than 10k txs;
                # we need a more robust ordinal
as_of=pl.col("block_number")
+ pl.col("transaction_index") / 1e4
)
.collect()
.sort("as_of")
@@ -169,7 +184,7 @@ def readFromMemoryOrDisk(self, data, data_path, save=False):

return df

def calcSwapDF(self, as_of):
    def calcSwapDF(self, as_of, force=False):
"""
@inherit from pool_helpers.createSwapDF
Helper function that calculates and caches swapDFs
@@ -178,13 +193,66 @@ def calcSwapDF(self, as_of):

Notice: as_of is the block + transaction index / 1e4.
Notice: Returns the value before the transaction at that index was done
        Notice: we optimistically shift state when possible; pass force=True
        to recalculate from scratch instead
"""
if self.cache["as_of"] == as_of:
return self.cache["swapDF"], self.cache["inRangeValues"]
        # this is initialized after the first swap run
rotationValid = False
if self.slot0["initialized"]:
next_block = slot0ToAsOf(self.slot0["next_block"])
prev_block = slot0ToAsOf(self.slot0["prev_block"])

            # state is still valid, so we can just rotate as_of
            # NOTE: we replace the tx at as_of with our own tx,
            # so txs exactly at as_of are replaced, which is why
            # equality is allowed on both bounds
            if (prev_block <= as_of <= next_block) and (not force):

self.slot0["as_of"] = as_of
return self.cache["swapDF"], self.cache["inRangeValues"]

            # TODO instead of recalculating liquidity from scratch, we can
            # calculate based off a range and apply the deltas to the
            # liquidity distributions
else:
                if force:
                    # rotationValid is already False, so this is a noop
                    pass

elif as_of > next_block:
entry = (
self.slot0["next_blocks"]
.filter(pl.col("type_of_int") != "swap")
.head(1)
)
                    # we may not have pulled a mint/burn in the saved window
                    if entry.is_empty():
                        # no mint/burns saved, so state is known valid up
                        # through the last of the saved next blocks
                        entry = self.slot0["next_blocks"].tail(1)
                    nextMB = slot0ToAsOf(entry)
                    if nextMB > as_of:
                        rotationValid = True

elif as_of < prev_block:
                    entry = (
                        self.slot0["prev_blocks"]
                        .filter(pl.col("type_of_int") != "swap")
                        .tail(1)  # the latest mint/burn before the old as_of
                    )

                    if entry.is_empty():
                        # no mint/burns saved, so state is known valid back
                        # through the first of the saved prev blocks
                        entry = self.slot0["prev_blocks"].head(1)

as_of, df, inRangeValues = createSwapDF(as_of, self)
nextMB = slot0ToAsOf(entry)
if nextMB < as_of:
rotationValid = True

        as_of, df, inRangeValues = createSwapDF(as_of, self, rotateValid=rotationValid)

self.cache["as_of"] = as_of
self.slot0["as_of"] = as_of
self.cache["swapDF"] = df
self.cache["inRangeValues"] = inRangeValues

@@ -237,17 +305,18 @@ def getPriceAt(self, as_of, revert_on_uninitialized=False):
assert not revert_on_uninitialized, "Price is not initialized"
return None
else:
return int(price.item())
return int(price)

def getPriceSeries(self, as_of, frequency="6h", gas=False):
def getPriceSeries(self, as_of, ending=None, frequency="6h", gas=False):
"""
        @inherit from pool_helpers.getPriceSeries
Create a price series resampled to the desired frequency
starting at as_of

Notice: as_of is the block + transaction index / 1e4.
"""
px = getPriceSeries(self, as_of, frequency, gas)

px = getPriceSeries(self, as_of, ending, frequency, gas)

return px

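A usage sketch of the extended price-series interface, assuming pool is a constructed v3 state object (constructor arguments elided):

from datetime import datetime

start = datetime(2023, 1, 1)
end = datetime(2023, 2, 1)

px_open = pool.getPriceSeries(start)              # previous behavior: no upper bound
px_clip = pool.getPriceSeries(start, ending=end)  # new: series clipped at `ending`
px_gas = pool.getPriceSeries(start, ending=end, gas=True)  # with gas columns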