Prepare tutorials for 0.2.0.0

mchav · mchav · commit 466705bef564 · 2025-06-13T00:13:19.000-07:00
diff --git a/docs/coming_from_dplyr.md b/docs/coming_from_dplyr.md
@@ -105,13 +105,15 @@ starwars %>%
 Our logic is more explicit about what's going on. Because both our fields are nullable/optional we have to specify the type.
 
 ```haskell
-bmi (w :: Int) (h :: Int) = (fromIntegral w) / (fromIntegral h / 100) ** 2 :: Double
+convertEitherToDouble name d = D.apply (either (\unparsed -> if unparsed == "NA" then Nothing else D.readDouble unparsed) (Just . (fromIntegral @Int))) name d
 
 starwars
+  |> D.fold convertEitherToDouble ["mass", "height"]
   |> D.selectRange ("name", "mass")
-  -- mass and height are optionals so we combine them with
-  -- Haskell's Applicative operators.
-  |> D.derive "bmi" (lift2 (/) (lift fromIntegral (col @Int "mass")) (lift fromIntegral (col@ Int "height")))
+  -- Remove Nothing/empty rows.
+  |> D.filterJust "mass"
+  |> D.filterJust "height"
+  |> D.derive "bmi" ((D.col @Double "mass") / (D.lift2 (**) (D.col @Double "height") (D.lit 2)))
   |> D.take 10
 ```
 
@@ -221,8 +223,7 @@ starwars |> D.select ["species", "mass"]
          -- Always better to be explicit about types for
          -- numbers but you can also turn on defaults
          -- to save keystrokes.
-         |> D.filterWhere (["Count", "Mean_mass"],
-                           D.func (\(n :: Int) (mass :: Double) -> n > 1 && mass > 50))
+         |> D.filterWhere (D.lift2 (&&) (D.lift (>1) (D.col @Int "Count")) (D.lift (>50) (D.col @Double "Mean_mass")))
 ```
 
 ```
diff --git a/docs/coming_from_polars.md b/docs/coming_from_polars.md
@@ -122,9 +122,8 @@ main = do
     ...
     let year = (\(YearMonthDay y _ _) -> y)
     print $ df_csv
-          |> D.derive "birth_year" year "birthdate"
-          |> D.deriveFrom (["weight", "height"], D.func (\(w :: Double) (h :: Double) -> w / h ** 2))
-                       "bmi"
+          |> D.derive "birth_year" (lift year (D.col @Date "birthdate"))
+          |> D.derive "bmi" ((D.col @Double "weight") / (D.lift2 (**) (D.col @Double "height") (D.lit 2)))
           |> D.select ["name", "birth_year", "bmi"]
 ```
 
@@ -146,8 +145,8 @@ main = do
     let bmi :: Double -> Double -> Double
         bmi w h = w / h ** 2
     print $ df_csv
-          |> D.derive "birth_year" year "birthdate"
-          |> D.deriveFrom (["weight", "height"], D.func bmi) "bmi"
+          |> D.derive "birth_year" (lift year (D.col @Date "birthdate"))
+          |> D.derive "bmi" ((D.col @Double "weight") / (D.lift2 (**) (D.col @Double "height") (D.lit 2)))
           |> D.select ["name", "birth_year", "bmi"]
 ```
 
@@ -186,9 +185,8 @@ We instead write this two `applyWithAlias` calls:
 
 ```haskell
 df_csv
-    |> D.derive "weight-5%" (*0.95) "weight"
-    -- Alternatively we can use the `as` function.
-    |> D.as "height-5%" D.apply (*0.95) "height"
+    |> D.derive "weight-5%" ((col @Double "weight") * (lit 0.95))
+    |> D.derive "height-5%" ((col @Double "height") * (lit 0.95))
     |> D.select ["name", "weight-5%", "height-5%"]
 ```
 
@@ -207,7 +205,7 @@ index |      name      |     height-5%      |     weight-5%
 However we can make our program shorter by using regular Haskell and folding over the dataframe.
 
 ```haskell
-let reduce name = D.derive (name <> "-5%") (*0.95) name
+let reduce name = D.derive (name <> "-5%") ((col @Double name) * (lit 0.95))
 df_csv
     |> D.fold reduce ["weight", "height"]
     |> D.select ["name", "weight-5%", "height-5%"]
@@ -324,7 +322,7 @@ print(result)
 ```haskell
 decade = (*10) . flip div 10 . year
 df_csv
-    |> D.derive "decade" decade "birthdate"
+    |> D.derive "decade" (lift decade (col @Date "birthdate"))
     |> D.groupByAgg D.Count ["decade"]
     |> D.aggregate [("height", D.Maximum), ("weight", D.Mean)]
     |> D.select ["decade", "sampleSize", "Mean_weight", "Maximum_height"]