Skip to content

Commit edaf12b

Browse files
cigrainger and claude committed
docs: rewrite cheatsheet with col-2 layouts, all features
Complete cheatsheet overhaul:
- col-2 for macro vs raw SQL (filter, mutate, aggregation, pivot)
- col-2 for read vs write IO
- All join types with examples
- Materialization section with all output functions
- Distributed section with FLAME
- Graph section with all algorithms + distribution
- Nx interop
- DuckDB-specific features (MEDIAN, PERCENTILE_CONT, window functions)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 226f7c3 commit edaf12b

1 file changed

Lines changed: 98 additions & 68 deletions

File tree

guides/cheatsheet.cheatmd

Lines changed: 98 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,15 @@
22

33
## Creating Data
44

5-
### From lists
5+
### From Elixir
66
```elixir
77
Dux.from_list([%{x: 1, y: "a"}, %{x: 2, y: "b"}])
88
```
99

1010
### From files
1111
```elixir
1212
Dux.from_csv("data.csv")
13+
Dux.from_csv("data.csv", delimiter: "\t", nullstr: "NA")
1314
Dux.from_parquet("data/**/*.parquet")
1415
Dux.from_ndjson("events.ndjson")
1516
```
@@ -24,54 +25,63 @@ Dux.from_query("SELECT * FROM range(100) t(x)")
2425
### Expression syntax
2526
{: .col-2}
2627

27-
#### Macro (requires `require Dux`)
28+
#### Macro (`require Dux`)
2829
```elixir
2930
Dux.filter(df, x > 10 and status == "active")
30-
```
3131

32-
#### With interpolation
33-
```elixir
32+
# Interpolate Elixir values with ^
3433
min_val = 50
3534
Dux.filter(df, price > ^min_val)
3635
```
3736

3837
#### Raw SQL
3938
```elixir
4039
Dux.filter_with(df, "x > 10 AND status = 'active'")
40+
41+
# DuckDB functions work here
42+
Dux.filter_with(df, "x BETWEEN 10 AND 20")
4143
```
4244

43-
## Transformation
45+
## Transforms
4446

45-
### Add columns
47+
### Mutate (add/replace columns)
4648
{: .col-2}
4749

4850
#### Macro
4951
```elixir
50-
Dux.mutate(df, revenue: price * qty, tax: price * 0.08)
52+
Dux.mutate(df,
53+
revenue: price * qty,
54+
tax: price * 0.08
55+
)
5156
```
5257

5358
#### Raw SQL
5459
```elixir
55-
Dux.mutate_with(df, revenue: "price * qty")
60+
Dux.mutate_with(df,
61+
revenue: "price * qty",
62+
upper_name: "UPPER(name)",
63+
rank: "ROW_NUMBER() OVER (ORDER BY score DESC)"
64+
)
5665
```
5766

58-
### Other transforms
67+
### Column management
5968
```elixir
60-
Dux.select(df, [:name, :age]) # keep columns
61-
Dux.discard(df, [:temp]) # drop columns
62-
Dux.rename(df, old_name: :new_name) # rename columns
63-
Dux.drop_nil(df, [:age]) # remove nil rows
69+
Dux.select(df, [:name, :age]) # keep columns
70+
Dux.discard(df, [:temp, :debug]) # drop columns
71+
Dux.rename(df, old_name: :new_name) # rename columns
72+
Dux.drop_nil(df, [:age, :email]) # remove rows with nils
6473
```
6574

6675
## Sorting & Limiting
6776

6877
```elixir
69-
Dux.sort_by(df, :name) # ascending
70-
Dux.sort_by(df, desc: :score) # descending
71-
Dux.sort_by(df, asc: :dept, desc: :salary) # multi-column
72-
Dux.head(df, 10) # first N rows
73-
Dux.slice(df, 5, 10) # offset + limit
74-
Dux.distinct(df) # deduplicate
78+
Dux.sort_by(df, :name) # ascending
79+
Dux.sort_by(df, desc: :score) # descending
80+
Dux.sort_by(df, asc: :dept, desc: :salary) # multi-column
81+
Dux.head(df) # first 10 rows (default)
82+
Dux.head(df, 5) # first 5 rows
83+
Dux.slice(df, 5, 10) # offset 5, take 10
84+
Dux.distinct(df) # deduplicate all columns
7585
```
7686

7787
## Aggregation
@@ -85,72 +95,94 @@ df
8595
|> Dux.group_by(:region)
8696
|> Dux.summarise(
8797
total: sum(amount),
88-
avg: avg(price),
89-
n: count(id)
98+
average: avg(price),
99+
n: count(id),
100+
biggest: max(amount),
101+
smallest: min(amount)
90102
)
91103
```
92104

93105
#### Raw SQL
94106
```elixir
95107
df
96-
|> Dux.group_by(:region)
108+
|> Dux.group_by([:region, :year])
97109
|> Dux.summarise_with(
98110
total: "SUM(amount)",
99-
avg: "AVG(price)",
100-
n: "COUNT(id)"
111+
median: "MEDIAN(price)",
112+
p95: "PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY amount)",
113+
unique: "COUNT(DISTINCT customer_id)"
101114
)
102115
```
103116

104-
## Reshape
117+
## Joins
105118

106-
### Pivot
107-
{: .col-2}
119+
### Join types
120+
```elixir
121+
Dux.join(left, right, on: :id) # inner (default)
122+
Dux.join(left, right, on: :id, how: :left) # left
123+
Dux.join(left, right, on: :id, how: :right) # right
124+
Dux.join(left, right, on: :id, how: :anti) # anti (no match)
125+
Dux.join(left, right, on: :id, how: :semi) # semi (exists)
126+
Dux.join(left, right, on: :id, how: :cross) # cross product
127+
```
108128

109-
#### Wide (long → wide)
129+
### Different column names
110130
```elixir
111-
Dux.pivot_wider(df, :product, :sales, agg: "SUM")
131+
Dux.join(flights, airports, on: [{:dest, :faa}])
132+
Dux.join(orders, users, on: [{:customer_id, :id}])
112133
```
113134

114-
#### Long (wide → long)
135+
### Concat rows (UNION ALL)
115136
```elixir
116-
Dux.pivot_longer(df, [:q1, :q2], names_to: "quarter", values_to: "sales")
137+
Dux.concat_rows([df1, df2, df3])
117138
```
118139

119-
## Joins
140+
## Reshape
120141

121-
```elixir
122-
# Same column name
123-
Dux.join(left, right, on: :id)
142+
### Pivot
143+
{: .col-2}
124144

125-
# Different column names
126-
Dux.join(left, right, on: [{:user_id, :id}])
145+
#### Long → Wide
146+
```elixir
147+
Dux.pivot_wider(df, :product, :sales)
148+
Dux.pivot_wider(df, :product, :sales, agg: "SUM")
149+
```
127150

128-
# Join types
129-
Dux.join(left, right, on: :id, how: :left)
130-
# :inner (default), :left, :right, :cross, :anti, :semi
151+
#### Wide → Long
152+
```elixir
153+
Dux.pivot_longer(df, [:q1, :q2, :q3, :q4],
154+
names_to: "quarter",
155+
values_to: "revenue"
156+
)
131157
```
132158

133159
## IO
134160

135-
### Reading
161+
### Read & Write
162+
{: .col-2}
163+
164+
#### Reading
136165
```elixir
137-
Dux.from_csv("file.csv", delimiter: "\t")
138-
Dux.from_parquet("s3://bucket/*.parquet")
166+
Dux.from_csv("file.csv")
167+
Dux.from_csv("file.csv", delimiter: "\t", nullstr: "NA")
168+
Dux.from_parquet("data/*.parquet")
169+
Dux.from_parquet("s3://bucket/data/*.parquet")
139170
Dux.from_ndjson("events.ndjson")
171+
Dux.from_query("SELECT * FROM 'file.csv'")
140172
```
141173

142-
### Writing
174+
#### Writing
143175
```elixir
144176
Dux.to_csv(df, "out.csv")
177+
Dux.to_parquet(df, "out.parquet")
145178
Dux.to_parquet(df, "out.parquet", compression: :zstd)
146179
Dux.to_ndjson(df, "out.ndjson")
147180
```
148181

149-
## Materialization & Inspection
182+
## Materialization
150183

151184
```elixir
152-
Dux.compute(df) # → %Dux{} with table ref
153-
Dux.collect(df) # → local %Dux{} (from distributed)
185+
Dux.compute(df) # → %Dux{} (execute pipeline)
154186
Dux.to_rows(df) # → [%{"col" => val}, ...]
155187
Dux.to_rows(df, atom_keys: true) # → [%{col: val}, ...]
156188
Dux.to_columns(df) # → %{"col" => [vals]}
@@ -163,10 +195,11 @@ Dux.sql_preview(df, pretty: true) # → formatted SQL
163195
## Distributed
164196

165197
```elixir
166-
# Mark for distributed execution
198+
# Discover or start workers
167199
workers = Dux.Remote.Worker.list()
168200

169-
Dux.from_parquet("data/**/*.parquet")
201+
# Same verbs, automatically distributed
202+
Dux.from_parquet("s3://data/**/*.parquet")
170203
|> Dux.distribute(workers)
171204
|> Dux.filter(amount > 100)
172205
|> Dux.group_by(:region)
@@ -175,35 +208,32 @@ Dux.from_parquet("data/**/*.parquet")
175208

176209
# Collect back to local %Dux{}
177210
df |> Dux.distribute(workers) |> Dux.collect()
211+
212+
# FLAME: elastic cloud compute
213+
Dux.Flame.start_pool(backend: {FLAME.FlyBackend, ...}, max: 10)
214+
workers = Dux.Flame.spin_up(5)
178215
```
179216

180-
## Graph
217+
## Graph Analytics
181218

182219
```elixir
183220
graph = Dux.Graph.new(vertices: v, edges: e)
184221

185-
# Local
186-
Dux.Graph.pagerank(graph)
187-
Dux.Graph.shortest_paths(graph, start_node)
188-
Dux.Graph.connected_components(graph)
189-
Dux.Graph.triangle_count(graph)
190-
Dux.Graph.out_degree(graph)
222+
# Algorithms (return %Dux{} — pipe into any verb)
223+
Dux.Graph.pagerank(graph) # influence ranking
224+
Dux.Graph.shortest_paths(graph, start) # BFS distances
225+
Dux.Graph.connected_components(graph) # community detection
226+
Dux.Graph.triangle_count(graph) # clustering density
227+
Dux.Graph.out_degree(graph) # connection count
228+
Dux.Graph.in_degree(graph) # incoming connections
191229

192-
# Distributed
193-
graph = Dux.Graph.new(vertices: v, edges: e)
194-
|> Dux.Graph.distribute(workers)
195-
196-
Dux.Graph.pagerank(graph)
197-
Dux.Graph.connected_components(graph)
198-
Dux.Graph.shortest_paths(graph, start_node)
199-
Dux.Graph.triangle_count(graph)
230+
# Distribute across workers
231+
graph |> Dux.Graph.distribute(workers) |> Dux.Graph.pagerank()
200232
```
201233

202234
## Nx Interop
203235

204236
```elixir
205-
# Single column → tensor
206-
tensor = Dux.to_tensor(df, :price)
207-
237+
tensor = Dux.to_tensor(df, :price) # column → Nx.Tensor
208238
# Implements Nx.LazyContainer for defn
209239
```

0 commit comments

Comments
 (0)