22
33## Creating Data
44
5- ### From lists
5+ ### From Elixir
66```elixir
77Dux.from_list([%{x: 1, y: "a"}, %{x: 2, y: "b"}])
88```
99
1010### From files
1111```elixir
1212Dux.from_csv("data.csv")
13+ Dux.from_csv("data.csv", delimiter: "\t", nullstr: "NA")
1314Dux.from_parquet("data/**/*.parquet")
1415Dux.from_ndjson("events.ndjson")
1516```
@@ -24,54 +25,63 @@ Dux.from_query("SELECT * FROM range(100) t(x)")
2425### Expression syntax
2526{: .col-2}
2627
27- #### Macro (requires `require Dux`)
28+ #### Macro (`require Dux`)
2829```elixir
2930Dux.filter(df, x > 10 and status == "active")
30- ```
3131
32- #### With interpolation
33- ```elixir
32+ # Interpolate Elixir values with ^
3433min_val = 50
3534Dux.filter(df, price > ^min_val)
3635```
3736
3837#### Raw SQL
3938```elixir
4039Dux.filter_with(df, "x > 10 AND status = 'active'")
40+
41+ # DuckDB functions work here
42+ Dux.filter_with(df, "x BETWEEN 10 AND 20")
4143```
4244
43- ## Transformation
45+ ## Transforms
4446
45- ### Add columns
47+ ### Mutate (add/replace columns)
4648{: .col-2}
4749
4850#### Macro
4951```elixir
50- Dux.mutate(df, revenue: price * qty, tax: price * 0.08)
52+ Dux.mutate(df,
53+ revenue: price * qty,
54+ tax: price * 0.08
55+ )
5156```
5257
5358#### Raw SQL
5459```elixir
55- Dux.mutate_with(df, revenue: "price * qty")
60+ Dux.mutate_with(df,
61+ revenue: "price * qty",
62+ upper_name: "UPPER(name)",
63+ rank: "ROW_NUMBER() OVER (ORDER BY score DESC)"
64+ )
5665```
5766
58- ### Other transforms
67+ ### Column management
5968```elixir
60- Dux.select(df, [:name, :age]) # keep columns
61- Dux.discard(df, [:temp]) # drop columns
62- Dux.rename(df, old_name: :new_name) # rename columns
63- Dux.drop_nil(df, [:age]) # remove nil rows
69+ Dux.select(df, [:name, :age]) # keep columns
70+ Dux.discard(df, [:temp, :debug]) # drop columns
71+ Dux.rename(df, old_name: :new_name) # rename columns
72+ Dux.drop_nil(df, [:age, :email]) # remove rows with nils
6473```
6574
6675## Sorting & Limiting
6776
6877```elixir
69- Dux.sort_by(df, :name) # ascending
70- Dux.sort_by(df, desc: :score) # descending
71- Dux.sort_by(df, asc: :dept, desc: :salary) # multi-column
72- Dux.head(df, 10) # first N rows
73- Dux.slice(df, 5, 10) # offset + limit
74- Dux.distinct(df) # deduplicate
78+ Dux.sort_by(df, :name) # ascending
79+ Dux.sort_by(df, desc: :score) # descending
80+ Dux.sort_by(df, asc: :dept, desc: :salary) # multi-column
81+ Dux.head(df) # first 10 rows (default)
82+ Dux.head(df, 5) # first 5 rows
83+ Dux.slice(df, 5, 10) # offset 5, take 10
84+ Dux.distinct(df) # deduplicate all columns
7585```
7686
7787## Aggregation
8595|> Dux.group_by(:region)
8696|> Dux.summarise(
8797 total: sum(amount),
88- avg: avg(price),
89- n: count(id)
98+ average: avg(price),
99+ n: count(id),
100+ biggest: max(amount),
101+ smallest: min(amount)
90102)
91103```
92104
93105#### Raw SQL
94106```elixir
95107df
96- |> Dux.group_by(:region)
108+ |> Dux.group_by([:region, :year])
97109|> Dux.summarise_with(
98110 total: "SUM(amount)",
99- avg: "AVG(price)",
100- n: "COUNT(id)"
111+ median: "MEDIAN(price)",
112+ p95: "PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY amount)",
113+ unique: "COUNT(DISTINCT customer_id)"
101114)
102115```
103116
104- ## Reshape
117+ ## Joins
105118
106- ### Pivot
107- {: .col-2}
119+ ### Join types
120+ ```elixir
121+ Dux.join(left, right, on: :id) # inner (default)
122+ Dux.join(left, right, on: :id, how: :left) # left
123+ Dux.join(left, right, on: :id, how: :right) # right
124+ Dux.join(left, right, on: :id, how: :anti) # anti (no match)
125+ Dux.join(left, right, on: :id, how: :semi) # semi (exists)
126+ Dux.join(left, right, on: :id, how: :cross) # cross product
127+ ```
108128
109- #### Wide (long → wide)
129+ ### Different column names
110130```elixir
111- Dux.pivot_wider(df, :product, :sales, agg: "SUM")
131+ Dux.join(flights, airports, on: [{:dest, :faa}])
132+ Dux.join(orders, users, on: [{:customer_id, :id}])
112133```
113134
114- #### Long (wide → long)
135+ ### Concat rows (UNION ALL)
115136```elixir
116- Dux.pivot_longer(df, [:q1, :q2], names_to: "quarter", values_to: "sales")
137+ Dux.concat_rows([df1, df2, df3])
117138```
118139
119- ## Joins
140+ ## Reshape
120141
121- ```elixir
122- # Same column name
123- Dux.join(left, right, on: :id)
142+ ### Pivot
143+ {: .col-2}
124144
125- # Different column names
126- Dux.join(left, right, on: [{:user_id, :id}])
145+ #### Long → Wide
146+ ```elixir
147+ Dux.pivot_wider(df, :product, :sales)
148+ Dux.pivot_wider(df, :product, :sales, agg: "SUM")
149+ ```
127150
128- # Join types
129- Dux.join(left, right, on: :id, how: :left)
130- # :inner (default), :left, :right, :cross, :anti, :semi
151+ #### Wide → Long
152+ ```elixir
153+ Dux.pivot_longer(df, [:q1, :q2, :q3, :q4],
154+ names_to: "quarter",
155+ values_to: "revenue"
156+ )
131157```
132158
133159## IO
134160
135- ### Reading
161+ ### Read & Write
162+ {: .col-2}
163+
164+ #### Reading
136165```elixir
137- Dux.from_csv("file.csv", delimiter: "\t")
138- Dux.from_parquet("s3://bucket/*.parquet")
166+ Dux.from_csv("file.csv")
167+ Dux.from_csv("file.csv", delimiter: "\t", nullstr: "NA")
168+ Dux.from_parquet("data/*.parquet")
169+ Dux.from_parquet("s3://bucket/data/*.parquet")
139170Dux.from_ndjson("events.ndjson")
171+ Dux.from_query("SELECT * FROM 'file.csv'")
140172```
141173
142- ### Writing
174+ #### Writing
143175```elixir
144176Dux.to_csv(df, "out.csv")
177+ Dux.to_parquet(df, "out.parquet")
145178Dux.to_parquet(df, "out.parquet", compression: :zstd)
146179Dux.to_ndjson(df, "out.ndjson")
147180```
148181
149- ## Materialization & Inspection
182+ ## Materialization
150183
151184```elixir
152- Dux.compute(df) # → %Dux{} with table ref
153- Dux.collect(df) # → local %Dux{} (from distributed)
185+ Dux.compute(df) # → %Dux{} (execute pipeline)
154186Dux.to_rows(df) # → [%{"col" => val}, ...]
155187Dux.to_rows(df, atom_keys: true) # → [%{col: val}, ...]
156188Dux.to_columns(df) # → %{"col" => [vals]}
@@ -163,10 +195,11 @@ Dux.sql_preview(df, pretty: true) # → formatted SQL
163195## Distributed
164196
165197```elixir
166- # Mark for distributed execution
198+ # Discover or start workers
167199workers = Dux.Remote.Worker.list()
168200
169- Dux.from_parquet("data/**/*.parquet")
201+ # Same verbs, automatically distributed
202+ Dux.from_parquet("s3://data/**/*.parquet")
170203|> Dux.distribute(workers)
171204|> Dux.filter(amount > 100)
172205|> Dux.group_by(:region)
@@ -175,35 +208,32 @@ Dux.from_parquet("data/**/*.parquet")
175208
176209# Collect back to local %Dux{}
177210df |> Dux.distribute(workers) |> Dux.collect()
211+
212+ # FLAME: elastic cloud compute
213+ Dux.Flame.start_pool(backend: {FLAME.FlyBackend, ...}, max: 10)
214+ workers = Dux.Flame.spin_up(5)
178215```
179216
180- ## Graph
217+ ## Graph Analytics
181218
182219```elixir
183220graph = Dux.Graph.new(vertices: v, edges: e)
184221
185- # Local
186- Dux.Graph.pagerank(graph)
187- Dux.Graph.shortest_paths(graph, start_node)
188- Dux.Graph.connected_components(graph)
189- Dux.Graph.triangle_count(graph)
190- Dux.Graph.out_degree(graph)
222+ # Algorithms (return %Dux{} — pipe into any verb)
223+ Dux.Graph.pagerank(graph) # influence ranking
224+ Dux.Graph.shortest_paths(graph, start) # BFS distances
225+ Dux.Graph.connected_components(graph) # community detection
226+ Dux.Graph.triangle_count(graph) # clustering density
227+ Dux.Graph.out_degree(graph) # connection count
228+ Dux.Graph.in_degree(graph) # incoming connections
191229
192- # Distributed
193- graph = Dux.Graph.new(vertices: v, edges: e)
194- |> Dux.Graph.distribute(workers)
195-
196- Dux.Graph.pagerank(graph)
197- Dux.Graph.connected_components(graph)
198- Dux.Graph.shortest_paths(graph, start_node)
199- Dux.Graph.triangle_count(graph)
230+ # Distribute across workers
231+ graph |> Dux.Graph.distribute(workers) |> Dux.Graph.pagerank()
200232```
201233
202234## Nx Interop
203235
204236```elixir
205- # Single column → tensor
206- tensor = Dux.to_tensor(df, :price)
207-
237+ tensor = Dux.to_tensor(df, :price) # column → Nx.Tensor
208238# Implements Nx.LazyContainer for defn
209239```
0 commit comments