Skip to content

Commit d37f6fe

Browse files
goldvitaly authored and copybara-github committed
Support removed values in extract.
``` name old cpu/op new cpu/op delta BM_DisjointChains/1/1 1.73µs ± 2% 1.43µs ± 3% -17.49% (p=0.008 n=5+5) BM_DisjointChains/1/1000 25.2µs ± 1% 25.0µs ± 3% ~ (p=0.222 n=5+5) BM_DisjointChains/1/977k 28.3ms ±20% 24.7ms ± 2% -12.57% (p=0.008 n=5+5) BM_DisjointChains/2/1 3.08µs ± 4% 2.31µs ± 2% -24.87% (p=0.008 n=5+5) BM_DisjointChains/2/1000 44.3µs ±13% 41.6µs ± 3% ~ (p=0.056 n=5+5) BM_DisjointChains/2/977k 49.7ms ±15% 51.7ms ±17% ~ (p=0.841 n=5+5) BM_DisjointChains/1000/1 1.36ms ± 2% 0.99ms ± 3% -27.01% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 18.5ms ± 2% 18.0ms ± 2% ~ (p=0.056 n=5+5) BM_DisjointChains/977k/1 2.60s ± 1% 2.21s ± 6% -15.10% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1 2.49µs ± 3% 2.14µs ± 3% -14.16% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1000 43.4µs ± 2% 42.5µs ± 1% -2.01% (p=0.016 n=5+5) BM_DisjointChainsObjects/1/977k 51.1ms ±14% 50.0ms ±18% ~ (p=0.690 n=5+5) BM_DisjointChainsObjects/2/1 4.49µs ± 1% 3.75µs ± 3% -16.39% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1000 78.3µs ± 2% 77.4µs ± 1% ~ (p=0.151 n=5+5) BM_DisjointChainsObjects/2/977k 103ms ±13% 104ms ± 7% ~ (p=1.000 n=5+5) BM_DisjointChainsObjects/1000/1 2.04ms ± 4% 1.67ms ± 3% -18.21% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1000 41.0ms ±11% 38.8ms ± 2% ~ (p=0.310 n=5+5) BM_DisjointChainsObjects/977k/1 3.40s ± 2% 3.05s ± 3% -10.34% (p=0.008 n=5+5) BM_DAG/2/10/100/1000 252µs ± 1% 247µs ± 2% ~ (p=0.095 n=5+5) BM_DAG/2/100/100/1000 2.48ms ± 1% 2.41ms ± 1% -2.87% (p=0.008 n=5+5) BM_DAG/2/100/100/10 229µs ± 3% 194µs ± 3% -15.24% (p=0.008 n=5+5) BM_DAG/20/10/100/10 239µs ± 2% 199µs ± 1% -16.81% (p=0.008 n=5+5) BM_DAG/20/100/100/10 2.58ms ± 1% 2.32ms ±13% ~ (p=0.151 n=5+5) BM_DAG/20/100/10/10 11.0ms ± 3% 10.2ms ± 2% -7.22% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 444µs ± 1% 442µs ± 1% ~ (p=0.310 n=5+5) BM_DAGObjects/2/100/100/1000 4.27ms ± 1% 4.26ms ± 4% ~ (p=1.000 n=5+5) BM_DAGObjects/2/100/100/10 323µs ± 2% 283µs ± 3% -12.19% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 344µs ± 3% 
305µs ± 3% -11.40% (p=0.008 n=5+5) BM_DAGObjects/20/100/100/10 3.80ms ± 2% 3.43ms ± 4% -9.65% (p=0.008 n=5+5) BM_DAGObjects/20/100/10/10 16.1ms ± 2% 15.6ms ± 3% -3.41% (p=0.016 n=5+5) name old time/op new time/op delta BM_DisjointChains/1/1 1.74µs ± 2% 1.44µs ± 3% -17.38% (p=0.008 n=5+5) BM_DisjointChains/1/1000 25.3µs ± 1% 25.0µs ± 3% ~ (p=0.222 n=5+5) BM_DisjointChains/1/977k 28.4ms ±20% 24.8ms ± 2% -12.57% (p=0.008 n=5+5) BM_DisjointChains/2/1 3.09µs ± 4% 2.32µs ± 2% -24.88% (p=0.008 n=5+5) BM_DisjointChains/2/1000 44.3µs ±14% 41.7µs ± 3% ~ (p=0.056 n=5+5) BM_DisjointChains/2/977k 50.3ms ±18% 52.2ms ±18% ~ (p=0.841 n=5+5) BM_DisjointChains/1000/1 1.36ms ± 2% 0.99ms ± 3% -27.02% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 18.6ms ± 2% 18.1ms ± 2% ~ (p=0.056 n=5+5) BM_DisjointChains/977k/1 2.61s ± 1% 2.22s ± 6% -15.10% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1 2.50µs ± 3% 2.14µs ± 3% -14.16% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1000 43.5µs ± 2% 42.7µs ± 1% -1.99% (p=0.016 n=5+5) BM_DisjointChainsObjects/1/977k 51.2ms ±14% 50.2ms ±18% ~ (p=0.690 n=5+5) BM_DisjointChainsObjects/2/1 4.50µs ± 1% 3.76µs ± 3% -16.42% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1000 78.5µs ± 2% 77.6µs ± 1% ~ (p=0.095 n=5+5) BM_DisjointChainsObjects/2/977k 103ms ±13% 104ms ± 7% ~ (p=1.000 n=5+5) BM_DisjointChainsObjects/1000/1 2.05ms ± 5% 1.67ms ± 3% -18.25% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1000 41.2ms ±11% 39.0ms ± 2% ~ (p=0.222 n=5+5) BM_DisjointChainsObjects/977k/1 3.41s ± 2% 3.06s ± 3% -10.31% (p=0.008 n=5+5) BM_DAG/2/10/100/1000 252µs ± 1% 247µs ± 3% ~ (p=0.151 n=5+5) BM_DAG/2/100/100/1000 2.49ms ± 1% 2.41ms ± 1% -2.90% (p=0.008 n=5+5) BM_DAG/2/100/100/10 230µs ± 3% 195µs ± 3% -15.25% (p=0.008 n=5+5) BM_DAG/20/10/100/10 239µs ± 2% 199µs ± 1% -16.81% (p=0.008 n=5+5) BM_DAG/20/100/100/10 2.59ms ± 2% 2.33ms ±13% ~ (p=0.151 n=5+5) BM_DAG/20/100/10/10 11.0ms ± 3% 10.2ms ± 2% -7.26% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 446µs ± 1% 443µs ± 1% ~ (p=0.310 n=5+5) 
BM_DAGObjects/2/100/100/1000 4.29ms ± 1% 4.28ms ± 4% ~ (p=1.000 n=5+5) BM_DAGObjects/2/100/100/10 324µs ± 2% 284µs ± 3% -12.18% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 345µs ± 3% 306µs ± 3% -11.37% (p=0.008 n=5+5) BM_DAGObjects/20/100/100/10 3.81ms ± 3% 3.45ms ± 4% -9.64% (p=0.008 n=5+5) BM_DAGObjects/20/100/10/10 16.2ms ± 2% 15.6ms ± 2% -3.41% (p=0.016 n=5+5) name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_DisjointChains/1/1 13.7k ± 0% 11.5k ± 0% -16.11% (p=0.008 n=5+5) BM_DisjointChains/1/1000 238k ± 1% 242k ± 0% +1.29% (p=0.008 n=5+5) BM_DisjointChains/1/977k 223M ± 0% 229M ± 0% +2.49% (p=0.008 n=5+5) BM_DisjointChains/2/1 23.1k ± 0% 18.5k ± 0% -19.89% (p=0.016 n=4+5) BM_DisjointChains/2/1000 402k ± 1% 409k ± 0% +1.78% (p=0.008 n=5+5) BM_DisjointChains/2/977k 376M ± 0% 387M ± 0% +3.02% (p=0.008 n=5+5) BM_DisjointChains/1000/1 9.66M ± 1% 7.21M ± 1% -25.40% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 166M ± 0% 170M ± 0% +2.64% (p=0.016 n=5+4) BM_DisjointChains/977k/1 9.85G ± 0% 7.46G ± 1% -24.23% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1 18.3k ± 0% 16.2k ± 0% -11.62% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1000 423k ± 1% 426k ± 0% ~ (p=0.056 n=5+5) BM_DisjointChainsObjects/1/977k 403M ± 0% 408M ± 0% +1.42% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1 32.5k ± 0% 28.1k ± 0% -13.67% (p=0.016 n=4+5) BM_DisjointChainsObjects/2/1000 771k ± 0% 784k ± 0% +1.68% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/977k 735M ± 0% 752M ± 0% +2.26% (p=0.016 n=4+5) BM_DisjointChainsObjects/1000/1 14.1M ± 0% 11.9M ± 1% -15.69% (p=0.016 n=4+5) BM_DisjointChainsObjects/1000/1000 354M ± 0% 363M ± 0% +2.72% (p=0.016 n=4+5) BM_DisjointChainsObjects/977k/1 14.5G ± 1% 12.1G ± 0% -16.77% (p=0.008 n=5+5) BM_DAG/2/10/100/1000 2.58M ± 1% 2.64M ± 0% +2.60% (p=0.008 n=5+5) BM_DAG/2/100/100/1000 24.5M ± 1% 25.3M ± 0% +3.06% (p=0.008 n=5+5) BM_DAG/2/100/100/10 1.78M ± 0% 1.52M ± 1% -14.44% (p=0.008 n=5+5) BM_DAG/20/10/100/10 1.82M ± 1% 1.55M ± 0% -14.79% (p=0.008 n=5+5) 
BM_DAG/20/100/100/10 18.6M ± 0% 16.0M ± 0% -14.17% (p=0.008 n=5+5) BM_DAG/20/100/10/10 77.8M ± 1% 75.0M ± 1% -3.55% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 4.45M ± 0% 4.54M ± 0% +2.10% (p=0.016 n=4+5) BM_DAGObjects/2/100/100/1000 41.6M ± 1% 42.4M ± 0% +1.88% (p=0.008 n=5+5) BM_DAGObjects/2/100/100/10 2.41M ± 1% 2.16M ± 1% -10.60% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 2.54M ± 1% 2.25M ± 0% -11.42% (p=0.016 n=5+4) BM_DAGObjects/20/100/100/10 25.7M ± 0% 23.2M ± 1% -9.55% (p=0.008 n=5+5) BM_DAGObjects/20/100/10/10 115M ± 0% 113M ± 1% -2.23% (p=0.008 n=5+5) name old CYCLES/op new CYCLES/op delta BM_DisjointChains/1/1 5.61k ± 2% 4.60k ± 0% -18.01% (p=0.008 n=5+5) BM_DisjointChains/1/1000 81.9k ± 1% 80.7k ± 1% -1.51% (p=0.008 n=5+5) BM_DisjointChains/1/977k 82.9M ± 3% 81.4M ± 1% ~ (p=0.222 n=5+5) BM_DisjointChains/2/1 9.83k ± 3% 7.51k ± 1% -23.59% (p=0.008 n=5+5) BM_DisjointChains/2/1000 137k ± 1% 135k ± 1% -1.42% (p=0.032 n=5+5) BM_DisjointChains/2/977k 142M ± 2% 138M ± 2% ~ (p=0.056 n=5+5) BM_DisjointChains/1000/1 4.40M ± 1% 3.25M ± 1% -26.10% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 59.5M ± 1% 59.0M ± 1% ~ (p=0.222 n=5+5) BM_DisjointChains/977k/1 8.05G ± 2% 6.93G ± 6% -13.91% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1 8.02k ± 0% 6.96k ± 0% -13.24% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1000 140k ± 1% 140k ± 0% ~ (p=0.841 n=5+5) BM_DisjointChainsObjects/1/977k 147M ± 2% 147M ± 1% ~ (p=1.000 n=5+5) BM_DisjointChainsObjects/2/1 14.4k ± 2% 12.3k ± 1% -14.61% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1000 253k ± 1% 254k ± 0% +0.67% (p=0.016 n=5+5) BM_DisjointChainsObjects/2/977k 274M ± 2% 272M ± 1% ~ (p=0.421 n=5+5) BM_DisjointChainsObjects/1000/1 6.59M ± 4% 5.44M ± 1% -17.42% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1000 127M ± 2% 127M ± 0% ~ (p=0.421 n=5+5) BM_DisjointChainsObjects/977k/1 10.7G ± 1% 9.6G ± 4% -10.38% (p=0.008 n=5+5) BM_DAG/2/10/100/1000 815k ± 2% 805k ± 1% ~ (p=0.095 n=5+5) BM_DAG/2/100/100/1000 8.02M ± 1% 7.92M ± 2% ~ (p=0.095 
n=5+5) BM_DAG/2/100/100/10 735k ± 0% 634k ± 1% -13.79% (p=0.008 n=5+5) BM_DAG/20/10/100/10 763k ± 1% 651k ± 1% -14.75% (p=0.008 n=5+5) BM_DAG/20/100/100/10 8.33M ± 0% 7.23M ± 2% -13.12% (p=0.008 n=5+5) BM_DAG/20/100/10/10 35.1M ± 1% 33.6M ± 1% -4.18% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 1.44M ± 1% 1.45M ± 0% +0.81% (p=0.032 n=5+5) BM_DAGObjects/2/100/100/1000 13.7M ± 1% 13.8M ± 0% ~ (p=0.095 n=5+5) BM_DAGObjects/2/100/100/10 1.04M ± 1% 0.93M ± 2% -10.53% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 1.11M ± 2% 0.98M ± 1% -11.66% (p=0.008 n=5+5) BM_DAGObjects/20/100/100/10 12.2M ± 1% 11.1M ± 2% -8.58% (p=0.008 n=5+5) BM_DAGObjects/20/100/10/10 52.2M ± 0% 51.1M ± 1% -2.20% (p=0.008 n=5+5) name old allocs/op new allocs/op delta BM_DisjointChains/1/1 30.0 ± 0% 27.0 ± 0% -10.00% (p=0.008 n=5+5) BM_DisjointChains/1/1000 34.0 ± 0% 33.0 ± 0% -2.94% (p=0.008 n=5+5) BM_DisjointChains/1/977k 37.0 ± 0% 36.0 ± 0% -2.70% (p=0.008 n=5+5) BM_DisjointChains/2/1 50.0 ± 0% 42.0 ± 0% -16.00% (p=0.008 n=5+5) BM_DisjointChains/2/1000 56.0 ± 0% 54.0 ± 0% -3.57% (p=0.008 n=5+5) BM_DisjointChains/2/977k 61.0 ± 0% 59.0 ± 0% ~ (p=0.079 n=4+5) BM_DisjointChains/1000/1 18.1k ± 0% 13.1k ± 0% -27.69% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 20.1k ± 0% 19.1k ± 0% -4.99% (p=0.008 n=5+5) BM_DisjointChains/977k/1 18.0M ± 0% 13.0M ± 0% -27.78% (p=0.016 n=4+5) BM_DisjointChainsObjects/1/1 52.0 ± 0% 49.0 ± 0% -5.77% (p=0.008 n=5+5) BM_DisjointChainsObjects/1/1000 57.0 ± 0% 56.0 ± 0% -1.75% (p=0.000 n=5+4) BM_DisjointChainsObjects/1/977k 62.0 ± 0% 61.0 ± 0% -1.61% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1 92.0 ± 0% 84.0 ± 0% -8.70% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1000 100 ± 0% 98 ± 0% -2.00% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/977k 109 ± 0% 107 ± 0% -1.83% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1 37.1k ± 0% 32.1k ± 0% -13.49% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1000 41.0k ± 0% 40.0k ± 0% -2.44% (p=0.029 n=4+4) BM_DisjointChainsObjects/977k/1 37.0M ± 0% 32.0M 
± 0% -13.51% (p=0.008 n=5+5) BM_DAG/2/10/100/1000 353 ± 0% 333 ± 0% -5.67% (p=0.008 n=5+5) BM_DAG/2/100/100/1000 3.24k ± 0% 3.04k ± 0% -6.17% (p=0.008 n=5+5) BM_DAG/2/100/100/10 3.04k ± 0% 2.84k ± 0% -6.57% (p=0.008 n=5+5) BM_DAG/20/10/100/10 3.17k ± 0% 2.97k ± 0% -6.31% (p=0.008 n=5+5) BM_DAG/20/100/100/10 30.2k ± 0% 28.2k ± 0% -6.61% (p=0.008 n=5+5) BM_DAG/20/100/10/10 143k ± 1% 117k ± 1% -17.95% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 559 ± 0% 539 ± 0% -3.58% (p=0.008 n=5+5) BM_DAGObjects/2/100/100/1000 5.07k ± 0% 4.87k ± 0% ~ (p=0.079 n=4+5) BM_DAGObjects/2/100/100/10 4.87k ± 0% 4.67k ± 0% -4.11% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 5.21k ± 0% 5.01k ± 0% -3.84% (p=0.008 n=5+5) BM_DAGObjects/20/100/100/10 48.5k ± 0% 46.5k ± 0% -4.13% (p=0.008 n=5+5) BM_DAGObjects/20/100/10/10 309k ± 0% 285k ± 1% -7.92% (p=0.008 n=5+5) name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta BM_DisjointChains/1/1 6.44k ± 0% 5.93k ± 0% -7.89% (p=0.008 n=5+5) BM_DisjointChains/1/1000 57.2k ± 0% 56.8k ± 0% -0.81% (p=0.008 n=5+5) BM_DisjointChains/1/977k 50.7M ± 0% 50.7M ± 0% -0.00% (p=0.008 n=5+5) BM_DisjointChains/2/1 7.30k ± 0% 6.62k ± 0% ~ (p=0.079 n=4+5) BM_DisjointChains/2/1000 91.5k ± 0% 92.2k ± 0% +0.80% (p=0.029 n=4+4) BM_DisjointChains/2/977k 84.0M ± 0% 85.1M ± 0% +1.35% (p=0.008 n=5+5) BM_DisjointChains/1000/1 608k ± 0% 608k ± 0% -0.13% (p=0.008 n=5+5) BM_DisjointChains/1000/1000 18.3M ± 0% 18.3M ± 0% +0.00% (p=0.016 n=4+5) BM_DisjointChains/977k/1 612M ± 0% 612M ± 0% -0.00% (p=0.032 n=4+5) BM_DisjointChainsObjects/1/1 7.61k ± 0% 7.11k ± 0% ~ (p=0.079 n=4+5) BM_DisjointChainsObjects/1/1000 110k ± 0% 109k ± 0% -0.42% (p=0.000 n=5+4) BM_DisjointChainsObjects/1/977k 102M ± 0% 102M ± 0% -0.00% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/1 8.27k ± 0% 7.76k ± 0% -6.15% (p=0.000 n=5+4) BM_DisjointChainsObjects/2/1000 162k ± 0% 162k ± 0% -0.18% (p=0.008 n=5+5) BM_DisjointChainsObjects/2/977k 154M ± 0% 154M ± 0% +0.08% (p=0.008 n=5+5) BM_DisjointChainsObjects/1000/1 609k ± 
0% 609k ± 0% ~ (p=0.667 n=5+5) BM_DisjointChainsObjects/1000/1000 36.9M ± 0% 36.9M ± 0% -0.00% (p=0.016 n=5+4) BM_DisjointChainsObjects/977k/1 612M ± 0% 612M ± 0% -0.00% (p=0.016 n=4+5) BM_DAG/2/10/100/1000 417k ± 0% 419k ± 0% +0.46% (p=0.016 n=5+4) BM_DAG/2/100/100/1000 3.50M ± 0% 3.50M ± 0% +0.05% (p=0.016 n=4+5) BM_DAG/2/100/100/10 185k ± 0% 185k ± 0% -0.20% (p=0.000 n=5+4) BM_DAG/20/10/100/10 200k ± 0% 199k ± 0% -0.19% (p=0.000 n=4+5) BM_DAG/20/100/100/10 1.95M ± 0% 1.95M ± 0% -0.02% (p=0.029 n=4+4) BM_DAG/20/100/10/10 1.24M ± 1% 1.46M ± 1% +17.52% (p=0.008 n=5+5) BM_DAGObjects/2/10/100/1000 519k ± 0% 519k ± 0% -0.02% (p=0.008 n=5+5) BM_DAGObjects/2/100/100/1000 3.90M ± 0% 3.90M ± 0% ~ (p=0.079 n=4+5) BM_DAGObjects/2/100/100/10 296k ± 0% 296k ± 0% -0.13% (p=0.008 n=5+5) BM_DAGObjects/20/10/100/10 321k ± 0% 321k ± 0% -0.12% (p=0.008 n=5+5) BM_DAGObjects/20/100/100/10 3.30M ± 0% 3.30M ± 0% +0.00% (p=0.016 n=4+5) BM_DAGObjects/20/100/10/10 2.31M ± 1% 2.81M ± 0% +21.76% (p=0.008 n=5+5) ``` PiperOrigin-RevId: 718016652 Change-Id: I92507a140cc76a359a4186f53ebdde9a88c39f95
1 parent d73c94c commit d37f6fe

File tree

7 files changed: +199 -15 lines changed

7 files changed: +199 -15 lines changed

koladata/internal/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -566,6 +566,7 @@ cc_test(
566566
":dtype",
567567
":object_id",
568568
":schema_utils",
569+
":types_buffer",
569570
":uuid_object",
570571
"//koladata/internal/testing:matchers",
571572
"//koladata/s11n",

koladata/internal/data_bag.cc

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@
3434
#include "absl/container/inlined_vector.h"
3535
#include "absl/hash/hash.h"
3636
#include "absl/log/check.h"
37-
#include "absl/log/log.h"
3837
#include "absl/status/status.h"
3938
#include "absl/status/statusor.h"
4039
#include "absl/strings/str_cat.h"
@@ -445,9 +444,9 @@ SparseSource& DataBagImpl::GetMutableSmallAllocSource(absl::string_view attr) {
445444
return small_alloc_sources_[attr];
446445
}
447446

448-
absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttr(
447+
absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttrImpl(
449448
const DataSliceImpl& objects, absl::string_view attr,
450-
FallbackSpan fallbacks) const {
449+
FallbackSpan fallbacks, bool with_removed) const {
451450
if (objects.is_empty_and_unknown()) {
452451
return DataSliceImpl::CreateEmptyAndUnknownType(objects.size());
453452
}
@@ -468,6 +467,9 @@ absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttr(
468467
};
469468

470469
std::optional<SliceBuilder> bldr;
470+
if (with_removed) {
471+
bldr.emplace(objs.size());
472+
}
471473
for (const DataBagImpl* db = this; db != nullptr; db = next_fallback()) {
472474
ConstDenseSourceArray dense_sources;
473475
ConstSparseSourceArray sparse_sources;
@@ -522,6 +524,19 @@ absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttr(
522524
}
523525
}
524526

527+
absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttr(
528+
const DataSliceImpl& objects, absl::string_view attr,
529+
FallbackSpan fallbacks) const {
530+
return GetAttrImpl(objects, attr, fallbacks, /*with_removed=*/false);
531+
}
532+
533+
534+
absl::StatusOr<DataSliceImpl> DataBagImpl::GetAttrWithRemoved(
535+
const DataSliceImpl& objects, absl::string_view attr,
536+
FallbackSpan fallbacks) const {
537+
return GetAttrImpl(objects, attr, fallbacks, /*with_removed=*/true);
538+
}
539+
525540
absl::StatusOr<DataItem> DataBagImpl::GetAttr(const DataItem& object,
526541
absl::string_view attr,
527542
FallbackSpan fallbacks) const {

koladata/internal/data_bag.h

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ class DataBagImpl : public arolla::RefcountedBase {
163163
bool IsPristine() const;
164164

165165
// Returns DataSliceImpl with attribute for every object.
166-
// Missing values are looked up in the fallback databags.
166+
// Missing (not removed) values are looked up in the fallback databags.
167167
absl::StatusOr<DataSliceImpl> GetAttr(
168168
const DataSliceImpl& objects,
169169
absl::string_view attr,
@@ -174,6 +174,15 @@ class DataBagImpl : public arolla::RefcountedBase {
174174
absl::string_view attr,
175175
FallbackSpan fallbacks = {}) const;
176176

177+
// Returns DataSliceImpl with attribute for every object.
178+
// Resulting DataSliceImpl always contains types_buffer to distinguish
179+
// removed and unset values.
180+
// Missing (not removed) values are looked up in the fallback databags.
181+
absl::StatusOr<DataSliceImpl> GetAttrWithRemoved(
182+
const DataSliceImpl& objects,
183+
absl::string_view attr,
184+
FallbackSpan fallbacks = {}) const;
185+
177186
// Gets __schema__ attribute for objects and returns an Error if DataSlice has
178187
// primitives or objects do not have __schema__ attribute.
179188
absl::StatusOr<DataItem> GetObjSchemaAttr(const DataItem& item,
@@ -654,6 +663,12 @@ class DataBagImpl : public arolla::RefcountedBase {
654663
std::optional<DataItem> LookupAttrInDataSourcesMap(
655664
ObjectId object_id, absl::string_view attr) const;
656665

666+
absl::StatusOr<DataSliceImpl> GetAttrImpl(
667+
const DataSliceImpl& objects,
668+
absl::string_view attr,
669+
FallbackSpan fallbacks,
670+
bool with_removed) const;
671+
657672
template <bool kReturnValues>
658673
absl::StatusOr<std::pair<DataSliceImpl, arolla::DenseArrayEdge>>
659674
GetDictKeysOrValues(const DataSliceImpl& dicts, FallbackSpan fallbacks) const;

koladata/internal/data_bag_test.cc

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
#include "koladata/internal/schema_utils.h"
3838
#include "koladata/internal/slice_builder.h"
3939
#include "koladata/internal/testing/matchers.h"
40+
#include "koladata/internal/types_buffer.h"
4041
#include "koladata/internal/uuid_object.h"
4142
#include "arolla/dense_array/dense_array.h"
4243
#include "arolla/dense_array/qtype/types.h"
@@ -159,6 +160,49 @@ TEST(DataBagTest, SetGet) {
159160
ElementsAreArray(ds_a.values<ObjectId>()));
160161
}
161162

163+
TEST(DataBagTest, GetAttrWithRemoved) {
164+
for (int64_t size : {1, 3, 7, 13, 1023}) {
165+
SCOPED_TRACE(absl::StrCat("size: ", size));
166+
auto db = DataBagImpl::CreateEmptyDatabag();
167+
AllocationId alloc = Allocate(size);
168+
std::vector<DataItem> objects;
169+
for (int64_t i = 0; i < size; ++i) {
170+
objects.push_back(i % 3 == 2 ? DataItem()
171+
: DataItem(alloc.ObjectByOffset(i)));
172+
}
173+
auto ds = DataSliceImpl::Create(objects);
174+
std::vector<DataItem> items;
175+
for (int64_t i = 0; i < size; ++i) {
176+
items.push_back(i % 2 == 1 ? DataItem(i) : DataItem());
177+
}
178+
auto ds_a = DataSliceImpl::Create(items);
179+
180+
ASSERT_OK(db->SetAttr(ds, "a", ds_a));
181+
182+
auto all_objects = DataSliceImpl::ObjectsFromAllocation(alloc, size);
183+
ASSERT_OK_AND_ASSIGN(auto ds_a_get,
184+
db->GetAttrWithRemoved(all_objects, "a"));
185+
186+
EXPECT_EQ(ds_a_get.size(), size);
187+
EXPECT_THAT(ds_a_get.allocation_ids(), IsEmpty());
188+
EXPECT_EQ(ds_a_get.types_buffer().size(), size);
189+
EXPECT_EQ(ds_a_get.is_empty_and_unknown(), size == 1);
190+
for (int64_t i = 0; i < size; ++i) {
191+
bool is_set = i % 3 != 2;
192+
bool is_removed = i % 2 == 0;
193+
ASSERT_EQ(ds_a_get[i], is_set && !is_removed ? DataItem(i) : DataItem())
194+
<< i;
195+
if (size != 1) {
196+
auto expected_typeidx = is_set
197+
? (is_removed ? TypesBuffer::kRemoved : 0)
198+
: TypesBuffer::kUnset;
199+
ASSERT_EQ(ds_a_get.types_buffer().id_to_typeidx[i], expected_typeidx)
200+
<< i;
201+
}
202+
}
203+
}
204+
}
205+
162206
TEST(DataBagTest, GetObjSchemaAttr) {
163207
constexpr int64_t kSize = 13;
164208
auto db = DataBagImpl::CreateEmptyDatabag();

koladata/internal/op_utils/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ cc_library(
128128
"//koladata/internal:dtype",
129129
"//koladata/internal:object_id",
130130
"//koladata/internal:schema_utils",
131+
"//koladata/internal:types_buffer",
131132
"//koladata/internal:uuid_object",
132133
"@com_google_absl//absl/base:nullability",
133134
"@com_google_absl//absl/log:check",

koladata/internal/op_utils/extract.cc

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,9 @@
4242
#include "koladata/internal/op_utils/presence_or.h"
4343
#include "koladata/internal/schema_utils.h"
4444
#include "koladata/internal/slice_builder.h"
45+
#include "koladata/internal/types_buffer.h"
4546
#include "koladata/internal/uuid_object.h"
47+
#include "arolla/dense_array/bitmap.h"
4648
#include "arolla/dense_array/dense_array.h"
4749
#include "arolla/dense_array/ops/dense_ops.h"
4850
#include "arolla/memory/optional_value.h"
@@ -234,23 +236,30 @@ class CopyingProcessor {
234236
absl::Status ProcessAttribute(const QueuedSlice& slice,
235237
const std::string_view attr_name,
236238
const DataItem& attr_schema) {
237-
const auto& ds = slice.slice;
239+
auto ds = slice.slice;
238240
DataSliceImpl old_ds;
239241
if (is_shallow_clone_) {
240242
ASSIGN_OR_RETURN(old_ds, objects_tracker_->GetAttr(ds, kMappingAttrName));
241243
} else {
242244
old_ds = ds;
243245
}
244-
ASSIGN_OR_RETURN(auto attr_ds,
245-
databag_.GetAttr(old_ds, attr_name, fallbacks_));
246-
// TODO: Extract and respect removed values.
247-
ASSIGN_OR_RETURN(auto has_attr_ds, HasOp()(attr_ds));
248-
ASSIGN_OR_RETURN(auto filtered_ds, PresenceAndOp()(ds, has_attr_ds));
246+
ASSIGN_OR_RETURN(auto attr_ds, databag_.GetAttrWithRemoved(
247+
old_ds, attr_name, fallbacks_));
248+
if (attr_ds.types_buffer().size() != 0) {
249+
auto set_mask =
250+
attr_ds.types_buffer().ToInvertedBitmap(TypesBuffer::kUnset);
251+
if (!arolla::bitmap::AreAllBitsSet(set_mask.begin(), ds.size())) {
252+
const auto& objects_array = ds.values<ObjectId>();
253+
ds = DataSliceImpl::CreateWithAllocIds(
254+
ds.allocation_ids(),
255+
ObjectIdArray{objects_array.values, std::move(set_mask)});
256+
}
257+
}
249258
if (max_depth_ == -1 || slice.depth < max_depth_) {
250-
RETURN_IF_ERROR(new_databag_->SetAttr(filtered_ds, attr_name, attr_ds));
259+
RETURN_IF_ERROR(new_databag_->SetAttr(ds, attr_name, attr_ds));
251260
}
252-
RETURN_IF_ERROR(Visit(
253-
{std::move(attr_ds), attr_schema, slice.schema_source, slice.depth}));
261+
RETURN_IF_ERROR(
262+
Visit({std::move(attr_ds), attr_schema, slice.schema_source}));
254263
return absl::OkStatus();
255264
}
256265

koladata/internal/op_utils/extract_test.cc

Lines changed: 101 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,15 +47,17 @@
4747
namespace koladata::internal {
4848
namespace {
4949

50+
using ::absl_testing::IsOkAndHolds;
5051
using ::absl_testing::StatusIs;
5152
using ::testing::IsEmpty;
5253
using ::testing::UnorderedElementsAre;
5354

5455
using ::arolla::CreateDenseArray;
5556
using ::koladata::internal::testing::DataBagEqual;
57+
using ::koladata::internal::testing::IsEquivalentTo;
5658

57-
using TriplesT = std::vector<
58-
std::pair<DataItem, std::vector<std::pair<std::string_view, DataItem>>>>;
59+
using AttrsT = std::vector<std::pair<std::string_view, DataItem>>;
60+
using TriplesT = std::vector<std::pair<DataItem, AttrsT>>;
5961

6062
DataItem AllocateSchema() {
6163
return DataItem(internal::AllocateExplicitSchema());
@@ -1019,6 +1021,103 @@ TEST_P(ExtractTest, DataSliceEntity) {
10191021
EXPECT_THAT(result_db, DataBagEqual(*expected_db));
10201022
}
10211023

1024+
TEST_P(ExtractTest, DataSliceEntityRemovedValues) {
1025+
for (int64_t size : {1, 3, 17, 1034}) {
1026+
auto obj_ids = AllocateEmptyObjects(size);
1027+
auto int_dtype = DataItem(schema::kInt32);
1028+
auto schema = AllocateSchema();
1029+
1030+
TriplesT schema_triples = {{schema, {{"x", int_dtype}, {"y", int_dtype}}}};
1031+
TriplesT data_triples;
1032+
TriplesT data_triples_fallback;
1033+
TriplesT data_triples_expected;
1034+
for (int64_t i = 0; i < size; ++i) {
1035+
if (i % 3 == 0) {
1036+
data_triples.emplace_back(obj_ids[i], AttrsT{{"x", DataItem()}});
1037+
data_triples_fallback.emplace_back(obj_ids[i],
1038+
AttrsT{{"x", DataItem(i * 2 + 97)}});
1039+
data_triples_expected.emplace_back(obj_ids[i],
1040+
AttrsT{{"x", DataItem()}});
1041+
} else if (i % 3 == 1) {
1042+
data_triples.emplace_back(obj_ids[i], AttrsT{{"x", DataItem(i * 2)}});
1043+
data_triples_fallback.emplace_back(obj_ids[i],
1044+
AttrsT{{"x", DataItem(i * 2 + 19)}});
1045+
data_triples_expected.emplace_back(obj_ids[i],
1046+
AttrsT{{"x", DataItem(i * 2)}});
1047+
} else {
1048+
data_triples_fallback.emplace_back(obj_ids[i],
1049+
AttrsT{{"x", DataItem(i * 2 - 13)}});
1050+
data_triples_expected.emplace_back(obj_ids[i],
1051+
AttrsT{{"x", DataItem(i * 2 - 13)}});
1052+
}
1053+
}
1054+
1055+
auto db = DataBagImpl::CreateEmptyDatabag();
1056+
SetSchemaTriples(*db, schema_triples);
1057+
SetDataTriples(*db, data_triples);
1058+
auto fb = DataBagImpl::CreateEmptyDatabag();
1059+
SetSchemaTriples(*fb, schema_triples);
1060+
SetDataTriples(*fb, data_triples_fallback);
1061+
SetSchemaTriples(*db, GenNoiseSchemaTriples());
1062+
SetDataTriples(*db, GenNoiseDataTriples());
1063+
1064+
auto expected_db = DataBagImpl::CreateEmptyDatabag();
1065+
SetSchemaTriples(*expected_db, schema_triples);
1066+
SetDataTriples(*expected_db, data_triples_expected);
1067+
1068+
auto result_db = DataBagImpl::CreateEmptyDatabag();
1069+
ASSERT_OK(ExtractOp(result_db.get())(obj_ids, schema, *GetMainDb(db),
1070+
{GetFallbackDb(db).get()}, nullptr,
1071+
{}));
1072+
1073+
ASSERT_NE(result_db.get(), db.get());
1074+
{
1075+
ASSERT_OK_AND_ASSIGN(auto expected_x, expected_db->GetAttr(obj_ids, "x"));
1076+
EXPECT_THAT(result_db->GetAttr(obj_ids, "x", {fb.get()}),
1077+
IsOkAndHolds(IsEquivalentTo(expected_x)));
1078+
}
1079+
}
1080+
}
1081+
1082+
TEST_P(ExtractTest, DataSliceEntityAllUnset) {
1083+
for (int64_t size : {1, 3, 17, 1034}) {
1084+
auto obj_ids = AllocateEmptyObjects(size);
1085+
auto int_dtype = DataItem(schema::kInt32);
1086+
auto schema = AllocateSchema();
1087+
1088+
TriplesT schema_triples = {{schema, {{"x", int_dtype}, {"y", int_dtype}}}};
1089+
TriplesT data_triples;
1090+
for (int64_t i = 0; i < size; ++i) {
1091+
data_triples.emplace_back(obj_ids[i],
1092+
AttrsT{{"x", DataItem(i)}});
1093+
}
1094+
1095+
auto db = DataBagImpl::CreateEmptyDatabag();
1096+
SetSchemaTriples(*db, schema_triples);
1097+
auto fb = DataBagImpl::CreateEmptyDatabag();
1098+
SetSchemaTriples(*fb, schema_triples);
1099+
SetDataTriples(*fb, data_triples);
1100+
SetSchemaTriples(*db, GenNoiseSchemaTriples());
1101+
SetDataTriples(*db, GenNoiseDataTriples());
1102+
1103+
auto expected_db = DataBagImpl::CreateEmptyDatabag();
1104+
SetSchemaTriples(*expected_db, schema_triples);
1105+
SetDataTriples(*expected_db, data_triples);
1106+
1107+
auto result_db = DataBagImpl::CreateEmptyDatabag();
1108+
ASSERT_OK(ExtractOp(result_db.get())(obj_ids, schema, *GetMainDb(db),
1109+
{GetFallbackDb(db).get()}, nullptr,
1110+
{}));
1111+
1112+
ASSERT_NE(result_db.get(), db.get());
1113+
{
1114+
ASSERT_OK_AND_ASSIGN(auto expected_x, expected_db->GetAttr(obj_ids, "x"));
1115+
EXPECT_THAT(result_db->GetAttr(obj_ids, "x", {fb.get()}),
1116+
IsOkAndHolds(IsEquivalentTo(expected_x)));
1117+
}
1118+
}
1119+
}
1120+
10221121
TEST_P(ExtractTest, MaxDepthSliceOfListsSingleAllocation) {
10231122
const DataItem obj_dtype = DataItem(schema::kObject);
10241123
const DataItem int_dtype = DataItem(schema::kInt32);

0 commit comments

Comments
 (0)