Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5473,6 +5473,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/MapKt {
public static final fun mapNotNullValues (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/RequireKt {
public static final fun requireImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;Lkotlin/reflect/KType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
public static final fun compileTimeSchemaImpl (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.ColumnSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.impl.api.requireImpl
import kotlin.reflect.typeOf

/**
 * Verifies that [column] can be resolved in this [DataFrame] and that the runtime type
 * of the resolved column is a subtype of [C].
 *
 * The required type is captured from the reified type argument [C] and handed,
 * together with the selector, to the internal `requireImpl` check.
 *
 * @param column selector of the single column to verify.
 * @return this [DataFrame], unchanged, when the check passes.
 * @throws IllegalArgumentException if the resolved column's type is not a subtype of [C].
 * An exception is also thrown when the column can't be resolved.
 */
@Refine
@Interpretable("Require0")
public inline fun <T, reified C> DataFrame<T>.requireColumn(noinline column: ColumnSelector<T, C>): DataFrame<T> {
    val requiredType = typeOf<C>()
    return requireImpl(column, requiredType)
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.pathOf
import org.jetbrains.kotlinx.dataframe.api.toPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand All @@ -21,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingColumnGroup
import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn
import org.jetbrains.kotlinx.dataframe.nrow
import kotlin.collections.map

private fun <T> DataFrame<T>.unbox(): DataFrame<T> =
when (this) {
Expand All @@ -47,9 +49,7 @@ internal open class DataFrameReceiver<T>(
host = this@DataFrameReceiver,
).asDataColumn().cast()

UnresolvedColumnsPolicy.Fail -> error(
"Column '${path.joinToString()}' not found among ${df.columnNames()}.",
)
UnresolvedColumnsPolicy.Fail -> error(formatMissingColumnMessage(path))
}

is MissingDataColumn -> this
Expand All @@ -59,6 +59,43 @@ internal open class DataFrameReceiver<T>(
else -> this
}

// Context:
// it's strange that we have to reverse-search why the column is missing.
// It would be nice to "fail fast" exactly where resolve failed, knowing the current path and parent,
// but it's very unclear what to do with resolveSingle.
// At first glance: a lot of changes.

/**
 * Builds the error message for a column [path] that could not be resolved in `df`.
 *
 * The path is walked from its first segment downwards, and the message describes
 * the first prefix that fails:
 *  - a missing top-level segment lists the top-level column names of `df`;
 *  - a missing nested segment lists the columns of its parent group;
 *  - an intermediate segment that exists but is not a column group is reported as such.
 *
 * If every prefix resolves (or [path] is empty), a generic "not found" message
 * for the full path is returned.
 *
 * @param path the column path that failed to resolve.
 * @return a human-readable description of why [path] is missing.
 */
@Suppress("FoldInitializerAndIfToElvis")
private fun formatMissingColumnMessage(path: ColumnPath): String {
    val fullPath = path.joinToString()

    // Deepest ancestor group resolved so far; null while still at the top level of df.
    // Remembering it avoids a second lookup of the parent when a nested segment is missing.
    var parentGroup: ColumnGroup<*>? = null
    var parentPathString = ""

    for (depth in path.indices) {
        val currentPath = path.slice(0..depth).toPath()
        val currentPathString = currentPath.joinToString()
        val column = df.getColumnOrNull(currentPath)
        if (column == null) {
            return if (parentGroup == null) {
                "Column '$currentPathString' not found among ${df.columnNames()}."
            } else {
                "Column '$currentPathString' not found among columns of '$parentPathString': ${parentGroup.columnNames()}."
            }
        }

        if (depth != path.lastIndex) {
            // Only column groups can contain the remaining path segments.
            if (!column.isColumnGroup()) {
                return "Column '$fullPath' cannot be resolved: '$currentPathString' is not a column group."
            }
            parentGroup = column
            parentPathString = currentPathString
        }
    }
    return "Column '$fullPath' not found among ${df.columnNames()}."
}

// Looks the column up by name in the wrapped `df` and passes the result through `check`,
// together with a single-segment path built from [name].
// NOTE(review): `check` presumably applies the unresolved-columns policy — confirm against its definition.
override fun getColumnOrNull(name: String) = df.getColumnOrNull(name).check(pathOf(name))

// Looks the column up by position in the wrapped `df` and passes the result through `check`.
// A path with a single empty name is supplied, presumably because an index-based lookup
// has no column name to report — TODO confirm.
override fun getColumnOrNull(index: Int) = df.getColumnOrNull(index).check(pathOf(""))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import org.jetbrains.kotlinx.dataframe.ColumnSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.getColumnWithPath
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.type
import kotlin.reflect.KType

/**
 * Resolves [column] in this [DataFrame] and checks that the resolved column is a subtype of [type].
 *
 * @param column selector of the single column to check.
 * @param type the type the resolved column is required to be a subtype of.
 * @return this [DataFrame], unchanged, when the check passes.
 * @throws IllegalArgumentException if the resolved column is not a subtype of [type].
 */
@PublishedApi
internal fun <T, C> DataFrame<T>.requireImpl(column: ColumnSelector<T, C>, type: KType): DataFrame<T> {
    val resolved = getColumnWithPath(column)
    val columnData = resolved.data
    val actualType = columnData.type
    if (columnData.isSubtypeOf(type)) return this

    // NOTE(review): the wording "...is not a subtype of the required '<type>' type." was suggested in review.
    // Existing tests assert the current message verbatim, so it is reproduced unchanged here.
    throw IllegalArgumentException(
        "Column '${resolved.path.joinToString()}' has type '$actualType', which is not subtype of required '$type' type.",
    )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.assertions.throwables.shouldThrow
import io.kotest.assertions.throwables.shouldThrowAny
import io.kotest.matchers.shouldBe
import org.junit.Test

/**
 * Tests for `requireColumn`: the happy path, the type-mismatch failure,
 * and the error messages produced for columns that cannot be resolved.
 *
 * The `df` fixture comes from [ColumnsSelectionDslTests].
 */
class RequireTests : ColumnsSelectionDslTests() {

    /** A column that exists with the requested type leaves the dataframe as is. */
    @Test
    fun `require returns same dataframe for existing typed column`() {
        df.requireColumn { "name"["firstName"]<String>() } shouldBe df
    }

    /** Requesting a type the column doesn't have fails with a descriptive message. */
    @Test
    fun `require throws on type mismatch`() {
        val expectedMessage =
            "Column 'name/firstName' has type 'kotlin.String', which is not subtype of required 'kotlin.Int' type."

        shouldThrow<IllegalArgumentException> {
            df.requireColumn { "name"["firstName"]<Int>() }
        }.message shouldBe expectedMessage
    }

    /** A missing nested column is reported together with the columns of its parent group. */
    @Test
    fun `require throws when column cannot be resolved`() {
        val expectedMessage =
            "Column 'name/unknown' not found among columns of 'name': [firstName, lastName]."

        shouldThrowAny {
            df.requireColumn { "name"["unknown"]<String>() }
        }.message shouldBe expectedMessage
    }

    /** A missing top-level parent is reported together with the top-level columns. */
    @Test
    fun `require missing parent message includes available columns`() {
        val expectedMessage =
            "Column 'name2' not found among [name, age, city, weight, isHappy]."

        shouldThrowAny {
            df.requireColumn { "name2"["unknown"]<String>() }
        }.message shouldBe expectedMessage
    }

    /** For a deep path, the message points at the first missing segment and its existing parent. */
    @Test
    fun `require deep missing parent message uses nearest existing ancestor`() {
        val expectedMessage =
            "Column 'name/unknownGroup' not found among columns of 'name': [firstName, lastName]."

        shouldThrowAny {
            df.requireColumn { "name"["unknownGroup"]["value"]<String>() }
        }.message shouldBe expectedMessage
    }
}
1 change: 1 addition & 0 deletions docs/StardustDocs/d.tree
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@
<toc-element topic="rename.md"/>
<toc-element topic="reorder.md"/>
<toc-element topic="replace.md"/>
<toc-element topic="require.md"/>
<toc-element topic="reverse.md"/>
<toc-element topic="select.md"/>
<toc-element topic="sliceRows.md"/>
Expand Down
1 change: 1 addition & 0 deletions docs/StardustDocs/topics/adjustSchema.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ To match your knowledge with expected real-time [`DataFrame`](DataFrame.md) cont
* [`cast`](cast.md) — change type argument of [`DataFrame`](DataFrame.md) to the expected schema without changing data in [`DataFrame`](DataFrame.md).
* [`convertTo`](convertTo.md) — convert [`DataFrame`](DataFrame.md) contents to match the expected schema.

Alternatively, use [](require.md) to incrementally add type information to the compile-time schema.
25 changes: 25 additions & 0 deletions docs/StardustDocs/topics/require.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[//]: # (title: requireColumn)
<!---IMPORT org.jetbrains.kotlinx.dataframe.samples.api.Require-->

Throws an exception if the specified column is missing or its type is not a subtype of `C`.
From the compiler plugin perspective, a new column will appear in the compile-time schema as a result of this operation.
The aim here is to help incrementally migrate workflows to [extension properties API](extensionPropertiesApi.md).
If you end up with more than a few `requireColumn` calls, we recommend declaring a [DataSchema](dataSchema.md) and using [](cast.md) or [](convertTo.md) instead.

This operation will be supported by the compiler plugin starting from IntelliJ IDEA 2026.2 and Kotlin 2.4.0.

```text
requireColumn { column }
```

**Related operations**: [](cast.md), [](convertTo.md)

```kotlin
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Please add a Korro sample in "samples" module
    2)Might be a good idea to show this that peopleDf originally doesn't have EPs:
// Won't compile
peopleDf.select { name.firstName }
// Declare column with a runtime check
val df = peopleDf.require { "name"["firstName"]<String>() }
// Use extension properties after `require`
df.select { name.firstName }

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot do it yet because require is not supported in compiler plugin :( But i'll do after we update to 2.4.0-RC or something

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but i'll update the code snippet

Copy link
Collaborator

@AndreiKingsley AndreiKingsley Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I really didn't think about that, sorry 😄 !
But please create at least a commented function and korro marks so we don't forget and an issue.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Before `requireColumn` extension property will not be resolved
// peopleDf.select { name.firstName }

// Require a column with a runtime check
val df = peopleDf.requireColumn { "name"["firstName"]<String>() }
// Use extension property after `requireColumn`
val v: String = df.name.firstName[0]
```