Skip to content

Commit 348a22a

Browse files
committed
Merge pull request #447 from datastax/wip-446-npe-when-saving-nulls
Fix NPE when saving CassandraRows containing null values. Fixes #446.
2 parents 16dffbc + 2b51235 commit 348a22a

File tree

7 files changed

+51
-24
lines changed

7 files changed

+51
-24
lines changed

CHANGES.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
* Fix NPE when saving CassandraRows containing null values (#446)
2+
13
1.1.0 rc 2
24
* Added JavaTypeConverter to make it easy to implement custom TypeConverter in Java (#429)
35
* Fix SparkSQL failures caused by presence of non-selected columns of UDT type in the table.

spark-cassandra-connector/src/it/scala/com/datastax/spark/connector/writer/TableWriterSpec.scala

+17
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class TableWriterSpec extends FlatSpec with Matchers with BeforeAndAfter with Sh
3737
session.execute("CREATE TABLE IF NOT EXISTS write_test.key_value_9 (key INT, group BIGINT, value TEXT, PRIMARY KEY (key, group))")
3838
session.execute("CREATE TABLE IF NOT EXISTS write_test.key_value_10 (key INT, group BIGINT, value TEXT, PRIMARY KEY (key, group))")
3939

40+
session.execute("CREATE TABLE IF NOT EXISTS write_test.nulls (key INT PRIMARY KEY, text_value TEXT, int_value INT)")
4041
session.execute("CREATE TABLE IF NOT EXISTS write_test.collections (key INT PRIMARY KEY, l list<text>, s set<text>, m map<text, text>)")
4142
session.execute("CREATE TABLE IF NOT EXISTS write_test.blobs (key INT PRIMARY KEY, b blob)")
4243
session.execute("CREATE TABLE IF NOT EXISTS write_test.counters (pkey INT, ckey INT, c1 counter, c2 counter, PRIMARY KEY (pkey, ckey))")
@@ -130,6 +131,22 @@ class TableWriterSpec extends FlatSpec with Matchers with BeforeAndAfter with Sh
130131
}
131132
}
132133

134+
it should "write null values" in {
135+
val key = 1.asInstanceOf[AnyRef]
136+
val row = new CassandraRow(IndexedSeq(key, null, null), IndexedSeq("key", "text_value", "int_value"))
137+
138+
sc.parallelize(Seq(row)).saveToCassandra("write_test", "nulls")
139+
conn.withSessionDo { session =>
140+
val result = session.execute("SELECT * FROM write_test.nulls").all()
141+
result should have size 1
142+
for (r <- result) {
143+
r.getInt(0) shouldBe key
144+
r.isNull(1) shouldBe true
145+
r.isNull(2) shouldBe true
146+
}
147+
}
148+
}
149+
133150
it should "write only specific column data if ColumnNames is passed as 'columnNames'" in {
134151
val col = Seq((1, 1L, None))
135152
sc.parallelize(col).saveToCassandra("write_test", "key_value_8", SomeColumns("key", "group"))

spark-cassandra-connector/src/main/scala/com/datastax/spark/connector/CassandraRow.scala

+27-18
Original file line numberDiff line numberDiff line change
@@ -99,34 +99,43 @@ final class CassandraRow(data: IndexedSeq[AnyRef], columnNames: IndexedSeq[Strin
9999
* Looks the column up by column name. Column names are case-sensitive.*/
100100
def get[T](name: String)(implicit c: TypeConverter[T]): T =
101101
get[T](_indexOfOrThrow(name))
102-
103-
/** Equivalent to `getAny` */
104-
def apply(index: Int): Any = getAny(index)
105-
def apply(name: String): Any = getAny(name)
106-
107-
def get(index: Int): AnyRef = getAnyRef(index)
108-
def get(name: String): AnyRef = getAnyRef(name)
109102

110103
/** Returns a column value without applying any conversion.
111104
* The underlying type is the same as the type returned by the low-level Cassandra driver.
112105
* May return Java null. */
113-
def getAny(index: Int) = get[Any](index)
114-
def getAny(name: String) = get[Any](name)
106+
@deprecated("Use getRaw instead", "1.1")
107+
def getAny(index: Int) = getRaw(index)
108+
@deprecated("Use getRaw instead", "1.1")
109+
def getAny(name: String) = getRaw(name)
115110

116111
/** Returns a column value without applying any conversion, besides converting a null to a None.
117112
* The underlying type is the same as the type returned by the low-level Cassandra driver.*/
118-
def getAnyOption(index: Int) = get[Option[Any]](index)
119-
def getAnyOption(name: String) = get[Option[Any]](name)
113+
@deprecated("Use getRaw and wrap the result in an Option instead", "1.1")
114+
def getAnyOption(index: Int) = Option(getRaw(index))
115+
@deprecated("Use getRaw and wrap the result in an Option instead", "1.1")
116+
def getAnyOption(name: String) = Option(getRaw(name))
120117

121-
/** Returns a column value by index without applying any conversion.
122-
* The underlying type is the same as the type returned by the low-level Cassandra driver. */
123-
def getAnyRef(index: Int) = get[AnyRef](index)
124-
def getAnyRef(name: String) = get[AnyRef](name)
118+
/** Returns a column value without applying any conversion.
119+
* The underlying type is the same as the type returned by the low-level Cassandra driver.
120+
* May return Java null. */
121+
@deprecated("Use getRaw instead", "1.1")
122+
def getAnyRef(index: Int) = getRaw(index)
123+
@deprecated("Use getRaw instead", "1.1")
124+
def getAnyRef(name: String) = getRaw(name)
125125

126126
/** Returns a column value without applying any conversion, besides converting a null to a None.
127-
* The underlying type is the same as the type returned by the low-level Cassandra driver. */
128-
def getAnyRefOption(index: Int) = get[Option[AnyRef]](index)
129-
def getAnyRefOption(name: String) = get[Option[AnyRef]](name)
127+
* The underlying type is the same as the type returned by the low-level Cassandra driver.*/
128+
@deprecated("Use getRaw and wrap the result in an Option instead", "1.1")
129+
def getAnyRefOption(index: Int) = Option(getRaw(index))
130+
@deprecated("Use getRaw and wrap the result in an Option instead", "1.1")
131+
def getAnyRefOption(name: String) = Option(getRaw(name))
132+
133+
/** Returns a column value by index without applying any conversion.
134+
* The underlying type is the same as the type returned by the low-level Cassandra driver,
135+
* is implementation defined and may change in the future.
136+
* Cassandra nulls are returned as Scala nulls. */
137+
def getRaw(index: Int): AnyRef = data(index)
138+
def getRaw(name: String): AnyRef = data(_indexOfOrThrow(name))
130139

131140
/** Returns a `bool` column value. Besides working with `bool` Cassandra type, it can also read
132141
* numbers and strings. Non-zero numbers are converted to `true`, zero is converted to `false`.

spark-cassandra-connector/src/main/scala/com/datastax/spark/connector/types/CollectionColumnType.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ case class SetType[T](elemType: ColumnType[T]) extends CollectionColumnType[Set[
3535

3636
case class MapType[K, V](keyType: ColumnType[K], valueType: ColumnType[V]) extends CollectionColumnType[Map[K, V]] {
3737
@transient
38-
lazy val converterToCassandra: TypeConverter[_] =
38+
lazy val converterToCassandra =
3939
new OptionToNullConverter(
4040
TypeConverter.javaHashMapConverter(keyType.converterToCassandra, valueType.converterToCassandra))
4141

spark-cassandra-connector/src/main/scala/com/datastax/spark/connector/types/ColumnType.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ trait ColumnType[T] extends Serializable {
1111

1212
/** Returns a converter that converts values to the type of this column expected by the
1313
* Cassandra Java driver when saving the row.*/
14-
def converterToCassandra: TypeConverter[_]
14+
def converterToCassandra: TypeConverter[_ <: AnyRef]
1515

1616
/** Returns a converter that converts values to the Scala type associated with this column. */
1717
lazy val converterToScala: TypeConverter[T] =

spark-cassandra-connector/src/main/scala/com/datastax/spark/connector/types/TypeConverter.scala

+1-2
Original file line numberDiff line numberDiff line change
@@ -631,14 +631,13 @@ object TypeConverter {
631631
new JavaHashMapConverter[K, V]
632632

633633
/** Converts Scala Options to Java nullable references. Used when saving data to Cassandra. */
634-
class OptionToNullConverter(nestedConverter: TypeConverter[_]) extends TypeConverter[AnyRef] {
634+
class OptionToNullConverter(nestedConverter: TypeConverter[_]) extends NullableTypeConverter[AnyRef] {
635635

636636
def targetTypeTag = implicitly[TypeTag[AnyRef]]
637637

638638
def convertPF = {
639639
case Some(x) => nestedConverter.convert(x).asInstanceOf[AnyRef]
640640
case None => null
641-
case null => null
642641
case x => nestedConverter.convert(x).asInstanceOf[AnyRef]
643642
}
644643
}

spark-cassandra-connector/src/main/scala/com/datastax/spark/connector/writer/GenericRowWriter.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ class GenericRowWriter(table: TableDef, selectedColumns: Seq[String])
1111
val index = data.indexOf(columnName)
1212
if (index >= 0) {
1313
val converter = table.columnByName(columnName).columnType.converterToCassandra
14-
val value = data.get[AnyRef](index)
15-
converter.convert(value).asInstanceOf[AnyRef]
14+
val value = data.getRaw(index)
15+
converter.convert(value)
1616
}
1717
else
1818
null

0 commit comments

Comments
 (0)