Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.

Commit 6b568d7

Browse files
MaxGekk authored and cloud-fan committed
[SPARK-31237][SQL][TESTS] Replace 3-letter time zones by zone offsets
In the PR, I propose to add a few `ZoneId` constant values to the `DateTimeTestUtils` object, and reuse the constants in tests. Proposed the following constants: - PST = -08:00 - UTC = +00:00 - CEST = +02:00 - CET = +01:00 - JST = +09:00 - MIT = -09:30 - LA = America/Los_Angeles All proposed constant values (except `LA`) are initialized by zone offsets according to their definitions. This will allow to avoid: - Using of 3-letter time zones that have been already deprecated in JDK, see _Three-letter time zone IDs_ in https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html - Incorrect mapping of 3-letter time zones to zone offsets, see SPARK-31237. For example, `PST` is mapped to `America/Los_Angeles` instead of the `-08:00` zone offset. Also this should improve stability and maintainability of test suites. No By running affected test suites. Closes apache#28001 from MaxGekk/replace-pst. Authored-by: Maxim Gekk <[email protected]> Signed-off-by: Wenchen Fan <[email protected]> (cherry picked from commit cec9604) Signed-off-by: Wenchen Fan <[email protected]>
1 parent a9a185c commit 6b568d7

22 files changed

+346
-340
lines changed

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ import org.apache.spark.sql.types._
2828
class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
2929

3030
test("String fields types are inferred correctly from null types") {
31-
val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "GMT")
31+
val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "UTC")
3232
val inferSchema = new CSVInferSchema(options)
3333

3434
assert(inferSchema.inferField(NullType, "") == NullType)
@@ -48,7 +48,7 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
4848
}
4949

5050
test("String fields types are inferred correctly from other types") {
51-
val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "GMT")
51+
val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "UTC")
5252
val inferSchema = new CSVInferSchema(options)
5353

5454
assert(inferSchema.inferField(LongType, "1.0") == DoubleType)
@@ -69,18 +69,18 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
6969
}
7070

7171
test("Timestamp field types are inferred correctly via custom data format") {
72-
var options = new CSVOptions(Map("timestampFormat" -> "yyyy-mm"), false, "GMT")
72+
var options = new CSVOptions(Map("timestampFormat" -> "yyyy-mm"), false, "UTC")
7373
var inferSchema = new CSVInferSchema(options)
7474

7575
assert(inferSchema.inferField(TimestampType, "2015-08") == TimestampType)
7676

77-
options = new CSVOptions(Map("timestampFormat" -> "yyyy"), false, "GMT")
77+
options = new CSVOptions(Map("timestampFormat" -> "yyyy"), false, "UTC")
7878
inferSchema = new CSVInferSchema(options)
7979
assert(inferSchema.inferField(TimestampType, "2015") == TimestampType)
8080
}
8181

8282
test("Timestamp field types are inferred correctly from other types") {
83-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
83+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
8484
val inferSchema = new CSVInferSchema(options)
8585

8686
assert(inferSchema.inferField(IntegerType, "2015-08-20 14") == StringType)
@@ -89,15 +89,15 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
8989
}
9090

9191
test("Boolean fields types are inferred correctly from other types") {
92-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
92+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
9393
val inferSchema = new CSVInferSchema(options)
9494

9595
assert(inferSchema.inferField(LongType, "Fale") == StringType)
9696
assert(inferSchema.inferField(DoubleType, "TRUEe") == StringType)
9797
}
9898

9999
test("Type arrays are merged to highest common type") {
100-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
100+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
101101
val inferSchema = new CSVInferSchema(options)
102102

103103
assert(
@@ -112,14 +112,14 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
112112
}
113113

114114
test("Null fields are handled properly when a nullValue is specified") {
115-
var options = new CSVOptions(Map("nullValue" -> "null"), false, "GMT")
115+
var options = new CSVOptions(Map("nullValue" -> "null"), false, "UTC")
116116
var inferSchema = new CSVInferSchema(options)
117117

118118
assert(inferSchema.inferField(NullType, "null") == NullType)
119119
assert(inferSchema.inferField(StringType, "null") == StringType)
120120
assert(inferSchema.inferField(LongType, "null") == LongType)
121121

122-
options = new CSVOptions(Map("nullValue" -> "\\N"), false, "GMT")
122+
options = new CSVOptions(Map("nullValue" -> "\\N"), false, "UTC")
123123
inferSchema = new CSVInferSchema(options)
124124

125125
assert(inferSchema.inferField(IntegerType, "\\N") == IntegerType)
@@ -130,22 +130,22 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
130130
}
131131

132132
test("Merging Nulltypes should yield Nulltype.") {
133-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
133+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
134134
val inferSchema = new CSVInferSchema(options)
135135

136136
val mergedNullTypes = inferSchema.mergeRowTypes(Array(NullType), Array(NullType))
137137
assert(mergedNullTypes.sameElements(Array(NullType)))
138138
}
139139

140140
test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") {
141-
val options = new CSVOptions(Map("TiMeStampFormat" -> "yyyy-mm"), false, "GMT")
141+
val options = new CSVOptions(Map("TiMeStampFormat" -> "yyyy-mm"), false, "UTC")
142142
val inferSchema = new CSVInferSchema(options)
143143

144144
assert(inferSchema.inferField(TimestampType, "2015-08") == TimestampType)
145145
}
146146

147147
test("SPARK-18877: `inferField` on DecimalType should find a common type with `typeSoFar`") {
148-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
148+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
149149
val inferSchema = new CSVInferSchema(options)
150150

151151
withSQLConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED.key -> "true") {
@@ -166,7 +166,7 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
166166

167167
test("DoubleType should be inferred when user defined nan/inf are provided") {
168168
val options = new CSVOptions(Map("nanValue" -> "nan", "negativeInf" -> "-inf",
169-
"positiveInf" -> "inf"), false, "GMT")
169+
"positiveInf" -> "inf"), false, "UTC")
170170
val inferSchema = new CSVInferSchema(options)
171171

172172
assert(inferSchema.inferField(NullType, "nan") == DoubleType)
@@ -179,7 +179,7 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
179179
val options = new CSVOptions(
180180
parameters = Map("locale" -> langTag, "inferSchema" -> "true", "sep" -> "|"),
181181
columnPruning = false,
182-
defaultTimeZoneId = "GMT")
182+
defaultTimeZoneId = "UTC")
183183
val inferSchema = new CSVInferSchema(options)
184184

185185
val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.forLanguageTag(langTag)))

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.csv
1919

2020
import java.math.BigDecimal
2121
import java.text.{DecimalFormat, DecimalFormatSymbols}
22-
import java.time.ZoneOffset
2322
import java.util.{Locale, TimeZone}
2423

2524
import org.apache.commons.lang3.time.FastDateFormat
@@ -44,7 +43,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
4443

4544
stringValues.zip(decimalValues).foreach { case (strVal, decimalVal) =>
4645
val decimalValue = new BigDecimal(decimalVal.toString)
47-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
46+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
4847
val parser = new UnivocityParser(StructType(Seq.empty), options)
4948
assert(parser.makeConverter("_1", decimalType).apply(strVal) ===
5049
Decimal(decimalValue, decimalType.precision, decimalType.scale))
@@ -58,22 +57,22 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
5857
// Nullable field with nullValue option.
5958
types.foreach { t =>
6059
// Tests that a custom nullValue.
61-
val nullValueOptions = new CSVOptions(Map("nullValue" -> "-"), false, "GMT")
60+
val nullValueOptions = new CSVOptions(Map("nullValue" -> "-"), false, "UTC")
6261
var parser = new UnivocityParser(StructType(Seq.empty), nullValueOptions)
6362
val converter = parser.makeConverter("_1", t, nullable = true)
6463
assertNull(converter.apply("-"))
6564
assertNull(converter.apply(null))
6665

6766
// Tests that the default nullValue is empty string.
68-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
67+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
6968
parser = new UnivocityParser(StructType(Seq.empty), options)
7069
assertNull(parser.makeConverter("_1", t, nullable = true).apply(""))
7170
}
7271

7372
// Not nullable field with nullValue option.
7473
types.foreach { t =>
7574
// Casts a null to not nullable field should throw an exception.
76-
val options = new CSVOptions(Map("nullValue" -> "-"), false, "GMT")
75+
val options = new CSVOptions(Map("nullValue" -> "-"), false, "UTC")
7776
val parser = new UnivocityParser(StructType(Seq.empty), options)
7877
val converter = parser.makeConverter("_1", t, nullable = false)
7978
var message = intercept[RuntimeException] {
@@ -89,15 +88,15 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
8988
// If nullValue is different with empty string, then, empty string should not be casted into
9089
// null.
9190
Seq(true, false).foreach { b =>
92-
val options = new CSVOptions(Map("nullValue" -> "null"), false, "GMT")
91+
val options = new CSVOptions(Map("nullValue" -> "null"), false, "UTC")
9392
val parser = new UnivocityParser(StructType(Seq.empty), options)
9493
val converter = parser.makeConverter("_1", StringType, nullable = b)
9594
assert(converter.apply("") == UTF8String.fromString(""))
9695
}
9796
}
9897

9998
test("Throws exception for empty string with non null type") {
100-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
99+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
101100
val parser = new UnivocityParser(StructType(Seq.empty), options)
102101
val exception = intercept[RuntimeException]{
103102
parser.makeConverter("_1", IntegerType, nullable = false).apply("")
@@ -106,7 +105,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
106105
}
107106

108107
test("Types are cast correctly") {
109-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
108+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
110109
var parser = new UnivocityParser(StructType(Seq.empty), options)
111110
assert(parser.makeConverter("_1", ByteType).apply("10") == 10)
112111
assert(parser.makeConverter("_1", ShortType).apply("10") == 10)
@@ -117,7 +116,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
117116
assert(parser.makeConverter("_1", BooleanType).apply("true") == true)
118117

119118
var timestampsOptions =
120-
new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy HH:mm"), false, "GMT")
119+
new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy HH:mm"), false, "UTC")
121120
parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions)
122121
val customTimestamp = "31/01/2015 00:00"
123122
var format = FastDateFormat.getInstance(
@@ -130,7 +129,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
130129
assert(castedTimestamp == expectedTime * 1000L)
131130

132131
val customDate = "31/01/2015"
133-
val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "GMT")
132+
val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "UTC")
134133
parser = new UnivocityParser(StructType(Seq.empty), dateOptions)
135134
format = FastDateFormat.getInstance(
136135
dateOptions.dateFormat,
@@ -139,7 +138,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
139138
val expectedDate = format.parse(customDate).getTime
140139
val castedDate = parser.makeConverter("_1", DateType, nullable = true)
141140
.apply(customDate)
142-
assert(castedDate == DateTimeUtils.millisToDays(expectedDate, ZoneOffset.UTC))
141+
assert(castedDate == DateTimeUtils.millisToDays(expectedDate, UTC))
143142

144143
val timestamp = "2015-01-01 00:00:00"
145144
timestampsOptions = new CSVOptions(Map(
@@ -154,7 +153,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
154153
}
155154

156155
test("Throws exception for casting an invalid string to Float and Double Types") {
157-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
156+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
158157
val parser = new UnivocityParser(StructType(Seq.empty), options)
159158
val types = Seq(DoubleType, FloatType)
160159
val input = Seq("10u000", "abc", "1 2/3")
@@ -169,7 +168,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
169168
}
170169

171170
test("Float NaN values are parsed correctly") {
172-
val options = new CSVOptions(Map("nanValue" -> "nn"), false, "GMT")
171+
val options = new CSVOptions(Map("nanValue" -> "nn"), false, "UTC")
173172
val parser = new UnivocityParser(StructType(Seq.empty), options)
174173
val floatVal: Float = parser.makeConverter(
175174
"_1", FloatType, nullable = true).apply("nn").asInstanceOf[Float]
@@ -180,7 +179,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
180179
}
181180

182181
test("Double NaN values are parsed correctly") {
183-
val options = new CSVOptions(Map("nanValue" -> "-"), false, "GMT")
182+
val options = new CSVOptions(Map("nanValue" -> "-"), false, "UTC")
184183
val parser = new UnivocityParser(StructType(Seq.empty), options)
185184
val doubleVal: Double = parser.makeConverter(
186185
"_1", DoubleType, nullable = true).apply("-").asInstanceOf[Double]
@@ -189,14 +188,14 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
189188
}
190189

191190
test("Float infinite values can be parsed") {
192-
val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT")
191+
val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "UTC")
193192
var parser = new UnivocityParser(StructType(Seq.empty), negativeInfOptions)
194193
val floatVal1 = parser.makeConverter(
195194
"_1", FloatType, nullable = true).apply("max").asInstanceOf[Float]
196195

197196
assert(floatVal1 == Float.NegativeInfinity)
198197

199-
val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT")
198+
val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "UTC")
200199
parser = new UnivocityParser(StructType(Seq.empty), positiveInfOptions)
201200
val floatVal2 = parser.makeConverter(
202201
"_1", FloatType, nullable = true).apply("max").asInstanceOf[Float]
@@ -205,14 +204,14 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
205204
}
206205

207206
test("Double infinite values can be parsed") {
208-
val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT")
207+
val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "UTC")
209208
var parser = new UnivocityParser(StructType(Seq.empty), negativeInfOptions)
210209
val doubleVal1 = parser.makeConverter(
211210
"_1", DoubleType, nullable = true).apply("max").asInstanceOf[Double]
212211

213212
assert(doubleVal1 == Double.NegativeInfinity)
214213

215-
val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT")
214+
val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "UTC")
216215
parser = new UnivocityParser(StructType(Seq.empty), positiveInfOptions)
217216
val doubleVal2 = parser.makeConverter(
218217
"_1", DoubleType, nullable = true).apply("max").asInstanceOf[Double]
@@ -228,7 +227,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
228227
val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.forLanguageTag(langTag)))
229228
val input = df.format(expected.toBigDecimal)
230229

231-
val options = new CSVOptions(Map("locale" -> langTag), false, "GMT")
230+
val options = new CSVOptions(Map("locale" -> langTag), false, "UTC")
232231
val parser = new UnivocityParser(new StructType().add("d", decimalType), options)
233232

234233
assert(parser.makeConverter("_1", decimalType).apply(input) === expected)
@@ -263,7 +262,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
263262
val input = "name\t42"
264263
val expected = UTF8String.fromString(input)
265264

266-
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
265+
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
267266
val parser = new UnivocityParser(StructType(Seq.empty), options)
268267

269268
val convertedValue = parser.makeConverter("_1", StringBasedUDT, nullable = false).apply(input)
@@ -280,7 +279,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
280279
filters: Seq[Filter],
281280
expected: Option[InternalRow]): Unit = {
282281
Seq(false, true).foreach { columnPruning =>
283-
val options = new CSVOptions(Map.empty[String, String], columnPruning, "GMT")
282+
val options = new CSVOptions(Map.empty[String, String], columnPruning, "UTC")
284283
val parser = new UnivocityParser(dataSchema, requiredSchema, options, filters)
285284
val actual = parser.parse(input)
286285
assert(actual === expected)
@@ -355,8 +354,8 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
355354
val options = new CSVOptions(Map.empty[String, String], false, "UTC")
356355
check(new UnivocityParser(StructType(Seq.empty), options))
357356

358-
val optionsWithPattern =
359-
new CSVOptions(Map("timestampFormat" -> "invalid", "dateFormat" -> "invalid"), false, "UTC")
357+
val optionsWithPattern = new CSVOptions(
358+
Map("timestampFormat" -> "invalid", "dateFormat" -> "invalid"), false, "UTC")
360359
check(new UnivocityParser(StructType(Seq.empty), optionsWithPattern))
361360
}
362361
}

0 commit comments

Comments
 (0)