Skip to content

Commit 8db279f

Browse files
[SPARK-51830] Exception handling for partition datatype conversion call
1 parent ed702c0 commit 8db279f

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala

+15-5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal}
3838
import org.apache.spark.sql.catalyst.types.DataTypeUtils
3939
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimeFormatter, TimestampFormatter}
4040
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
41+
import org.apache.spark.sql.internal.SQLConf
4142
import org.apache.spark.sql.types._
4243
import org.apache.spark.sql.util.SchemaUtils
4344
import org.apache.spark.unsafe.types.UTF8String
@@ -361,12 +362,21 @@ object PartitioningUtils extends SQLConfHelper {
361362
}.mkString("/")
362363
}
363364

364-
def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String =
365-
dataType match {
366-
case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
367-
Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull
368-
case _ => value
365+
def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String = {
366+
try {
367+
dataType match {
368+
case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
369+
Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull
370+
case _ => value
371+
}
372+
} catch {
373+
case NonFatal(_) =>
374+
if (SQLConf.get.validatePartitionColumns) {
375+
throw QueryExecutionErrors.failedToCastValueToDataTypeForPartitionColumnError(
376+
value, dataType, null)
377+
} else value
369378
}
379+
}
370380

371381
def getPathFragment(spec: TablePartitionSpec, partitionColumns: Seq[Attribute]): String = {
372382
getPathFragment(spec, DataTypeUtils.fromAttributes(partitionColumns))

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala

+23
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils
3434
import org.apache.spark.sql.catalyst.types.DataTypeUtils
3535
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimeFormatter, TimestampFormatter}
3636
import org.apache.spark.sql.catalyst.util.DateTimeUtils.localDateTimeToMicros
37+
import org.apache.spark.sql.errors.QueryExecutionErrors
3738
import org.apache.spark.sql.execution.datasources._
3839
import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition}
3940
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable}
@@ -1405,6 +1406,28 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite {
14051406
assert("p_int=10/p_float=1.0" === path)
14061407
}
14071408

1409+
test("SPARK-51830 handle exception and validate partition column as true") {
1410+
SQLConf.get.setConf(SQLConf.VALIDATE_PARTITION_COLUMNS, true)
1411+
val spec = Map("p_int" -> "not_a_number")
1412+
val schema = new StructType().add("p_int", "int")
1413+
val exception = intercept[Exception] {
1414+
PartitioningUtils.getPathFragment(spec, schema)
1415+
}
1416+
assert(
1417+
exception.isInstanceOf[
1418+
QueryExecutionErrors.FailedToCastValueToDataTypeForPartitionColumnError
1419+
]
1420+
)
1421+
}
1422+
1423+
test("SPARK-51830 handle exception and validate partition column as false") {
1424+
SQLConf.get.setConf(SQLConf.VALIDATE_PARTITION_COLUMNS, false)
1425+
val spec = Map("p_int" -> "not_a_number")
1426+
val schema = new StructType().add("p_int", "int")
1427+
val path = PartitioningUtils.getPathFragment(spec, schema)
1428+
assert("p_int=not_a_number" === path)
1429+
}
1430+
14081431
test("SPARK-39417: Null partition value") {
14091432
// null partition value is replaced by DEFAULT_PARTITION_NAME before hitting getPathFragment.
14101433
val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME)

0 commit comments

Comments
 (0)