[SPARK-51834][SQL] Support end-to-end table constraint management #50631
base: master
File: ResolveTableSpec (org.apache.spark.sql.catalyst.analysis)

@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.analysis

 import org.apache.spark.SparkThrowable
-import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule

Review comment: What is the agreement in the community on wildcard imports? Are they permitted after a given number of elements are imported directly?

Reply: As per https://github.com/databricks/scala-style-guide?tab=readme-ov-file#imports, …
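As a side note on that guideline, the pattern in question looks like this (a sketch, not lines from the PR):

```scala
// Before: a long explicit import list from one package.
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast,
  Expression, Literal, NamedExpression, SortOrder}

// After: a wildcard once the list grows past a handful of entities,
// per the Databricks Scala style guide linked above.
import org.apache.spark.sql.catalyst.expressions._
```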
@@ -61,7 +61,7 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
       input: LogicalPlan,
       tableSpec: TableSpecBase,
       withNewSpec: TableSpecBase => LogicalPlan): LogicalPlan = tableSpec match {
-    case u: UnresolvedTableSpec if u.optionExpression.resolved =>
+    case u: UnresolvedTableSpec if u.childrenResolved =>
       val newOptions: Seq[(String, String)] = u.optionExpression.options.map {
         case (key: String, null) =>
           (key, null)
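For context, the new guard is the standard Catalyst helper; since constraints are now children of `UnresolvedTableSpec`, unresolved constraint expressions also block the conversion, not just the option expression. A minimal sketch of the semantics (not Spark code):

```scala
// childrenResolved holds only when every child is resolved; with constraints
// registered as children, an unresolved CHECK condition defers this rule.
trait Node {
  def resolved: Boolean
  def children: Seq[Node]
  def childrenResolved: Boolean = children.forall(_.resolved)
}
```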
@@ -86,6 +86,18 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
         }
         (key, newValue)
       }
+
+      u.constraints.foreach {
+        case check: CheckConstraint =>
+          if (!check.child.deterministic) {
+            check.child.failAnalysis(
+              errorClass = "NON_DETERMINISTIC_CHECK_CONSTRAINT",
+              messageParameters = Map("checkCondition" -> check.condition)
+            )
+          }
+        case _ =>
+      }
+
       val newTableSpec = TableSpec(
         properties = u.properties,
         provider = u.provider,

@@ -94,7 +106,8 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
         comment = u.comment,
         collation = u.collation,
         serde = u.serde,
-        external = u.external)
+        external = u.external,
+        constraints = u.constraints.map(_.toV2Constraint(isCreateTable = true)))
       withNewSpec(newTableSpec)
     case _ =>
       input
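A hypothetical repro of the new analysis check (catalog, table, and constraint names are made up): a CHECK condition that calls a non-deterministic function is now rejected during analysis.

```scala
// Fails analysis with error class NON_DETERMINISTIC_CHECK_CONSTRAINT,
// because rand() can return different results for the same row.
spark.sql(
  """CREATE TABLE testcat.ns.tbl (
    |  id INT,
    |  CONSTRAINT id_check CHECK (id > rand())
    |) USING foo""".stripMargin)
```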
File: table constraint expressions (org.apache.spark.sql.catalyst.expressions)
@@ -18,11 +18,17 @@ package org.apache.spark.sql.catalyst.expressions

 import java.util.UUID

+import org.apache.spark.SparkUnsupportedOperationException
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.types.{DataType, StringType}
+import org.apache.spark.sql.catalyst.util.V2ExpressionBuilder
+import org.apache.spark.sql.connector.catalog.constraints.Constraint
+import org.apache.spark.sql.connector.expressions.FieldReference
+import org.apache.spark.sql.types.DataType

-trait TableConstraint {
+trait TableConstraint extends Expression with Unevaluable {
+  // Convert to a data source v2 constraint
+  def toV2Constraint(isCreateTable: Boolean): Constraint

   /** Returns the user-provided name of the constraint */
   def userProvidedName: String
@@ -92,6 +98,11 @@ trait TableConstraint {
       )
     }
   }
+
+  override def nullable: Boolean = true
+
+  override def dataType: DataType =
+    throw new SparkUnsupportedOperationException("CONSTRAINT_DOES_NOT_HAVE_DATA_TYPE")
 }

 case class ConstraintCharacteristic(enforced: Option[Boolean], rely: Option[Boolean])
@@ -108,10 +119,30 @@ case class CheckConstraint(
     override val tableName: String = null,
     override val userProvidedCharacteristic: ConstraintCharacteristic = ConstraintCharacteristic.empty)
   extends UnaryExpression
-  with Unevaluable
   with TableConstraint {
   // scalastyle:on line.size.limit

+  def toV2Constraint(isCreateTable: Boolean): Constraint = {
+    val predicate = new V2ExpressionBuilder(child, true).buildPredicate().orNull
+    val enforced = userProvidedCharacteristic.enforced.getOrElse(true)
+    val rely = userProvidedCharacteristic.rely.getOrElse(false)
+    // The validation status is set to UNVALIDATED for create table and
+    // VALID for alter table.
+    val validateStatus = if (isCreateTable) {
+      Constraint.ValidationStatus.UNVALIDATED
+    } else {
+      Constraint.ValidationStatus.VALID
+    }
+    Constraint
+      .check(name)
+      .predicateSql(condition)
+      .predicate(predicate)
+      .rely(rely)
+      .enforced(enforced)
+      .validationStatus(validateStatus)
+      .build()
+  }
+
   override protected def withNewChildInternal(newChild: Expression): Expression =
     copy(child = newChild)

Review comment: I wonder if the input param should be related to the validation status rather than to whether it is CREATE or ALTER. For instance, we could make validation optional in ALTER.

Reply: OK, how about we make all the validation statuses …

Reply: Makes sense to me.

Review comment: Is the idea here that we always validate existing data in ALTER?

Reply: Yes, for CHECK constraints.
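For reference, with no explicit characteristics a CHECK constraint picks up the defaults visible above. A minimal sketch of the resulting v2 constraint, using the builder API from the diff (constraint name and predicate SQL are made up):

```scala
import org.apache.spark.sql.connector.catalog.constraints.Constraint

// What CheckConstraint builds for CREATE TABLE when the user writes no
// ENFORCED/RELY clause. (.predicate(...) with the compiled v2 expression
// is omitted in this sketch.)
val v2Check: Constraint = Constraint
  .check("tbl_chk")                // hypothetical constraint name
  .predicateSql("id > 0")          // original SQL text, kept for round-tripping
  .rely(false)                     // RELY defaults to false
  .enforced(true)                  // CHECK defaults to ENFORCED
  .validationStatus(Constraint.ValidationStatus.UNVALIDATED) // no data to validate yet
  .build()
```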
@@ -121,8 +152,6 @@ case class CheckConstraint(

   override def sql: String = s"CONSTRAINT $userProvidedName CHECK ($condition)"

-  override def dataType: DataType = StringType
-
   override def withUserProvidedName(name: String): TableConstraint = copy(userProvidedName = name)

   override def withTableName(tableName: String): TableConstraint = copy(tableName = tableName)
@@ -137,9 +166,20 @@ case class PrimaryKeyConstraint(
     override val userProvidedName: String = null,
     override val tableName: String = null,
     override val userProvidedCharacteristic: ConstraintCharacteristic = ConstraintCharacteristic.empty)
-  extends TableConstraint {
+  extends LeafExpression with TableConstraint {
   // scalastyle:on line.size.limit

+  override def toV2Constraint(isCreateTable: Boolean): Constraint = {
+    val enforced = userProvidedCharacteristic.enforced.getOrElse(false)
+    val rely = userProvidedCharacteristic.rely.getOrElse(false)
+    Constraint
+      .primaryKey(name, columns.map(FieldReference.column).toArray)
+      .rely(rely)
+      .enforced(enforced)
+      .validationStatus(Constraint.ValidationStatus.UNVALIDATED)
+      .build()
+  }
+
   override protected def generateName(tableName: String): String = s"${tableName}_pk"

   override def withUserProvidedName(name: String): TableConstraint = copy(userProvidedName = name)
@@ -158,9 +198,20 @@ case class UniqueConstraint(
     override val userProvidedName: String = null,
     override val tableName: String = null,
     override val userProvidedCharacteristic: ConstraintCharacteristic = ConstraintCharacteristic.empty)
-  extends TableConstraint {
+  extends LeafExpression with TableConstraint {
   // scalastyle:on line.size.limit

+  override def toV2Constraint(isCreateTable: Boolean): Constraint = {
+    val enforced = userProvidedCharacteristic.enforced.getOrElse(false)
+    val rely = userProvidedCharacteristic.rely.getOrElse(false)
+    Constraint
+      .unique(name, columns.map(FieldReference.column).toArray)
+      .rely(rely)
+      .enforced(enforced)
+      .validationStatus(Constraint.ValidationStatus.UNVALIDATED)
+      .build()
+  }
+
   override protected def generateName(tableName: String): String = {
     s"${tableName}_uniq_$randomSuffix"
   }
@@ -183,9 +234,25 @@ case class ForeignKeyConstraint(
     override val userProvidedName: String = null,
     override val tableName: String = null,
     override val userProvidedCharacteristic: ConstraintCharacteristic = ConstraintCharacteristic.empty)
-  extends TableConstraint {
+  extends LeafExpression with TableConstraint {
   // scalastyle:on line.size.limit

+  import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+  override def toV2Constraint(isCreateTable: Boolean): Constraint = {
+    val enforced = userProvidedCharacteristic.enforced.getOrElse(false)
+    val rely = userProvidedCharacteristic.rely.getOrElse(false)
+    Constraint
+      .foreignKey(name,
+        childColumns.map(FieldReference.column).toArray,
+        parentTableId.asIdentifier,
+        parentColumns.map(FieldReference.column).toArray)
+      .rely(rely)
+      .enforced(enforced)
+      .validationStatus(Constraint.ValidationStatus.UNVALIDATED)
+      .build()
+  }
+
   override protected def generateName(tableName: String): String =
     s"${tableName}_${parentTableId.last}_fk_$randomSuffix"
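Taken together, the three key constraints all default to NOT ENFORCED and UNVALIDATED, unlike CHECK, which defaults to ENFORCED. A minimal sketch for a primary key (table, constraint, and column names are made up):

```scala
import org.apache.spark.sql.connector.catalog.constraints.Constraint
import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference}

// Generated default names follow the patterns above, e.g. for a table "orders"
// with parent "customers": orders_pk, orders_uniq_<suffix>, orders_customers_fk_<suffix>.
val pk: Constraint = Constraint
  .primaryKey("orders_pk", Array[NamedReference](FieldReference.column("id")))
  .rely(false)                                               // RELY defaults to false
  .enforced(false)                                           // keys default to NOT ENFORCED
  .validationStatus(Constraint.ValidationStatus.UNVALIDATED) // never validated at DDL time
  .build()
```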
File: ALTER TABLE commands (org.apache.spark.sql.catalyst.plans.logical)
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.plans.logical

-import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition, ResolvedFieldName, UnresolvedException}
+import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition, ResolvedFieldName, ResolvedTable, UnresolvedException}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
 import org.apache.spark.sql.catalyst.expressions.{Expression, TableConstraint, Unevaluable}
@@ -295,7 +295,16 @@ case class AlterTableCollation(
 case class AddConstraint(
     table: LogicalPlan,
     tableConstraint: TableConstraint) extends AlterTableCommand {
-  override def changes: Seq[TableChange] = Seq.empty
+  override def changes: Seq[TableChange] = {
+    val constraint = tableConstraint.toV2Constraint(isCreateTable = false)
+    val validatedTableVersion = table match {
+      case t: ResolvedTable if constraint.enforced() =>
+        t.table.currentVersion()
+      case _ =>
+        null
+    }
+    Seq(TableChange.addConstraint(constraint, validatedTableVersion))
+  }

   protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild)
 }

Review comment: Created a follow-up https://issues.apache.org/jira/browse/SPARK-51835 for testing the table version.

Review comment: CHECK constraints must optionally validate existing data in ALTER.

Reply: Makes sense. Do you mean …

Reply: ENFORCED/NOT ENFORCED impacts subsequent writes. I was referring to ALTER TABLE ... ADD CONSTRAINT, which must scan the existing data.
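A sketch of what a v2 catalog would receive from this ALTER path, under the assumption that `currentVersion()` returns a string version token (all identifiers and the version value are made up):

```scala
import org.apache.spark.sql.connector.catalog.TableChange
import org.apache.spark.sql.connector.catalog.constraints.Constraint

// Roughly what ALTER TABLE t ADD CONSTRAINT positive_id CHECK (id > 0) emits.
val constraint: Constraint = Constraint
  .check("positive_id")
  .predicateSql("id > 0")
  .enforced(true)
  .validationStatus(Constraint.ValidationStatus.VALID) // ALTER path: existing data validated
  .build()

// The second argument pins the table version whose data was validated, so a
// catalog can detect writes that raced with validation; null when not enforced.
val change: TableChange = TableChange.addConstraint(constraint, "v12")
```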
@@ -308,7 +317,8 @@ case class DropConstraint(
     name: String,
     ifExists: Boolean,
     cascade: Boolean) extends AlterTableCommand {
-  override def changes: Seq[TableChange] = Seq.empty
+  override def changes: Seq[TableChange] =
+    Seq(TableChange.dropConstraint(name, ifExists, cascade))

   protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild)
 }
Review comment: The error code seems consistent with DB2 and what we use for generated columns, +1.