-
Notifications
You must be signed in to change notification settings - Fork 3k
[WIP] Replace transactions rebase onto refreshed metadata #15092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
60bf394
078801d
4a1b7f8
0beff6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,6 +47,7 @@ | |
| import org.apache.iceberg.metrics.MetricsReporter; | ||
| import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Lists; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Sets; | ||
|
|
@@ -80,6 +81,12 @@ enum TransactionType { | |
| private boolean hasLastOpCommitted; | ||
| private final MetricsReporter reporter; | ||
|
|
||
| private Schema replaceSchema; | ||
| private PartitionSpec replaceSpec; | ||
| private SortOrder replaceSortOrder; | ||
| private String replaceLocation; | ||
| private Map<String, String> replaceProperties; | ||
|
|
||
| BaseTransaction( | ||
| String tableName, TableOperations ops, TransactionType type, TableMetadata start) { | ||
| this(tableName, ops, type, start, LoggingMetricsReporter.instance()); | ||
|
|
@@ -101,6 +108,17 @@ enum TransactionType { | |
| this.type = type; | ||
| this.hasLastOpCommitted = true; | ||
| this.reporter = reporter; | ||
|
|
||
| // For replace-style transactions, the provided TableMetadata contains the information needed to | ||
| // build the replaced table state. This is stored so the replacement can be re-applied on top of | ||
| // refreshed metadata on commit. | ||
| if (type == TransactionType.REPLACE_TABLE || type == TransactionType.CREATE_OR_REPLACE_TABLE) { | ||
| this.replaceSchema = start.schema(); | ||
| this.replaceSpec = start.spec(); | ||
| this.replaceSortOrder = start.sortOrder(); | ||
| this.replaceLocation = start.location(); | ||
| this.replaceProperties = ImmutableMap.copyOf(start.properties()); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -260,12 +278,8 @@ public void commitTransaction() { | |
| commitCreateTransaction(); | ||
| break; | ||
|
|
||
| case REPLACE_TABLE: | ||
| commitReplaceTransaction(false); | ||
| break; | ||
|
|
||
| case CREATE_OR_REPLACE_TABLE: | ||
| commitReplaceTransaction(true); | ||
| case REPLACE_TABLE, CREATE_OR_REPLACE_TABLE: | ||
| commitReplaceTransaction(); | ||
| break; | ||
|
|
||
| case SIMPLE: | ||
|
|
@@ -298,9 +312,13 @@ private void commitCreateTransaction() { | |
| } | ||
| } | ||
|
|
||
| private void commitReplaceTransaction(boolean orCreate) { | ||
| private void commitReplaceTransaction() { | ||
| Map<String, String> props = base != null ? base.properties() : current.properties(); | ||
|
|
||
| Set<Long> startingSnapshots = | ||
| base != null | ||
| ? base.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet()) | ||
| : ImmutableSet.of(); | ||
| try { | ||
| Tasks.foreach(ops) | ||
| .retry(PropertyUtil.propertyAsInt(props, COMMIT_NUM_RETRIES, COMMIT_NUM_RETRIES_DEFAULT)) | ||
|
|
@@ -315,40 +333,27 @@ private void commitReplaceTransaction(boolean orCreate) { | |
| .onlyRetryOn(CommitFailedException.class) | ||
| .run( | ||
| underlyingOps -> { | ||
| try { | ||
| underlyingOps.refresh(); | ||
| } catch (NoSuchTableException e) { | ||
| if (!orCreate) { | ||
| throw e; | ||
| } | ||
| } | ||
|
|
||
| // because this is a replace table, it will always completely replace the table | ||
| // metadata. even if it was just updated. | ||
| if (base != underlyingOps.current()) { | ||
| this.base = underlyingOps.current(); // just refreshed | ||
| } | ||
| applyUpdates(underlyingOps); | ||
|
|
||
| underlyingOps.commit(base, current); | ||
| }); | ||
|
|
||
| } catch (CommitStateUnknownException e) { | ||
| throw e; | ||
|
|
||
| } catch (PendingUpdateFailedException e) { | ||
| cleanUpOnCommitFailure(); | ||
| throw e.wrapped(); | ||
|
|
||
| } catch (RuntimeException e) { | ||
| // the commit failed and no files were committed. clean up each update. | ||
| if (!ops.requireStrictCleanup() || e instanceof CleanableFailure) { | ||
| cleanAllUpdates(); | ||
| cleanUpOnCommitFailure(); | ||
| } | ||
|
|
||
| throw e; | ||
|
|
||
| } finally { | ||
| // replace table never needs to retry because the table state is completely replaced. because | ||
| // retries are not | ||
| // a concern, it is safe to delete all the deleted files from individual operations | ||
| deleteUncommittedFiles(deletedFiles); | ||
| } | ||
|
|
||
| cleanUpAfterCommitSuccess(startingSnapshots); | ||
| } | ||
|
|
||
| private void commitSimpleTransaction() { | ||
|
|
@@ -381,6 +386,7 @@ private void commitSimpleTransaction() { | |
| } catch (PendingUpdateFailedException e) { | ||
| cleanUpOnCommitFailure(); | ||
| throw e.wrapped(); | ||
|
|
||
| } catch (RuntimeException e) { | ||
| if (!ops.requireStrictCleanup() || e instanceof CleanableFailure) { | ||
| cleanUpOnCommitFailure(); | ||
|
|
@@ -389,8 +395,18 @@ private void commitSimpleTransaction() { | |
| throw e; | ||
| } | ||
|
|
||
| // the commit succeeded | ||
| cleanUpAfterCommitSuccess(startingSnapshots); | ||
| } | ||
|
|
||
| private void cleanUpOnCommitFailure() { | ||
| // the commit failed and no files were committed. clean up each update. | ||
| cleanAllUpdates(); | ||
|
|
||
| // delete all the uncommitted files | ||
| deleteUncommittedFiles(deletedFiles); | ||
| } | ||
|
|
||
| private void cleanUpAfterCommitSuccess(Set<Long> startingSnapshots) { | ||
| try { | ||
| // clean up the data files that were deleted by each operation. first, get the list of | ||
| // committed manifests to ensure that no committed manifest is deleted. | ||
|
|
@@ -420,14 +436,6 @@ private void commitSimpleTransaction() { | |
| } | ||
| } | ||
|
|
||
| private void cleanUpOnCommitFailure() { | ||
| // the commit failed and no files were committed. clean up each update. | ||
| cleanAllUpdates(); | ||
|
|
||
| // delete all the uncommitted files | ||
| deleteUncommittedFiles(deletedFiles); | ||
| } | ||
|
|
||
| private void cleanAllUpdates() { | ||
| Tasks.foreach(updates) | ||
| .suppressFailureWhenFinished() | ||
|
|
@@ -459,10 +467,40 @@ private void deleteUncommittedFiles(Iterable<String> paths) { | |
| } | ||
|
|
||
| private void applyUpdates(TableOperations underlyingOps) { | ||
| if (base != underlyingOps.refresh()) { | ||
| // use refreshed the metadata | ||
| try { | ||
| underlyingOps.refresh(); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: |
||
| } catch (NoSuchTableException e) { | ||
| if (type == TransactionType.CREATE_OR_REPLACE_TABLE) { | ||
| return; | ||
| } | ||
| throw e; | ||
| } | ||
|
|
||
| if (base != underlyingOps.current()) { | ||
| // use the refreshed metadata | ||
| this.base = underlyingOps.current(); | ||
| this.current = underlyingOps.current(); | ||
|
|
||
| this.current = | ||
| switch (type) { | ||
| case REPLACE_TABLE, CREATE_OR_REPLACE_TABLE -> | ||
| // Even if we are dealing with a replace-style transaction, we need to re-apply | ||
| // updates on top of the refreshed metadata's replacement, because of (1) possible | ||
| // row lineage requirements, and (2) to not overwrite the metadata with an outdated | ||
| // replacement that may cause history loss or table corruption. | ||
| underlyingOps | ||
| .current() | ||
| .buildReplacement( | ||
| replaceSchema, | ||
| replaceSpec, | ||
| replaceSortOrder, | ||
| replaceLocation, | ||
| replaceProperties); | ||
| case SIMPLE -> underlyingOps.current(); | ||
| case CREATE_TABLE -> | ||
| throw new IllegalStateException( | ||
| "Transaction update application not expected for create transactions"); | ||
| }; | ||
|
|
||
| for (PendingUpdate update : updates) { | ||
| // re-commit each update in the chain to apply it and update current | ||
| try { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2655,15 +2655,20 @@ public void testReplaceTableKeepsSnapshotLog() { | |
| .containsExactly(snapshotBeforeReplace, snapshotAfterReplace); | ||
| } | ||
|
|
||
| @Test | ||
| public void testConcurrentReplaceTransactions() { | ||
| @ParameterizedTest | ||
| @ValueSource(ints = {2, 3}) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #15091 shows the failure of this V3 test, prior to this PR |
||
| public void testConcurrentReplaceTransactions(int formatVersion) { | ||
| C catalog = catalog(); | ||
|
|
||
| if (requiresNamespaceCreate()) { | ||
| catalog.createNamespace(NS); | ||
| } | ||
|
|
||
| Transaction transaction = catalog.buildTable(TABLE, SCHEMA).createTransaction(); | ||
| Transaction transaction = | ||
| catalog | ||
| .buildTable(TABLE, SCHEMA) | ||
| .withProperty("format-version", String.valueOf(formatVersion)) | ||
| .createTransaction(); | ||
| transaction.newFastAppend().appendFile(FILE_A).commit(); | ||
| transaction.commitTransaction(); | ||
|
|
||
|
|
@@ -2691,6 +2696,8 @@ public void testConcurrentReplaceTransactions() { | |
| secondReplace.commitTransaction(); | ||
|
|
||
| Table afterSecondReplace = catalog.loadTable(TABLE); | ||
| // All three successfully committed snapshots should be present | ||
| assertThat(afterSecondReplace.snapshots()).hasSize(3); | ||
|
Comment on lines
+2699
to
+2700
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #15090 shows the failure of this added line, prior to this PR
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you just add a new test please to show where exactly stuff fails with V3?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense - I've updated the PR description in the meantime to cover this. |
||
| assertThat(afterSecondReplace.schema().asStruct()) | ||
| .as("Table schema should match the original schema") | ||
| .isEqualTo(original.schema().asStruct()); | ||
|
|
@@ -2734,9 +2741,15 @@ public void testConcurrentReplaceTransactionSchema() { | |
| secondReplace.commitTransaction(); | ||
|
|
||
| Table afterSecondReplace = catalog.loadTable(TABLE); | ||
|
|
||
| // The second replace will be rebased on the first replace, so new field IDs will be assigned | ||
| assertThat(afterSecondReplace.schema().asStruct()) | ||
| .as("Table schema should match the original schema") | ||
| .isEqualTo(original.schema().asStruct()); | ||
| .as("Table schema should differ from the original schema") | ||
| .isNotEqualTo(original.schema().asStruct()); | ||
| assertThat(afterSecondReplace.schema().select("name", "type").asStruct()) | ||
| .as("Table schema should match the original schema in names and types") | ||
| .isEqualTo(original.schema().select("name", "type").asStruct()); | ||
|
|
||
| assertUUIDsMatch(original, afterSecondReplace); | ||
| assertFiles(afterSecondReplace, FILE_C); | ||
| } | ||
|
|
@@ -2782,8 +2795,6 @@ public void testConcurrentReplaceTransactionSchema2() { | |
|
|
||
| @Test | ||
| public void testConcurrentReplaceTransactionSchemaConflict() { | ||
| assumeThat(supportsServerSideRetry()).as("Schema conflicts are detected server-side").isTrue(); | ||
|
|
||
| C catalog = catalog(); | ||
|
|
||
| if (requiresNamespaceCreate()) { | ||
|
|
@@ -2812,12 +2823,11 @@ public void testConcurrentReplaceTransactionSchemaConflict() { | |
| assertUUIDsMatch(original, afterFirstReplace); | ||
| assertFiles(afterFirstReplace, FILE_B); | ||
|
|
||
| // even though the new schema is identical, the assertion that the last assigned id has not | ||
| // changed will fail | ||
| assertThatThrownBy(secondReplace::commitTransaction) | ||
| .isInstanceOf(CommitFailedException.class) | ||
| .hasMessageStartingWith( | ||
| "Commit failed: Requirement failed: last assigned field id changed"); | ||
| secondReplace.commitTransaction(); | ||
| Table afterSecondReplace = catalog.loadTable(TABLE); | ||
| assertThat(afterSecondReplace.schema().asStruct()) | ||
| .as("Table schema should match the new schema") | ||
| .isEqualTo(REPLACE_SCHEMA.asStruct()); | ||
| } | ||
|
|
||
| @Test | ||
|
|
@@ -2902,7 +2912,6 @@ public void testConcurrentReplaceTransactionPartitionSpec2() { | |
|
|
||
| @Test | ||
| public void testConcurrentReplaceTransactionPartitionSpecConflict() { | ||
| assumeThat(supportsServerSideRetry()).as("Spec conflicts are detected server-side").isTrue(); | ||
| C catalog = catalog(); | ||
|
|
||
| if (requiresNamespaceCreate()) { | ||
|
|
@@ -2932,12 +2941,10 @@ public void testConcurrentReplaceTransactionPartitionSpecConflict() { | |
| assertUUIDsMatch(original, afterFirstReplace); | ||
| assertFiles(afterFirstReplace, FILE_B); | ||
|
|
||
| // even though the new spec is identical, the assertion that the last assigned id has not | ||
| // changed will fail | ||
| assertThatThrownBy(secondReplace::commitTransaction) | ||
| .isInstanceOf(CommitFailedException.class) | ||
| .hasMessageStartingWith( | ||
| "Commit failed: Requirement failed: last assigned partition id changed"); | ||
| secondReplace.commitTransaction(); | ||
| assertThat(catalog.loadTable(TABLE).spec().fields()) | ||
| .as("Table spec should match the new spec") | ||
| .isEqualTo(TABLE_SPEC.fields()); | ||
| } | ||
|
|
||
| @Test | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TODO: This change turned out to be more breaking than I expected. If we want to proceed, see if this can be cleaned up
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Realise this is the sort of change that'd require a dev list discussion - I wanted to experiment with this approach first)