Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGNITE-23252 ItReplicaLifecycleTest is unstable #4956

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.RepeatedTest;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.api.Timeout;
Expand All @@ -237,7 +237,6 @@
@ExtendWith({WorkDirectoryExtension.class, ConfigurationExtension.class, ExecutorServiceExtension.class})
@Timeout(60)
// TODO: https://issues.apache.org/jira/browse/IGNITE-22522 remove this test after the switching to zone-based replication
@Disabled("https://issues.apache.org/jira/browse/IGNITE-23252")
public class ItReplicaLifecycleTest extends BaseIgniteAbstractTest {
private static final IgniteLogger LOG = Loggers.forClass(ItReplicaLifecycleTest.class);

Expand Down Expand Up @@ -455,7 +454,6 @@ public void testZoneReplicaListener(TestInfo testInfo) throws Exception {
}

@Test
@Disabled("https://issues.apache.org/jira/browse/IGNITE-22944")
void testAlterReplicaTrigger(TestInfo testInfo) throws Exception {
startNodes(testInfo, 3);

Expand Down Expand Up @@ -641,7 +639,7 @@ void testTableReplicaListenersCreationAfterRebalance(TestInfo testInfo) throws E
));
}

@Test
@RepeatedTest(20)
void testTableReplicaListenersRemoveAfterRebalance(TestInfo testInfo) throws Exception {
String zoneName = "TEST_ZONE";
String tableName = "TEST_TABLE";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static java.util.stream.Collectors.toSet;
import static org.apache.ignite.internal.failure.FailureType.CRITICAL_ERROR;
import static org.apache.ignite.internal.lang.IgniteStringFormatter.format;
import static org.apache.ignite.internal.lang.IgniteSystemProperties.getBoolean;
import static org.apache.ignite.internal.raft.PeersAndLearners.fromAssignments;
import static org.apache.ignite.internal.replicator.LocalReplicaEvent.AFTER_REPLICA_STARTED;
import static org.apache.ignite.internal.replicator.LocalReplicaEvent.BEFORE_REPLICA_STOPPED;
Expand Down Expand Up @@ -152,6 +153,10 @@ public class ReplicaManager extends AbstractEventProducer<LocalReplicaEvent, Loc

private static final PlacementDriverMessagesFactory PLACEMENT_DRIVER_MESSAGES_FACTORY = new PlacementDriverMessagesFactory();

/* Feature flag for the zone-based colocation track. */
// TODO IGNITE-22115 Remove it. The property name is used directly because the flag alone isn't a reason to add replicator as a dependency.
public static final boolean ZONE_COLOCATION_IS_ENABLED = getBoolean("IGNITE_ZONE_BASED_REPLICATION", false);

private final IgniteThrottledLogger throttledLog;

/** Busy lock to stop synchronously. */
Expand Down Expand Up @@ -656,7 +661,7 @@ private CompletableFuture<HybridTimestamp> stopLeaseProlongation(
}

/**
* Creates and starts a new replica.
* Creates and starts a new partition replica.
*
* @param raftGroupEventsListener Raft group events listener for raft group starting.
* @param raftGroupListener Raft group listener for raft group starting.
Expand All @@ -680,6 +685,8 @@ public CompletableFuture<Replica> startReplica(
ReplicationGroupId replicaGrpId,
PeersAndLearners newConfiguration
) throws NodeStoppingException {
assert !ZONE_COLOCATION_IS_ENABLED : "Partition replicas are prohibited while zone based colocation is enabled.";

if (!enterBusy()) {
throw new NodeStoppingException();
}
Expand Down Expand Up @@ -711,7 +718,7 @@ public CompletableFuture<Replica> startReplica(
}

/**
* Starts a replica. If a replica with the same partition id already exists, the method throws an exception.
* Starts a zone-based replica. If a replica with the same partition id already exists, the method throws an exception.
*
* @param replicaGrpId Replication group id.
* @param snapshotStorageFactory Snapshot storage factory for raft group option's parameterization.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import static org.apache.ignite.internal.event.EventListener.fromConsumer;
import static org.apache.ignite.internal.hlc.HybridTimestamp.LOGICAL_TIME_BITS_SIZE;
import static org.apache.ignite.internal.hlc.HybridTimestamp.hybridTimestampToLong;
import static org.apache.ignite.internal.lang.IgniteSystemProperties.getBoolean;
import static org.apache.ignite.internal.metastorage.dsl.Conditions.notExists;
import static org.apache.ignite.internal.metastorage.dsl.Operations.put;
import static org.apache.ignite.internal.partitiondistribution.PartitionDistributionUtils.calculateAssignmentForPartition;
Expand Down Expand Up @@ -265,6 +266,10 @@ public class TableManager implements IgniteTablesInternal, IgniteComponent {
/** Table messages factory. */
private static final PartitionReplicationMessagesFactory TABLE_MESSAGES_FACTORY = new PartitionReplicationMessagesFactory();

/* Feature flag for the zone-based colocation track. */
// TODO IGNITE-22115 remove it
public static final boolean ZONE_COLOCATION_IS_ENABLED = getBoolean(PartitionReplicaLifecycleManager.FEATURE_FLAG_NAME, false);

private final TopologyService topologyService;

/** Replica manager. */
Expand Down Expand Up @@ -654,11 +659,14 @@ public CompletableFuture<Void> startAsync(ComponentContext componentContext) {

startTables(recoveryRevision, lowWatermark.getLowWatermark());

processAssignmentsOnRecovery(recoveryRevision);
if (!ZONE_COLOCATION_IS_ENABLED) {
processAssignmentsOnRecovery(recoveryRevision);

metaStorageMgr.registerPrefixWatch(new ByteArray(PENDING_ASSIGNMENTS_PREFIX_BYTES), pendingAssignmentsRebalanceListener);
metaStorageMgr.registerPrefixWatch(new ByteArray(STABLE_ASSIGNMENTS_PREFIX_BYTES), stableAssignmentsRebalanceListener);
metaStorageMgr.registerPrefixWatch(new ByteArray(ASSIGNMENTS_SWITCH_REDUCE_PREFIX_BYTES), assignmentsSwitchRebalanceListener);
metaStorageMgr.registerPrefixWatch(new ByteArray(PENDING_ASSIGNMENTS_PREFIX_BYTES), pendingAssignmentsRebalanceListener);
metaStorageMgr.registerPrefixWatch(new ByteArray(STABLE_ASSIGNMENTS_PREFIX_BYTES), stableAssignmentsRebalanceListener);
metaStorageMgr.registerPrefixWatch(new ByteArray(ASSIGNMENTS_SWITCH_REDUCE_PREFIX_BYTES),
assignmentsSwitchRebalanceListener);
}

catalogService.listen(CatalogEvent.TABLE_CREATE, parameters -> onTableCreate((CreateTableEventParameters) parameters));
catalogService.listen(CatalogEvent.TABLE_CREATE, parameters ->
Expand Down Expand Up @@ -903,6 +911,10 @@ private void preparePartitionResourcesAndLoadToZoneReplica(


private CompletableFuture<Boolean> onPrimaryReplicaExpired(PrimaryReplicaEventParameters parameters) {
// if (ZONE_COLOCATION_IS_ENABLED) {
// return falseCompletedFuture();
// }

if (topologyService.localMember().id().equals(parameters.leaseholderId())) {
TablePartitionId groupId = (TablePartitionId) parameters.groupId();

Expand Down Expand Up @@ -1267,6 +1279,14 @@ private CompletableFuture<Void> startPartitionAndStartClient(

mvGc.addStorage(replicaGrpId, partitionUpdateHandlers.gcUpdateHandler);

minTimeCollectorService.addPartition(new TablePartitionId(tableId, partId));

if (ZONE_COLOCATION_IS_ENABLED) {
return trueCompletedFuture();
}

assert !ZONE_COLOCATION_IS_ENABLED : "Partition replicas are prohibited while zone based colocation is enabled.";

RaftGroupListener raftGroupListener = new PartitionListener(
txManager,
partitionDataStorage,
Expand All @@ -1281,8 +1301,6 @@ private CompletableFuture<Void> startPartitionAndStartClient(
minTimeCollectorService
);

minTimeCollectorService.addPartition(new TablePartitionId(tableId, partId));

SnapshotStorageFactory snapshotStorageFactory = createSnapshotStorageFactory(replicaGrpId,
partitionUpdateHandlers, internalTbl);

Expand Down Expand Up @@ -1892,6 +1910,9 @@ private CompletableFuture<Void> destroyTableLocally(int tableId) {
.collect(toSet());
metaStorageMgr.removeAll(assignmentKeys);

CompletableFuture<?> stopReplicaAndDestroyFuture;

// if (ZONE_COLOCATION_IS_ENABLED) {
CompletableFuture<?>[] stopReplicaAndDestroyFutures = new CompletableFuture<?>[partitions];

// TODO https://issues.apache.org/jira/browse/IGNITE-19170 Partitions should be stopped on the assignments change
Expand All @@ -1902,7 +1923,12 @@ private CompletableFuture<Void> destroyTableLocally(int tableId) {
stopReplicaAndDestroyFutures[partitionId] = stopAndDestroyPartition(replicationGroupId, table);
}

return allOf(stopReplicaAndDestroyFutures)
stopReplicaAndDestroyFuture = allOf(stopReplicaAndDestroyFutures);
// } else {
// stopReplicaAndDestroyFuture = nullCompletedFuture();
// }

return stopReplicaAndDestroyFuture
.thenComposeAsync(
unused -> inBusyLockAsync(busyLock, () -> allOf(
internalTable.storage().destroy(),
Expand Down Expand Up @@ -2369,6 +2395,7 @@ private CompletableFuture<Void> handleChangePendingAssignmentEvent(
}

assert replicaMgr.isReplicaStarted(replicaGrpId) : "The local node is outside of the replication group ["
+ "groupId=[type=" + replicaGrpId.getClass() + ", id=" + replicaGrpId + "]"
+ ", stable=" + stableAssignments
+ ", pending=" + pendingAssignments
+ ", localName=" + localNode().name() + "].";
Expand Down Expand Up @@ -2766,12 +2793,16 @@ private CompletableFuture<Void> stopPartition(TablePartitionId tablePartitionId,

CompletableFuture<Boolean> stopReplicaFuture;

// if (ZONE_COLOCATION_IS_ENABLED) {
// stopReplicaFuture = trueCompletedFuture();
// } else {
try {
stopReplicaFuture = replicaMgr.stopReplica(tablePartitionId);
} catch (NodeStoppingException e) {
// No-op.
stopReplicaFuture = falseCompletedFuture();
}
// }

return stopReplicaFuture
.thenCompose(v -> {
Expand Down