Skip to content

Commit 08ac32d

Browse files
authored
HDDS-12608. Race condition in datanode version file creation (apache#8093)
1 parent e5ef35d commit 08ac32d

File tree

7 files changed

+29
-6
lines changed

7 files changed

+29
-6
lines changed

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java

-6
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.apache.hadoop.hdds.conf.ConfigurationSource;
2525
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
2626
import org.apache.hadoop.ozone.OzoneConsts;
27-
import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage;
2827
import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
2928
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
3029
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
@@ -88,11 +87,6 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
8887
// Check HddsVolumes
8988
checkVolumeSet(ozoneContainer.getVolumeSet(), scmId, clusterId);
9089

91-
DatanodeLayoutStorage layoutStorage
92-
= new DatanodeLayoutStorage(configuration);
93-
layoutStorage.setClusterId(clusterId);
94-
layoutStorage.persistCurrentState();
95-
9690
// Start the container services after getting the version information
9791
ozoneContainer.start(clusterId);
9892
}

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java

+6
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import org.apache.hadoop.hdds.utils.db.Table;
6666
import org.apache.hadoop.hdds.utils.db.TableIterator;
6767
import org.apache.hadoop.ozone.HddsDatanodeService;
68+
import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage;
6869
import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
6970
import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService;
7071
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
@@ -484,6 +485,11 @@ public void start(String clusterId) throws IOException {
484485
return;
485486
}
486487

488+
DatanodeLayoutStorage layoutStorage
489+
= new DatanodeLayoutStorage(config);
490+
layoutStorage.setClusterId(clusterId);
491+
layoutStorage.persistCurrentState();
492+
487493
buildContainerSet();
488494

489495
// Start background volume checks, which will begin after the configured

hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java

+11
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.hadoop.ozone.container.common;
1919

20+
import static org.apache.hadoop.ozone.common.Storage.StorageState.INITIALIZED;
2021
import static org.mockito.Mockito.any;
2122
import static org.mockito.Mockito.mock;
2223
import static org.mockito.Mockito.when;
@@ -46,6 +47,7 @@
4647
import org.apache.hadoop.ipc.ProtobufRpcEngine;
4748
import org.apache.hadoop.ipc.RPC;
4849
import org.apache.hadoop.net.NetUtils;
50+
import org.apache.hadoop.ozone.HddsDatanodeService;
4951
import org.apache.hadoop.ozone.OzoneConfigKeys;
5052
import org.apache.hadoop.ozone.container.ContainerTestHelper;
5153
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
@@ -350,4 +352,13 @@ public static XceiverServerRatis newXceiverServerRatis(
350352
getNoopContainerDispatcher(), getEmptyContainerController(),
351353
null, null);
352354
}
355+
356+
/** Initialize {@link DatanodeLayoutStorage}. Normally this is done during {@link HddsDatanodeService} start;
357+
* tests that create {@link OzoneContainer} manually have to do the same themselves. */
358+
public static void initializeDatanodeLayout(ConfigurationSource conf, DatanodeDetails dn) throws IOException {
359+
DatanodeLayoutStorage layoutStorage = new DatanodeLayoutStorage(conf, dn.getUuidString());
360+
if (layoutStorage.getState() != INITIALIZED) {
361+
layoutStorage.initialize();
362+
}
363+
}
353364
}

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java

+8
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
import org.apache.hadoop.ipc.RPC;
6464
import org.apache.hadoop.ozone.OzoneConfigKeys;
6565
import org.apache.hadoop.ozone.OzoneConsts;
66+
import org.apache.hadoop.ozone.common.Storage.StorageState;
6667
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
6768
import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
6869
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -310,6 +311,13 @@ public void testDnLayoutVersionFile() throws Exception {
310311

311312
assertEquals("different_cluster_id", layout1.getClusterID());
312313
assertNotEquals(scmServerImpl.getClusterId(), layout1.getClusterID());
314+
315+
// another call() with OzoneContainer already started should not write the file
316+
FileUtils.forceDelete(layout1.getVersionFile());
317+
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
318+
versionTask.call();
319+
assertEquals(StorageState.NOT_INITIALIZED, new DatanodeLayoutStorage(ozoneConf, "any").getState());
320+
313321
FileUtils.forceDelete(storageDir);
314322
}
315323
}

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java

+2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ public void testCreateOzoneContainer(
8282
.getOzoneContainer(datanodeDetails, conf);
8383
StorageVolumeUtil.getHddsVolumesList(container.getVolumeSet().getVolumesList())
8484
.forEach(hddsVolume -> hddsVolume.setDbParentDir(tempDir.toFile()));
85+
ContainerTestUtils.initializeDatanodeLayout(conf, datanodeDetails);
8586
//Set clusterId and manually start ozone container.
8687
container.start(UUID.randomUUID().toString());
8788

@@ -112,6 +113,7 @@ void testOzoneContainerStart(
112113
DatanodeDetails datanodeDetails = randomDatanodeDetails();
113114
container = ContainerTestUtils
114115
.getOzoneContainer(datanodeDetails, conf);
116+
ContainerTestUtils.initializeDatanodeLayout(conf, datanodeDetails);
115117

116118
String clusterId = UUID.randomUUID().toString();
117119
container.start(clusterId);

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java

+1
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ private OzoneContainer createAndStartOzoneContainerInstance() {
313313
MutableVolumeSet volumeSet = container.getVolumeSet();
314314
StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())
315315
.forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile()));
316+
ContainerTestUtils.initializeDatanodeLayout(conf, dn);
316317
container.start(clusterID);
317318
} catch (Throwable e) {
318319
if (container != null) {

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java

+1
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ void testCreateOzoneContainer(boolean requireToken, boolean hasToken,
138138
MutableVolumeSet volumeSet = container.getVolumeSet();
139139
StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())
140140
.forEach(hddsVolume -> hddsVolume.setDbParentDir(tempFolder.toFile()));
141+
ContainerTestUtils.initializeDatanodeLayout(conf, dn);
141142
//Set clusterId and manually start ozone container.
142143
container.start(UUID.randomUUID().toString());
143144

0 commit comments

Comments
 (0)