Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,15 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
return VolumeCheckResult.HEALTHY;
}

// At least some free space is required to check disk read/write.
// If there is not enough space remaining, skip the disk read/write check
// so that the volume is not marked as failed.
int minimumDiskSpace = healthCheckFileSize * 2;
if (volumeInfo.get().getCurrentUsage().getAvailable() < minimumDiskSpace) {
ioTestSlidingWindow.add(true);
return VolumeCheckResult.HEALTHY;
}

// Since IO errors may be intermittent, volume remains healthy until the
// threshold of failures is crossed.
boolean diskChecksPassed = DiskCheckUtil.checkReadWrite(storageDir,
Expand All @@ -625,6 +634,14 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
" interrupted.");
}

// Since writes keep happening, the disk may have become full during the check above.
// Check again whether the disk is full; if it is, keep the volume
// healthy so that reads can still be served.
if (!diskChecksPassed && volumeInfo.get().getCurrentUsage().getAvailable() < minimumDiskSpace) {
ioTestSlidingWindow.add(true);
return VolumeCheckResult.HEALTHY;
}

// Move the sliding window of IO test results forward 1 by adding the
// latest entry and removing the oldest entry from the window.
// Update the failure counter for the new window.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,53 @@ public boolean checkExistence(File storageDir) {
assertEquals(VolumeCheckResult.FAILED, result);
}

/**
 * Checks volume health reporting on a nearly full volume, running the
 * scenario first with a passing and then with a failing disk read/write check.
 *
 * @param builder builder supplied by the {@code volumeBuilders} method source
 * @throws Exception if the delegated verification throws
 */
@ParameterizedTest
@MethodSource("volumeBuilders")
public void testVolumeFullHealth(StorageVolume.Builder<?> builder) throws Exception {
  // The scenarios run in this order: read/write check passing, then failing.
  final boolean readWriteCheckPasses = true;
  final boolean readWriteCheckFails = false;
  verifyFullVolumeHealthWithDiskReadWriteStatus(builder, readWriteCheckPasses, readWriteCheckFails);
}


/**
 * For each supplied disk read/write outcome, builds a fresh volume and verifies
 * the result of {@code check} in two phases:
 * <ol>
 *   <li>With available space just below twice the configured health check file
 *       size, the volume must report HEALTHY on consecutive checks, whatever
 *       the stubbed read/write outcome is.</li>
 *   <li>Once enough space is freed, the stubbed outcome decides the result:
 *       the first check is HEALTHY, and the second is HEALTHY for a passing
 *       stub or FAILED for a failing one.</li>
 * </ol>
 *
 * @param builder builder used to create a new volume for every outcome
 * @param checkResult outcomes the stubbed disk read/write check should return,
 *                    exercised one after another
 * @throws Exception if building or checking the volume throws
 */
public void verifyFullVolumeHealthWithDiskReadWriteStatus(StorageVolume.Builder<?> builder, boolean... checkResult)
    throws Exception {

  for (boolean readWriteOutcome : checkResult) {
    StorageVolume volumeUnderTest = builder.build();

    VolumeUsage volumeUsage = volumeUnderTest.getVolumeInfo().get().getUsageForTesting();
    DatanodeConfiguration datanodeConf = CONF.getObject(DatanodeConfiguration.class);
    // Threshold is twice the configured health check file size.
    int spaceThreshold = datanodeConf.getVolumeHealthCheckFileSize() * 2;

    // Consume space until the available amount is one byte short of the threshold.
    volumeUsage.incrementUsedSpace(volumeUsage.getCurrentUsage().getAvailable() - spaceThreshold + 1);
    volumeUsage.realUsage();

    // Stub the disk read/write check so that it always reports the outcome under test.
    DiskCheckUtil.setTestImpl(new DiskCheckUtil.DiskChecks() {
      @Override
      public boolean checkReadWrite(File storageDir, File testFileDir,
          int numBytesToWrite) {
        return readWriteOutcome;
      }
    });

    // Not enough space to run the READ/WRITE check, so the volume stays healthy ...
    assertEquals(VolumeCheckResult.HEALTHY, volumeUnderTest.check(false));
    // ... and it is still HEALTHY on the second attempt.
    assertEquals(VolumeCheckResult.HEALTHY, volumeUnderTest.check(false));

    // Free enough space for the READ/WRITE check to actually run.
    volumeUsage.decrementUsedSpace(spaceThreshold + 1);

    // volumeIOFailureTolerance is 1, so the first check is always HEALTHY.
    assertEquals(VolumeCheckResult.HEALTHY, volumeUnderTest.check(false));

    // Second check: stays HEALTHY when READ/WRITE succeeds, otherwise the
    // volume fails because the READ/WRITE check has failed again.
    VolumeCheckResult expectedOnSecondCheck = readWriteOutcome
        ? VolumeCheckResult.HEALTHY
        : VolumeCheckResult.FAILED;
    assertEquals(expectedOnSecondCheck, volumeUnderTest.check(false));
  }
}

@ParameterizedTest
@MethodSource("volumeBuilders")
public void testCheckPermissions(StorageVolume.Builder<?> builder)
Expand Down