Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 114 additions & 17 deletions src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,71 @@

package org.apache.sysds.test.functions.io.hdf5;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.io.File;
import org.apache.commons.io.FileUtils;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.conf.CompilerConfig;
import org.apache.sysds.runtime.matrix.data.MatrixValue;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public abstract class ReadHDF5Test extends ReadHDF5TestBase {
public class ReadHDF5Test extends ReadHDF5TestBase {

protected abstract int getId();
private static final double eps = 1e-9;
private static final String TEST_NAME = "ReadHDF5Test";

protected String getInputHDF5FileName() {
return "transfusion_" + getId() + ".h5";
// All HDF5 read scenarios exercised by the test methods: single dataset, multiple
// datasets (2d/3d) in one file, multi-tensor sample files, and a nested-group path.
private static final List<Hdf5TestCase> TEST_CASES = Collections.unmodifiableList(
Arrays.asList(new Hdf5TestCase("test_single_dataset.h5", "data", DmlVariant.FORMAT_AND_DATASET),
new Hdf5TestCase("test_multiple_datasets.h5", "matrix_2d", DmlVariant.DATASET_ONLY),
new Hdf5TestCase("test_multiple_datasets.h5", "matrix_3d", DmlVariant.DATASET_ONLY),
new Hdf5TestCase("test_multi_tensor_samples.h5", "label", DmlVariant.DATASET_ONLY),
new Hdf5TestCase("test_multi_tensor_samples.h5", "sen1", DmlVariant.DATASET_ONLY),
new Hdf5TestCase("test_nested_groups.h5", "group1/subgroup/data2", DmlVariant.FORMAT_AND_DATASET)));

// Identifies this suite to the framework's configuration lookup
// (consumed via getTestConfiguration(getTestName()) in runReadHDF5Test).
@Override
protected String getTestName() {
return TEST_NAME;
}

private final static double eps = 1e-9;
// Returns the class-level output directory constant used by the test framework.
@Override
protected String getTestClassDir() {
return TEST_CLASS_DIR;
}

@Test
public void testHDF51_Seq_CP() {
runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, false);
/** Generates the HDF5 input files once, before any test in this class runs. */
@BeforeClass
public static void setUpClass() {
// run the R data generator inside the test-script directory
generateHdf5Data(Paths.get(SCRIPT_DIR + TEST_DIR));
}

@Test
public void testHDF51_Parallel_CP() {
runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, true);
public void testReadSequential() {
for(Hdf5TestCase tc : TEST_CASES)
runReadHDF5Test(tc, ExecMode.SINGLE_NODE, false);
}

protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parallel) {
/** Runs every HDF5 case in single-node mode with parallel text read/write enabled. */
@Test
public void testReadSequentialParallelIO() {
TEST_CASES.forEach(testCase -> runReadHDF5Test(testCase, ExecMode.SINGLE_NODE, true));
}

protected void runReadHDF5Test(Hdf5TestCase testCase, ExecMode platform, boolean parallel) {
ExecMode oldPlatform = rtplatform;
rtplatform = platform;

Expand All @@ -61,21 +94,28 @@ protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parall
boolean oldpar = CompilerConfig.FLAG_PARREADWRITE_TEXT;

try {

CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;

TestConfiguration config = getTestConfiguration(getTestName());
loadTestConfiguration(config);

String HOME = SCRIPT_DIR + TEST_DIR;
String inputMatrixName = HOME + INPUT_DIR + getInputHDF5FileName(); // always read the same data
String datasetName = "DATASET_1";
String inputMatrixName = HOME + INPUT_DIR + testCase.hdf5File;

fullDMLScriptName = HOME + testCase.variant.getScriptName();
programArgs = new String[] {"-args", inputMatrixName, testCase.dataset, output("Y")};

fullDMLScriptName = HOME + getTestName() + "_" + testNumber + ".dml";
programArgs = new String[] {"-args", inputMatrixName, datasetName, output("Y")};
// Clean per-case output/expected to avoid reusing stale metadata between looped cases
String outY = output("Y");
String expY = expected("Y");
FileUtils.deleteQuietly(new File(outY));
FileUtils.deleteQuietly(new File(outY + ".mtd"));
FileUtils.deleteQuietly(new File(expY));
FileUtils.deleteQuietly(new File(expY + ".mtd"));

fullRScriptName = HOME + "ReadHDF5_Verify.R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputMatrixName + " " + datasetName + " " + expectedDir();
rCmd = "Rscript" + " " + fullRScriptName + " " + inputMatrixName + " " + testCase.dataset + " "
+ expectedDir();

runTest(true, false, null, -1);
runRScript(true);
Expand All @@ -90,4 +130,61 @@ protected void runReadHDF5Test(int testNumber, ExecMode platform, boolean parall
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
}
}

/**
 * Invokes the R generator script (gen_HDF5_testdata.R) in the given directory to
 * (re)create the HDF5 input files. Fails the JUnit run if the script cannot be
 * started, exits non-zero, or the wait is interrupted.
 *
 * @param scriptDir working directory containing gen_HDF5_testdata.R
 */
private static void generateHdf5Data(Path scriptDir) {
ProcessBuilder builder = new ProcessBuilder("Rscript", "gen_HDF5_testdata.R")
.directory(scriptDir.toFile())
.redirectErrorStream(true); // merge stderr into stdout so one reader captures everything

try {
Process proc = builder.start();
StringBuilder captured = new StringBuilder();
try(BufferedReader reader = new BufferedReader(
new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8))) {
// drain the combined output before waiting, so the child cannot block on a full pipe
String line;
while((line = reader.readLine()) != null)
captured.append(line).append(System.lineSeparator());
}
int exitCode = proc.waitFor();
if(exitCode != 0)
Assert.fail("Failed to execute gen_HDF5_testdata.R (exit " + exitCode + "):\n" + captured);
}
catch(IOException e) {
Assert.fail("Unable to execute gen_HDF5_testdata.R: " + e.getMessage());
}
catch(InterruptedException e) {
// restore the interrupt flag before reporting the failure
Thread.currentThread().interrupt();
Assert.fail("Interrupted while generating HDF5 test data.");
}
}

// Selects which DML wrapper script a test case runs; each constant maps to a
// distinct .dml file (the variants presumably differ in how the HDF5 read call
// specifies format/dataset — confirm against the scripts themselves).
private enum DmlVariant {
FORMAT_AND_DATASET("ReadHDF5_WithFormatAndDataset.dml"), DATASET_ONLY("ReadHDF5_WithDataset.dml"),
DEFAULT("ReadHDF5_Default.dml");

// file name of the DML script for this variant, resolved under the test dir
private final String scriptName;

DmlVariant(String scriptName) {
this.scriptName = scriptName;
}

public String getScriptName() {
return scriptName;
}
}

// Immutable description of one read scenario: which HDF5 file to read, which
// dataset inside it, and which DML wrapper script variant to execute.
private static final class Hdf5TestCase {
// HDF5 input file name (resolved relative to the test input directory)
private final String hdf5File;
// dataset path inside the HDF5 file (may be nested, e.g. "group1/subgroup/data2")
private final String dataset;
// DML script variant used to read this case
private final DmlVariant variant;

private Hdf5TestCase(String hdf5File, String dataset, DmlVariant variant) {
this.hdf5File = hdf5File;
this.dataset = dataset;
this.variant = variant;
}

// Identifies the failing case in JUnit output, e.g. "file.h5::dataset".
@Override
public String toString() {
return hdf5File + "::" + dataset;
}
}
}

This file was deleted.

This file was deleted.

This file was deleted.

18 changes: 16 additions & 2 deletions src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,19 @@ options(digits=22)

library("rhdf5")

Y = h5read(args[1],args[2],native = TRUE)
writeMM(as(Y, "CsparseMatrix"), paste(args[3], "Y", sep=""))
# Read dataset args[2] from HDF5 file args[1] in native (row-major) orientation.
Y = h5read(args[1], args[2], native = TRUE)
dims = dim(Y)
# NOTE(review): if h5read returns a plain vector, dim() yields NULL (length 0) and
# control falls into the final branch — confirm 1-d datasets carry a dim attribute.

if(length(dims) == 1) {
# 1-d dataset: represent as an n x 1 column matrix
Y_mat = matrix(Y, ncol = 1)
} else if(length(dims) > 2) {
# >2-d dataset: keep the first dimension as rows and flatten the remaining
# dimensions (reversed order) into columns
perm = c(1, rev(seq(2, length(dims))))
Y_mat = matrix(aperm(Y, perm), nrow = dims[1], ncol = prod(dims[-1]))
} else {
# 2-d dataset: already matrix-shaped, use as-is (matches SystemDS output)
Y_mat = Y
}

# Write the expected result as a sparse MatrixMarket file "<dir>Y" for comparison.
writeMM(as(Y_mat, "CsparseMatrix"), paste(args[3], "Y", sep=""))
Loading