
Commit 7b6df96

add code passed build
1 parent e58d7f8 commit 7b6df96

File tree: 2 files changed (+19 additions, -45 deletions)


src/main/java/au/csiro/variantspark/utils/FileUtils.java

Lines changed: 15 additions & 41 deletions

@@ -14,47 +14,21 @@ public class FileUtils {
    */
   public static boolean isInputBGZ(final File file) {

-    // .vcf.bgz is a type of GZIP file
-    // .vcf.gz is also a GZIP file but gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
-    // .vcf.bz2 is not a GZIP file and gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
-    // .vcf is not a GZIP file and gets htsjdk.samtools.SAMFormatException at header from java.io.BufferedReader.readLine(BufferedReader.java:389)
-
-    boolean isGzip = false;
-    try {
-      isGzip = isInputGZip(file); // true if .bgz or .gz
-    } catch (IOException e) {}
-
-
-    // if the input is gzip, run the BGZF check below
-    if (isGzip) {
-
-      try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
-        bufferedInputStream.mark(100); // mark the current position
-        boolean isValid = BlockCompressedInputStream.isValidFile(bufferedInputStream);
-        bufferedInputStream.reset(); // reset back to the marked position
-        return isValid;
-      } catch (IOException e) {
-        // Handle the exception
-        return false;
-      }
-
-      // try (final BlockCompressedInputStream bgzInputStream = new BlockCompressedInputStream(file)) {
-      //   System.out.println(" inside try block: start bufferReader ...");
-      //   BufferedReader reader = new BufferedReader(new InputStreamReader(bgzInputStream));
-      //   System.out.println(" inside try block: reader.readLine()... ");
-      //   String line = reader.readLine();
-      //   return line != null && !line.isEmpty();
-      // } catch (Exception e) {
-      //   // the file is not a .vcf.bgz file;
-      //   // any exception type can be thrown depending on the file type,
-      //   // hence we catch all exception types
-      //   e.printStackTrace();
-      //   return false;
-      // }
-    }
-
-    return false;
-
+    /**
+     * .vcf.bgz is a type of GZIP file
+     * .vcf.gz is also a GZIP file but gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
+     * .vcf.bz2 is not a GZIP file and gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
+     * .vcf is not a GZIP file and gets htsjdk.samtools.SAMFormatException at header from java.io.BufferedReader.readLine(BufferedReader.java:389)
+     */
+    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
+      bufferedInputStream.mark(100); // mark the current position
+      boolean isValid = BlockCompressedInputStream.isValidFile(bufferedInputStream);
+      bufferedInputStream.reset(); // reset back to the marked position
+      return isValid;
+    } catch (IOException e) {
+      // Handle the exception
+      return false;
+    }
   }
 
   /**
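
The new version drops the extension and plain-gzip pre-checks and keys BGZF detection entirely on htsjdk's BlockCompressedInputStream.isValidFile, which inspects the BGZF magic bytes at the start of the stream rather than trusting the file name. Below is a hedged, self-contained sketch of how the reworked method reads in isolation; the real FileUtils class also contains other helpers such as isInputGZip, and the javadoc wording here is illustrative, not the repository's:

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import htsjdk.samtools.util.BlockCompressedInputStream;

public class FileUtils {

  /**
   * Returns true if the file is valid BGZF (blocked gzip), e.g. a .vcf.bgz written by bgzip.
   * Plain gzip (.vcf.gz), bzip2 (.vcf.bz2) and uncompressed .vcf inputs do not carry the
   * BGZF extra field in their headers, so isValidFile reports false for them.
   */
  public static boolean isInputBGZ(final File file) {
    try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
      in.mark(100);                                                        // remember the start of the stream
      final boolean isValid = BlockCompressedInputStream.isValidFile(in);  // checks the BGZF header bytes
      in.reset();                                                          // rewind so the buffered stream could be reused
      return isValid;
    } catch (IOException e) {
      return false;                                                        // unreadable input is treated as "not BGZF"
    }
  }
}

Checking the header up front, instead of wrapping arbitrary inputs in BlockCompressedInputStream and relying on exceptions as the removed commented-out probe did, sidesteps the OutOfMemoryError and SAMFormatException failures noted in the old comments.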

src/main/scala/au/csiro/variantspark/cli/args/SparkArgs.scala

Lines changed: 4 additions & 4 deletions

@@ -2,6 +2,7 @@ package au.csiro.variantspark.cli.args

 import org.kohsuke.args4j.Option
 import au.csiro.pbdava.ssparkle.spark.SparkApp
+import au.csiro.variantspark.utils._
 import org.apache.spark.rdd.RDD
 import htsjdk.samtools.util.BlockCompressedInputStream
 import org.apache.hadoop.fs.Path
@@ -14,10 +15,9 @@ trait SparkArgs extends SparkApp {
   val sparkPar: Int = 0

   def textFile(inputFile: String): RDD[String] = {
-    val input = new File(inputFile)
-    val isBGZ = input.getName.split('.').lastOption.getOrElse("").equalsIgnoreCase("bgz")
-    println(inputFile + " is loading to spark RDD " + isBGZ)
-    if (isBGZ) {
+    val isBGZ = FileUtils.isInputBGZ(new File(inputFile))
+    println(inputFile + " is loading to spark RDD, isBGZFile: " + isBGZ)
+    if (isBGZ) {
       val path = new Path(inputFile)
       val fs = path.getFileSystem(sc.hadoopConfiguration)
       val bgzInputStream = new BlockCompressedInputStream(fs.open(path))
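
The Scala side now delegates BGZF detection to the Java helper above and, when the test passes, wraps the Hadoop input stream in a BlockCompressedInputStream so the BGZF blocks can be decompressed; how the resulting lines become an RDD lies outside this hunk. As a rough illustration of that read path outside Spark, here is a standalone sketch that opens a path through the Hadoop FileSystem API and reads decompressed lines. The class name, the use of a fresh Configuration (SparkArgs obtains one from sc.hadoopConfiguration), and the printing loop are assumptions for the example, not code from the repository:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import htsjdk.samtools.util.BlockCompressedInputStream;

public class BgzfReadSketch {
  public static void main(String[] args) throws Exception {
    final Path path = new Path(args[0]);                            // e.g. file:///data/sample.vcf.bgz
    final FileSystem fs = path.getFileSystem(new Configuration());  // SparkArgs uses sc.hadoopConfiguration instead
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
        new BlockCompressedInputStream(fs.open(path)), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);                                   // one decompressed VCF line per iteration
      }
    }
  }
}

In the Spark path the decompressed lines are then turned into an RDD, but that part of textFile is not shown in this diff, so the sketch stops at reading lines.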
