
Commit 7b6df96

add code passed build
1 parent e58d7f8 commit 7b6df96

File tree: 2 files changed (+19 additions, -45 deletions)


src/main/java/au/csiro/variantspark/utils/FileUtils.java

Lines changed: 15 additions & 41 deletions

@@ -14,47 +14,21 @@ public class FileUtils {
    */
   public static boolean isInputBGZ(final File file) {

-    // .vcf.bgz is a type of GZIP file
-    // .vcf.gz is also a GZIP file but gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
-    // .vcf.bz2 is not a GZIP file and gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
-    // .vcf is not a GZIP file and gets htsjdk.samtools.SAMFormatException at header from java.io.BufferedReader.readLine(BufferedReader.java:389)
-
-    boolean isGzip = false;
-    try {
-      isGzip = isInputGZip(file); // true if .bgz or .gz
-    } catch (IOException e) {}
-
-
-    // if the input is gzip, run the BGZF check below
-    if (isGzip) {
-
-      try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
-        bufferedInputStream.mark(100); // mark the current position
-        boolean isValid = BlockCompressedInputStream.isValidFile(bufferedInputStream);
-        bufferedInputStream.reset(); // reset back to the marked position
-        return isValid;
-      } catch (IOException e) {
-        // Handle the exception
-        return false;
-      }
-
-      // try (final BlockCompressedInputStream bgzInputStream = new BlockCompressedInputStream(file)) {
-      //   System.out.println(" inside try block: start bufferReader ...");
-      //   BufferedReader reader = new BufferedReader(new InputStreamReader(bgzInputStream));
-      //   System.out.println(" inside try block: reader.readLine()... ");
-      //   String line = reader.readLine();
-      //   return line != null && !line.isEmpty();
-      // } catch (Exception e) {
-      //   // the file is not a .vcf.bgz file;
-      //   // any exception type can be thrown depending on the file type,
-      //   // hence we catch all exception types
-      //   e.printStackTrace();
-      //   return false;
-      // }
-    }
-
-    return false;
-
+    /**
+     * .vcf.bgz is a type of GZIP file
+     * .vcf.gz is also a GZIP file but gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
+     * .vcf.bz2 is not a GZIP file and gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
+     * .vcf is not a GZIP file and gets htsjdk.samtools.SAMFormatException at header from java.io.BufferedReader.readLine(BufferedReader.java:389)
+     */
+    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
+      bufferedInputStream.mark(100); // mark the current position
+      boolean isValid = BlockCompressedInputStream.isValidFile(bufferedInputStream);
+      bufferedInputStream.reset(); // reset back to the marked position
+      return isValid;
+    } catch (IOException e) {
+      // Handle the exception
+      return false;
+    }
   }
 
   /**
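
The new version drops the extension and plain-gzip pre-checks and keys BGZF detection entirely on htsjdk's BlockCompressedInputStream.isValidFile, which inspects the BGZF magic bytes at the start of the stream rather than trusting the file name. Below is a hedged, self-contained sketch of how the reworked method reads in isolation; the real FileUtils class also contains other helpers such as isInputGZip, and the javadoc wording here is illustrative, not the repository's:

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import htsjdk.samtools.util.BlockCompressedInputStream;

public class FileUtils {

  /**
   * Returns true if the file is valid BGZF (blocked gzip), e.g. a .vcf.bgz written by bgzip.
   * Plain gzip (.vcf.gz), bzip2 (.vcf.bz2) and uncompressed .vcf inputs do not carry the
   * BGZF extra field in their headers, so isValidFile reports false for them.
   */
  public static boolean isInputBGZ(final File file) {
    try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
      in.mark(100);                                                        // remember the start of the stream
      final boolean isValid = BlockCompressedInputStream.isValidFile(in);  // checks the BGZF header bytes
      in.reset();                                                          // rewind so the buffered stream could be reused
      return isValid;
    } catch (IOException e) {
      return false;                                                        // unreadable input is treated as "not BGZF"
    }
  }
}

Checking the header up front, instead of wrapping arbitrary inputs in BlockCompressedInputStream and relying on exceptions as the removed commented-out probe did, sidesteps the OutOfMemoryError and SAMFormatException failures noted in the old comments.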

src/main/scala/au/csiro/variantspark/cli/args/SparkArgs.scala

Lines changed: 4 additions & 4 deletions

@@ -2,6 +2,7 @@ package au.csiro.variantspark.cli.args

 import org.kohsuke.args4j.Option
 import au.csiro.pbdava.ssparkle.spark.SparkApp
+import au.csiro.variantspark.utils._
 import org.apache.spark.rdd.RDD
 import htsjdk.samtools.util.BlockCompressedInputStream
 import org.apache.hadoop.fs.Path
@@ -14,10 +15,9 @@ trait SparkArgs extends SparkApp {
   val sparkPar: Int = 0

   def textFile(inputFile: String): RDD[String] = {
-    val input = new File(inputFile)
-    val isBGZ = input.getName.split('.').lastOption.getOrElse("").equalsIgnoreCase("bgz")
-    println(inputFile + " is loading to spark RDD " + isBGZ)
-    if (isBGZ) {
+    val isBGZ = FileUtils.isInputBGZ(new File(inputFile))
+    println(inputFile + " is loading to spark RDD, isBGZFile: " + isBGZ)
+    if (isBGZ) {
       val path = new Path(inputFile)
       val fs = path.getFileSystem(sc.hadoopConfiguration)
       val bgzInputStream = new BlockCompressedInputStream(fs.open(path))
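
The Scala side now delegates BGZF detection to the Java helper above and, when the test passes, wraps the Hadoop input stream in a BlockCompressedInputStream so the BGZF blocks can be decompressed; how the resulting lines become an RDD lies outside this hunk. As a rough illustration of that read path outside Spark, here is a standalone sketch that opens a path through the Hadoop FileSystem API and reads decompressed lines. The class name, the use of a fresh Configuration (SparkArgs obtains one from sc.hadoopConfiguration), and the printing loop are assumptions for the example, not code from the repository:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import htsjdk.samtools.util.BlockCompressedInputStream;

public class BgzfReadSketch {
  public static void main(String[] args) throws Exception {
    final Path path = new Path(args[0]);                            // e.g. file:///data/sample.vcf.bgz
    final FileSystem fs = path.getFileSystem(new Configuration());  // SparkArgs uses sc.hadoopConfiguration instead
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
        new BlockCompressedInputStream(fs.open(path)), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);                                   // one decompressed VCF line per iteration
      }
    }
  }
}

In the Spark path the decompressed lines are then turned into an RDD, but that part of textFile is not shown in this diff, so the sketch stops at reading lines.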
