Skip to content

Commit 0123815

Browse files
committed
Modified the PCR duplicate removal process to use temporary files that are deleted on exit, rather than reusable intermediate files. This allows multiple copies of ESAT with different parameters to run at the same time without interfering with each other.
1 parent 8381110 commit 0123815

File tree

2 files changed

+14
-40
lines changed

2 files changed

+14
-40
lines changed

src/java/umms/core/utils/InDropPreprocess.java

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import net.sf.samtools.SAMRecordIterator;
2323

2424

25+
2526
//import org.apache.commons.math3.util.MultidimensionalCounter.Iterator;
2627
import org.apache.log4j.Logger;
2728

@@ -63,6 +64,7 @@ public InDropPreprocess(HashMap<String,ArrayList<File>> bamFiles,
6364

6465
SAMRecord r;
6566
SAMFileWriterFactory sf = new SAMFileWriterFactory();
67+
File outFile;
6668
int readsIn = 0;
6769
int readsOut = 0;
6870

@@ -88,52 +90,26 @@ public InDropPreprocess(HashMap<String,ArrayList<File>> bamFiles,
8890
SAMFileHeader bamHeader = bamReader.getFileHeader(); // get the header information
8991

9092
// create the pre-processed output BAM file:
91-
// The processed file is called <original bam file base name>_nextPrep.bam
93+
// The processed file is a temporary file named sc_umiFiltered_<generated>.bam (it was formerly <original bam file base name>_inPrep.bam)
9294
String inFile = bamFile.getCanonicalPath();
9395
int extPos = inFile.length()-4;
94-
File outFile = new File(inFile.substring(0,extPos)+"_inPrep"+inFile.substring(extPos));
96+
// File outFile = new File(inFile.substring(0,extPos)+"_inPrep"+inFile.substring(extPos));
97+
98+
// Make a temporary file for the PCR de-duplicated reads:
99+
//logger.info("creating temporary file: "+inFile.substring(0,extPos)+"xxxxxxx.bam");
100+
//outFile = File.createTempFile(inFile.substring(0,extPos), ".bam");
101+
outFile = File.createTempFile("sc_umiFiltered_", ".bam");
102+
logger.info("temporary file created: "+outFile);
103+
// delete after exit:
104+
outFile.deleteOnExit();
95105

96106
// add this file to the list of files to be processed by ESAT:
97107
if (!bamFiles_prep.containsKey(exp)) {
98108
bamFiles_prep.put(exp, new ArrayList<File>());
99109
}
100110
bamFiles_prep.get(exp).add(outFile);
101-
SAMProgramRecord prepProg = new SAMProgramRecord("ESAT");
102-
prepProg.setProgramVersion(PROGRAM_VERSION);
103-
prepProg.setAttribute("task", task);
104-
prepProg.setAttribute("wExt", ""+wExt);
105-
boolean makeNewPrepFile = true; // by default, make a new file.
106-
107-
// check for the existence of this file:
108-
if (outFile.exists()) {
109-
// open the file to read the header
110-
SAMFileReader scReader = new SAMFileReader(outFile); // open as a non-eager reader
111-
SAMFileHeader scHeader = scReader.getFileHeader(); // get the header information
112-
List<SAMProgramRecord> scProg = scHeader.getProgramRecords();
113-
// check to make sure the ESAT parameters are the same:
114-
for (SAMProgramRecord sp:scProg) {
115-
String pid = sp.getProgramGroupId();
116-
if (pid.equals("ESAT")) {
117-
// Extract all necessary parameters:
118-
String oldVer = sp.getProgramVersion();
119-
String oldTask = sp.getAttribute("task");
120-
int oldWExt = Integer.parseInt(sp.getAttribute("wExt"));
121-
if (oldVer.equals(PROGRAM_VERSION) && oldTask.equals(task) && oldWExt==wExt) {
122-
makeNewPrepFile = false;
123-
}
124-
}
125-
}
126-
}
127111

128-
if (!makeNewPrepFile) {
129-
// close the input file
130-
bamReader.close();
131-
// skip creating a new output file:
132-
continue;
133-
}
134-
135112
// copy the header from the input BAM file:
136-
bamHeader.addProgramRecord(prepProg);
137113
SAMFileWriter bamWriter = sf.makeBAMWriter(bamHeader, false, outFile);
138114

139115
//bamReader.setValidationStringency(ValidationStringency.LENIENT);

src/java/umms/esat/NewESAT.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,8 @@ public class NewESAT {
9494
private static boolean stranded; // allow for unstranded analysis (defaults to stranded)
9595

9696
/* single-cell parameters */
97-
private static boolean nextPreprocess; // Nextera library reads preprocessing flag
98-
// NOTE: barcode and UMI are in the read name, separated by "_".
9997
private static boolean scPreprocess; // inDrop library reads preprocessing flag
100-
// NOTE: barcode is encoded in filename, UMIs are in the read name, separated by "_".
98+
// NOTE: barcode and UMIs are appended to the read name, with the format "<readName>:<barcode>:<UMI>".
10199
private static int bcMin; // minimum number of reads that must be observed for a barcode to be considered valid (after PCR duplicate removal)
102100

103101
/* optional AT filter */
@@ -189,6 +187,7 @@ public NewESAT(String[] args) throws IOException, ParseException, IllegalArgumen
189187
* NOTE: This was originally specific to inDrop libraries, but is now used for ALL single-cell methods
190188
*/
191189
inDropData = new InDropPreprocess(bamFiles, annotations, qFilter, qThresh, multimap, windowExtend, stranded, task, filtAT, filtAtN);
190+
// replace the original input file list with the pre-processed (PCR de-duplicated) files:
192191
bamFiles = inDropData.getPreprocessedFiles();
193192
// Fill in barcode counts from preprocessed files, if necessary:
194193
//int rCount = inDropData.fillBarcodeCounts();
@@ -299,7 +298,6 @@ private static boolean validateArguments(ArgumentMap argMap) throws IOException
299298
stranded = argMap.isPresent("unstranded")? false : true;
300299

301300
/* single-cell pre-processing? */
302-
nextPreprocess = argMap.isPresent("nextPrep") ? true : false;
303301
scPreprocess = argMap.isPresent("scPrep") ? true : false;
304302
bcMin = argMap.isPresent("bcMin") ? argMap.getInteger("bcMin") : 0;
305303

0 commit comments

Comments
 (0)