@@ -291,7 +291,13 @@ public static boolean isLocalMaster() {
291
291
// Diff view: lines starting with '-' are removed, lines starting with '+' are
// added, and bare numbers are old/new line numbers. This hunk makes the
// existing one-argument method pass an extra -1 to getRDDHandleForVariable and
// adds a two-argument overload that passes a caller-supplied numParts instead.
@ SuppressWarnings ("unchecked" )
292
292
public JavaPairRDD <MatrixIndexes ,MatrixBlock > getBinaryBlockRDDHandleForVariable ( String varname ) {
293
293
// Unchecked cast: getRDDHandleForVariable is declared to return JavaPairRDD<?,?>.
return (JavaPairRDD <MatrixIndexes ,MatrixBlock >)
294
- getRDDHandleForVariable ( varname , InputInfo .BinaryBlockInputInfo );
294
// -1 is forwarded as numParts; in the visible part of toMatrixJavaPairRDD a
// value that is not > 1 leads to parallelizePairs being called without an
// explicit partition count.
+ getRDDHandleForVariable ( varname , InputInfo .BinaryBlockInputInfo , -1 );
295
+ }
296
+
297
/**
 * Returns the RDD handle for the given variable, requested with
 * InputInfo.BinaryBlockInputInfo and cast to
 * {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}.
 *
 * @param varname variable name passed to getRDDHandleForVariable
 * @param numParts partition count forwarded unchanged to getRDDHandleForVariable;
 *        NOTE(review): only the default branch of getRDDHandleForMatrixObject is
 *        visible to use it, other branches are outside this diff - confirm
 * @return the handle returned by getRDDHandleForVariable (unchecked cast)
 */
+ @ SuppressWarnings ("unchecked" )
298
+ public JavaPairRDD <MatrixIndexes ,MatrixBlock > getBinaryBlockRDDHandleForVariable ( String varname , int numParts ) {
299
+ return (JavaPairRDD <MatrixIndexes ,MatrixBlock >)
300
+ getRDDHandleForVariable ( varname , InputInfo .BinaryBlockInputInfo , numParts );
295
301
}
296
302
297
303
/**
@@ -304,15 +310,19 @@ public JavaPairRDD<MatrixIndexes,MatrixBlock> getBinaryBlockRDDHandleForVariable
304
310
// Diff view of the frame variant: the only change in this hunk is the extra -1
// argument passed to getRDDHandleForVariable.
@ SuppressWarnings ("unchecked" )
305
311
public JavaPairRDD <Long ,FrameBlock > getFrameBinaryBlockRDDHandleForVariable ( String varname ) {
306
312
// Unchecked cast: getRDDHandleForVariable is declared to return JavaPairRDD<?,?>.
JavaPairRDD <Long ,FrameBlock > out = (JavaPairRDD <Long ,FrameBlock >)
307
- getRDDHandleForVariable ( varname , InputInfo .BinaryBlockInputInfo );
313
// -1 is passed as numParts; NOTE(review): the FrameObject branch of
// getRDDHandleForVariable is cut off in this diff, so whether numParts is
// used for frames at all cannot be confirmed from here.
+ getRDDHandleForVariable ( varname , InputInfo .BinaryBlockInputInfo , - 1 );
308
314
return out ;
309
315
}
310
316
311
317
/**
 * Convenience overload kept for existing callers: delegates to the
 * three-argument getRDDHandleForVariable with numParts set to -1.
 *
 * @param varname variable name
 * @param inputInfo input info forwarded unchanged
 * @return whatever the three-argument overload returns
 */
public JavaPairRDD <?,?> getRDDHandleForVariable ( String varname , InputInfo inputInfo ) {
318
// After this change the former body lives in the three-argument overload below.
+ return getRDDHandleForVariable (varname , inputInfo , -1 );
319
+ }
320
+
321
+ public JavaPairRDD <?,?> getRDDHandleForVariable ( String varname , InputInfo inputInfo , int numParts ) {
312
322
Data dat = getVariable (varname );
313
323
if ( dat instanceof MatrixObject ) {
314
324
MatrixObject mo = getMatrixObject (varname );
315
- return getRDDHandleForMatrixObject (mo , inputInfo );
325
+ return getRDDHandleForMatrixObject (mo , inputInfo , numParts );
316
326
}
317
327
else if ( dat instanceof FrameObject ) {
318
328
FrameObject fo = getFrameObject (varname );
@@ -323,16 +333,12 @@ else if( dat instanceof FrameObject ) {
323
333
}
324
334
}
325
335
326
- /**
327
- * This call returns an RDD handle for a given matrix object. This includes
328
- * the creation of RDDs for in-memory or binary-block HDFS data.
329
- *
330
- * @param mo matrix object
331
- * @param inputInfo input info
332
- * @return JavaPairRDD handle for a matrix object
333
- */
334
- @ SuppressWarnings ("unchecked" )
335
336
/**
 * This call returns an RDD handle for a given matrix object. This includes
 * the creation of RDDs for in-memory or binary-block HDFS data.
 * Delegates to the three-argument overload with numParts set to -1.
 *
 * @param mo matrix object
 * @param inputInfo input info
 * @return JavaPairRDD handle for a matrix object
 */
public JavaPairRDD <?,?> getRDDHandleForMatrixObject ( MatrixObject mo , InputInfo inputInfo ) {
337
// The diff removed the Javadoc that used to sit on this method; the
// description above is restored from that removed text.
+ return getRDDHandleForMatrixObject (mo , inputInfo , -1 );
338
+ }
339
+
340
+ @ SuppressWarnings ("unchecked" )
341
+ public JavaPairRDD <?,?> getRDDHandleForMatrixObject ( MatrixObject mo , InputInfo inputInfo , int numParts ) {
336
342
//NOTE: MB this logic should be integrated into MatrixObject
337
343
//However, for now we cannot assume that spark libraries are
338
344
//always available and hence only store generic references in
@@ -366,7 +372,7 @@ else if( mo.isDirty() || mo.isCached(false) )
366
372
}
367
373
else { //default case
368
374
MatrixBlock mb = mo .acquireRead (); //pin matrix in memory
369
- rdd = toMatrixJavaPairRDD (sc , mb , (int )mo .getNumRowsPerBlock (), (int )mo .getNumColumnsPerBlock ());
375
+ rdd = toMatrixJavaPairRDD (sc , mb , (int )mo .getNumRowsPerBlock (), (int )mo .getNumColumnsPerBlock (), numParts );
370
376
mo .release (); //unpin matrix
371
377
_parRDDs .registerRDD (rdd .id (), OptimizerUtils .estimatePartitionedSizeExactSparsity (mc ), true );
372
378
}
@@ -657,16 +663,11 @@ public void setRDDHandleForVariable(String varname, JavaPairRDD<?,?> rdd) {
657
663
obj .setRDDHandle ( rddhandle );
658
664
}
659
665
660
- /**
661
- * Utility method for creating an RDD out of an in-memory matrix block.
662
- *
663
- * @param sc java spark context
664
- * @param src matrix block
665
- * @param brlen block row length
666
- * @param bclen block column length
667
- * @return JavaPairRDD handle to matrix block
668
- */
669
666
/**
 * Utility method for creating an RDD out of an in-memory matrix block.
 * Delegates to the five-argument overload with numParts set to -1, which in
 * the visible code fails the {@code numParts > 1} check, so
 * parallelizePairs is called without an explicit partition count.
 *
 * @param sc java spark context
 * @param src matrix block
 * @param brlen block row length
 * @param bclen block column length
 * @return JavaPairRDD handle to matrix block
 */
public static JavaPairRDD <MatrixIndexes ,MatrixBlock > toMatrixJavaPairRDD (JavaSparkContext sc , MatrixBlock src , int brlen , int bclen ) {
667
+ return toMatrixJavaPairRDD (sc , src , brlen , bclen , -1 );
668
+ }
669
+
670
+ public static JavaPairRDD <MatrixIndexes ,MatrixBlock > toMatrixJavaPairRDD (JavaSparkContext sc , MatrixBlock src , int brlen , int bclen , int numParts ) {
670
671
long t0 = DMLScript .STATISTICS ? System .nanoTime () : 0 ;
671
672
List <Tuple2 <MatrixIndexes ,MatrixBlock >> list = null ;
672
673
@@ -681,7 +682,9 @@ public static JavaPairRDD<MatrixIndexes,MatrixBlock> toMatrixJavaPairRDD(JavaSpa
681
682
.collect (Collectors .toList ());
682
683
}
683
684
684
- JavaPairRDD <MatrixIndexes ,MatrixBlock > result = sc .parallelizePairs (list );
685
+ JavaPairRDD <MatrixIndexes ,MatrixBlock > result = (numParts > 1 ) ?
686
+ sc .parallelizePairs (list , numParts ) : sc .parallelizePairs (list );
687
+
685
688
if (DMLScript .STATISTICS ) {
686
689
Statistics .accSparkParallelizeTime (System .nanoTime () - t0 );
687
690
Statistics .incSparkParallelizeCount (1 );
0 commit comments