
Commit d596f02

tledkov-gridgain authored and devozerov committed
IGNITE-4507: Hadoop: added direct output support for combiner. This closes apache#1434.
1 parent d6d42c2 commit d596f02

File tree: 12 files changed (+145, -95 lines)

modules/core/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopTaskContext.java

Lines changed: 10 additions & 0 deletions

@@ -207,4 +207,14 @@ public void output(HadoopTaskOutput out) {
      * @throws IgniteCheckedException On any error in callable.
      */
     public abstract <T> T runAsJobOwner(Callable<T> c) throws IgniteCheckedException;
+
+    /**
+     * Callback invoked from mapper thread when map is finished.
+     *
+     * @throws IgniteCheckedException If failed.
+     */
+    public void onMapperFinished() throws IgniteCheckedException {
+        if (output instanceof HadoopMapperAwareTaskOutput)
+            ((HadoopMapperAwareTaskOutput)output).onMapperFinished();
+    }
 }

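Note: the new onMapperFinished() hook dispatches to the task output only when that output implements HadoopMapperAwareTaskOutput, so outputs that do not track mapper boundaries are unaffected. A minimal sketch of the pattern in plain Java (everything except the HadoopMapperAwareTaskOutput/onMapperFinished names is hypothetical):

// Marker-style interface for outputs that need a per-mapper flush point.
interface MapperAwareOutput {
    void onMapperFinished() throws Exception;
}

// Hypothetical output that buffers records per mapper and flushes on the callback.
class BufferingOutput implements MapperAwareOutput {
    private final StringBuilder buf = new StringBuilder();

    void write(String rec) { buf.append(rec).append('\n'); }

    @Override public void onMapperFinished() {
        System.out.print(buf); // flush everything buffered for this mapper downstream

        buf.setLength(0);
    }
}

class TaskContextSketch {
    private Object output = new BufferingOutput();

    // Mirrors the added HadoopTaskContext.onMapperFinished(): dispatch only
    // if the configured output opted in via the marker interface.
    public void onMapperFinished() throws Exception {
        if (output instanceof MapperAwareOutput)
            ((MapperAwareOutput)output).onMapperFinished();
    }
}

Putting the hook on the base HadoopTaskContext (rather than on HadoopV2Context, where it lived before, see the HadoopV2Context.java diff below) is what lets both v1 and v2 tasks, map and combine alike, share one flush point.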
modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v1/HadoopV1MapTask.java

Lines changed: 51 additions & 38 deletions

@@ -30,6 +30,7 @@
 import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;
 import org.apache.ignite.internal.processors.hadoop.HadoopInputSplit;
 import org.apache.ignite.internal.processors.hadoop.HadoopJob;
+import org.apache.ignite.internal.processors.hadoop.HadoopMapperUtils;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskContext;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo;
@@ -45,7 +46,7 @@ public class HadoopV1MapTask extends HadoopV1Task {
     /**
      * Constructor.
      *
-     * @param taskInfo
+     * @param taskInfo Taks info.
      */
     public HadoopV1MapTask(HadoopTaskInfo taskInfo) {
         super(taskInfo);
@@ -56,67 +57,79 @@ public HadoopV1MapTask(HadoopTaskInfo taskInfo) {
     @Override public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
         HadoopJob job = taskCtx.job();
 
-        HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;
+        HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext)taskCtx;
 
-        JobConf jobConf = ctx.jobConf();
+        if (taskCtx.taskInfo().hasMapperIndex())
+            HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
+        else
+            HadoopMapperUtils.clearMapperIndex();
 
-        InputFormat inFormat = jobConf.getInputFormat();
+        try {
+            JobConf jobConf = taskCtx0.jobConf();
 
-        HadoopInputSplit split = info().inputSplit();
+            InputFormat inFormat = jobConf.getInputFormat();
 
-        InputSplit nativeSplit;
+            HadoopInputSplit split = info().inputSplit();
 
-        if (split instanceof HadoopFileBlock) {
-            HadoopFileBlock block = (HadoopFileBlock)split;
+            InputSplit nativeSplit;
 
-            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
-        }
-        else
-            nativeSplit = (InputSplit)ctx.getNativeSplit(split);
+            if (split instanceof HadoopFileBlock) {
+                HadoopFileBlock block = (HadoopFileBlock)split;
 
-        assert nativeSplit != null;
+                nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
+            }
+            else
+                nativeSplit = (InputSplit)taskCtx0.getNativeSplit(split);
 
-        Reporter reporter = new HadoopV1Reporter(taskCtx);
+            assert nativeSplit != null;
 
-        HadoopV1OutputCollector collector = null;
+            Reporter reporter = new HadoopV1Reporter(taskCtx);
 
-        try {
-            collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(),
-                fileName(), ctx.attemptId());
+            HadoopV1OutputCollector collector = null;
 
-            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);
+            try {
+                collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(),
+                    fileName(), taskCtx0.attemptId());
 
-            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);
+                RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);
 
-            Object key = reader.createKey();
-            Object val = reader.createValue();
+                Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);
 
-            assert mapper != null;
+                Object key = reader.createKey();
+                Object val = reader.createValue();
+
+                assert mapper != null;
 
-            try {
                 try {
-                    while (reader.next(key, val)) {
-                        if (isCancelled())
-                            throw new HadoopTaskCancelledException("Map task cancelled.");
+                    try {
+                        while (reader.next(key, val)) {
+                            if (isCancelled())
+                                throw new HadoopTaskCancelledException("Map task cancelled.");
+
+                            mapper.map(key, val, collector, reporter);
+                        }
 
-                        mapper.map(key, val, collector, reporter);
+                        taskCtx.onMapperFinished();
+                    }
+                    finally {
+                        mapper.close();
                     }
                 }
                 finally {
-                    mapper.close();
+                    collector.closeWriter();
                }
+
+                collector.commit();
            }
-            finally {
-                collector.closeWriter();
-            }
+            catch (Exception e) {
+                if (collector != null)
+                    collector.abort();
 
-            collector.commit();
+                throw new IgniteCheckedException(e);
+            }
        }
-        catch (Exception e) {
-            if (collector != null)
-                collector.abort();
-
-            throw new IgniteCheckedException(e);
+        finally {
+            HadoopMapperUtils.clearMapperIndex();
        }
    }
 }

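Note: the v1 tasks now bracket their whole body with HadoopMapperUtils.mapperIndex(...) / clearMapperIndex(). A plausible reading of that utility is a per-thread index holder; here is a minimal sketch under that assumption (class and field names are hypothetical, only the mapperIndex/clearMapperIndex method shapes come from the diff):

final class MapperIndexHolder {
    /** Per-thread slot for the index of the mapper currently running on this thread. */
    private static final ThreadLocal<Integer> IDX = new ThreadLocal<>();

    static void mapperIndex(Integer idx) { IDX.set(idx); }

    static Integer mapperIndex() { return IDX.get(); }

    static void clearMapperIndex() { IDX.remove(); }
}

class V1TaskSketch {
    // Usage pattern mirroring the diff: publish the index for the duration of
    // the task body and always clear it in finally, so pooled threads never
    // leak a stale index into the next task they pick up.
    void runTask(int mapperIdx) {
        MapperIndexHolder.mapperIndex(mapperIdx);

        try {
            // ... iterate the input split and invoke the user mapper ...
        }
        finally {
            MapperIndexHolder.clearMapperIndex();
        }
    }
}

The set/try/clear bracket is what lets a striped output later ask "which mapper am I serving?" without threading an extra argument through the Hadoop APIs.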
modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v1/HadoopV1ReduceTask.java

Lines changed: 42 additions & 27 deletions

@@ -23,6 +23,7 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.internal.processors.hadoop.HadoopJob;
+import org.apache.ignite.internal.processors.hadoop.HadoopMapperUtils;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskContext;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo;
@@ -53,49 +54,63 @@ public HadoopV1ReduceTask(HadoopTaskInfo taskInfo, boolean reduce) {
     @Override public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
         HadoopJob job = taskCtx.job();
 
-        HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;
+        HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext)taskCtx;
 
-        JobConf jobConf = ctx.jobConf();
-
-        HadoopTaskInput input = taskCtx.input();
-
-        HadoopV1OutputCollector collector = null;
+        if (!reduce && taskCtx.taskInfo().hasMapperIndex())
+            HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
+        else
+            HadoopMapperUtils.clearMapperIndex();
 
         try {
-            collector = collector(jobConf, ctx, reduce || !job.info().hasReducer(), fileName(), ctx.attemptId());
+            JobConf jobConf = taskCtx0.jobConf();
 
-            Reducer reducer;
-            if (reduce) reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(),
-                jobConf);
-            else reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(),
-                jobConf);
+            HadoopTaskInput input = taskCtx.input();
 
-            assert reducer != null;
+            HadoopV1OutputCollector collector = null;
 
             try {
+                collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(), taskCtx0.attemptId());
+
+                Reducer reducer;
+                if (reduce) reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(),
+                    jobConf);
+                else reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(),
+                    jobConf);
+
+                assert reducer != null;
+
                 try {
-                    while (input.next()) {
-                        if (isCancelled())
-                            throw new HadoopTaskCancelledException("Reduce task cancelled.");
+                    try {
+                        while (input.next()) {
+                            if (isCancelled())
+                                throw new HadoopTaskCancelledException("Reduce task cancelled.");
+
+                            reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
+                        }
 
-                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
+                        if (!reduce)
+                            taskCtx.onMapperFinished();
+                    }
+                    finally {
+                        reducer.close();
                    }
                }
                finally {
-                    reducer.close();
+                    collector.closeWriter();
                }
+
+                collector.commit();
            }
-            finally {
-                collector.closeWriter();
-            }
+            catch (Exception e) {
+                if (collector != null)
+                    collector.abort();
 
-            collector.commit();
+                throw new IgniteCheckedException(e);
+            }
        }
-        catch (Exception e) {
-            if (collector != null)
-                collector.abort();
-
-            throw new IgniteCheckedException(e);
+        finally {
+            if (!reduce)
+                HadoopMapperUtils.clearMapperIndex();
        }
    }
 }

modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v2/HadoopV2Context.java

Lines changed: 0 additions & 10 deletions

@@ -154,16 +154,6 @@ public HadoopV2Context(HadoopV2TaskContext ctx) {
         }
     }
 
-    /**
-     * Callback invoked from mapper thread when map is finished.
-     *
-     * @throws IgniteCheckedException If failed.
-     */
-    public void onMapperFinished() throws IgniteCheckedException {
-        if (output instanceof HadoopMapperAwareTaskOutput)
-            ((HadoopMapperAwareTaskOutput)output).onMapperFinished();
-    }
-
     /** {@inheritDoc} */
     @Override public OutputCommitter getOutputCommitter() {
         throw new UnsupportedOperationException();

modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v2/HadoopV2MapTask.java

Lines changed: 10 additions & 8 deletions

@@ -56,30 +56,32 @@ public HadoopV2MapTask(HadoopTaskInfo taskInfo) {
         HadoopMapperUtils.clearMapperIndex();
 
         try {
-            InputSplit nativeSplit = hadoopContext().getInputSplit();
+            HadoopV2Context hadoopCtx = hadoopContext();
+
+            InputSplit nativeSplit = hadoopCtx.getInputSplit();
 
             if (nativeSplit == null)
                 throw new IgniteCheckedException("Input split cannot be null.");
 
             InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
-                hadoopContext().getConfiguration());
+                hadoopCtx.getConfiguration());
 
-            RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopContext());
+            RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
 
-            reader.initialize(nativeSplit, hadoopContext());
+            reader.initialize(nativeSplit, hadoopCtx);
 
-            hadoopContext().reader(reader);
+            hadoopCtx.reader(reader);
 
             HadoopJobInfo jobInfo = taskCtx.job().info();
 
             outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);
 
-            Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopContext().getConfiguration());
+            Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());
 
             try {
-                mapper.run(new WrappedMapper().getMapContext(hadoopContext()));
+                mapper.run(new WrappedMapper().getMapContext(hadoopCtx));
 
-                hadoopContext().onMapperFinished();
+                taskCtx.onMapperFinished();
             }
             finally {
                 closeWriter();

modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v2/HadoopV2ReduceTask.java

Lines changed: 14 additions & 0 deletions

@@ -24,6 +24,7 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.internal.IgniteInterruptedCheckedException;
+import org.apache.ignite.internal.processors.hadoop.HadoopMapperUtils;
 import org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo;
 
 /**
@@ -53,17 +54,27 @@ public HadoopV2ReduceTask(HadoopTaskInfo taskInfo, boolean reduce) {
 
         JobContextImpl jobCtx = taskCtx.jobContext();
 
+        // Set mapper index for combiner tasks
+        if (!reduce && taskCtx.taskInfo().hasMapperIndex())
+            HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
+        else
+            HadoopMapperUtils.clearMapperIndex();
+
         try {
             outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;
 
             Reducer reducer;
+
             if (reduce) reducer = ReflectionUtils.newInstance(jobCtx.getReducerClass(),
                 jobCtx.getConfiguration());
             else reducer = ReflectionUtils.newInstance(jobCtx.getCombinerClass(),
                 jobCtx.getConfiguration());
 
             try {
                 reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
+
+                if (!reduce)
+                    taskCtx.onMapperFinished();
             }
             finally {
                 closeWriter();
@@ -84,6 +95,9 @@ public HadoopV2ReduceTask(HadoopTaskInfo taskInfo, boolean reduce) {
             throw new IgniteCheckedException(e);
         }
         finally {
+            if (!reduce)
+                HadoopMapperUtils.clearMapperIndex();
+
             if (err != null)
                 abort(outputFormat);
         }

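Note: taken together, the v1/v2 changes let a combiner participate in per-mapper (direct) output. A combiner executes as a reduce-style task with reduce == false, but it is mapper-side work: it inherits the index of the mapper it follows and fires the same per-mapper flush when it completes. A condensed sketch of that lifecycle, reusing the hypothetical MapperIndexHolder and TaskContextSketch types from the sketches above:

class CombinerTaskSketch {
    void runCombine(TaskContextSketch taskCtx, boolean reduce, int mapperIdx) throws Exception {
        if (!reduce)
            MapperIndexHolder.mapperIndex(mapperIdx); // combiner: adopt the mapper's slot
        else
            MapperIndexHolder.clearMapperIndex();     // real reducer: no mapper scope

        try {
            // ... run the user reducer/combiner over its input ...

            if (!reduce)
                taskCtx.onMapperFinished();           // flush this mapper's direct output
        }
        finally {
            if (!reduce)
                MapperIndexHolder.clearMapperIndex();
        }
    }
}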
modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/impl/v2/HadoopV2TaskContext.java

Lines changed: 1 addition & 0 deletions

@@ -48,6 +48,7 @@
 import org.apache.ignite.internal.processors.hadoop.HadoopJob;
 import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
 import org.apache.ignite.internal.processors.hadoop.HadoopJobProperty;
+import org.apache.ignite.internal.processors.hadoop.HadoopMapperAwareTaskOutput;
 import org.apache.ignite.internal.processors.hadoop.HadoopPartitioner;
 import org.apache.ignite.internal.processors.hadoop.HadoopSerialization;
 import org.apache.ignite.internal.processors.hadoop.HadoopSplitWrapper;

modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/shuffle/HadoopShuffleJob.java

Lines changed: 0 additions & 7 deletions

@@ -182,13 +182,6 @@ public HadoopShuffleJob(T locReduceAddr, IgniteLogger log, HadoopJob job, GridUn
         boolean stripeMappers0 = get(job.info(), SHUFFLE_MAPPER_STRIPED_OUTPUT, false);
 
         if (stripeMappers0) {
-            if (job.info().hasCombiner()) {
-                log.info("Striped mapper output is disabled because it cannot be used together with combiner [jobId=" +
-                    job.id() + ']');
-
-                stripeMappers0 = false;
-            }
-
             if (!embedded) {
                 log.info("Striped mapper output is disabled becuase it cannot be used in external mode [jobId=" +
                     job.id() + ']');

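Note: with per-mapper flushing in place, the combiner restriction on striped mapper output is lifted, and the only gate left in this code path is external mode. A sketch of the resulting decision (the method shape is illustrative, not the actual HadoopShuffleJob API):

final class StripedOutputGate {
    /** After this commit: a combiner no longer disables striping. */
    static boolean stripeMappers(boolean requested, boolean embedded) {
        // Before this commit a hasCombiner() check here also forced striping
        // off; per-mapper flushing via onMapperFinished() made it unnecessary.
        return requested && embedded; // external (non-embedded) mode is still unsupported
    }
}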
modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/shuffle/direct/HadoopDirectDataInput.java

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ public HadoopDirectDataInput(byte[] buf) {
 
     /** {@inheritDoc} */
     @Override public int read() throws IOException {
-        return readByte();
+        return (int)readByte() & 0xFF;
    }
 
     /** {@inheritDoc} */

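Note: this one-liner fixes a classic sign-extension bug. Assuming read() here follows the java.io.InputStream contract, it must return an unsigned byte in [0, 255], or -1 only at end of stream. Returning readByte() directly widens the signed byte to int with sign extension, so the byte 0xFF comes back as -1 and looks like EOF to callers; masking with & 0xFF yields 255. A standalone demonstration:

public class SignExtensionDemo {
    public static void main(String[] args) {
        byte b = (byte)0xFF;       // bit pattern 0xFF, numeric value -1

        int wrong = b;             // implicit widening sign-extends: -1, i.e. a false "EOF"
        int right = b & 0xFF;      // masking keeps the low 8 bits: 255, a valid unsigned byte

        System.out.println(wrong); // prints -1
        System.out.println(right); // prints 255
    }
}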