Skip to content

Commit a56e951

Browse files
author
Qianxi Zhang
committed
add some comments for the mapreduce example code
1 parent 928a1a2 commit a56e951

File tree

4 files changed

+17
-11
lines changed

4 files changed

+17
-11
lines changed

hadoop/src/main/java/cn/chinahadoop/mapreduce/Grep.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,19 @@ public int run(String[] args) throws Exception {
2929
ToolRunner.printGenericCommandUsage(System.out);
3030
return 2;
3131
}
32-
32+
// temporary directory that carries the first job's output to the second job
3333
Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
3434

3535
Configuration conf = getConf();
3636
conf.set(RegexMapper.PATTERN, args[2]);
3737
if (args.length == 4)
3838
conf.set(RegexMapper.GROUP, args[3]);
39-
39+
//the first job
40+
// count the occurrences of each string that matches the pattern
4041
Job grepJob = new Job(conf);
4142

4243
try {
43-
44+
//define the first job
4445
grepJob.setJobName("grep-search");
4546

4647
FileInputFormat.setInputPaths(grepJob, args[0]);
@@ -49,22 +50,23 @@ public int run(String[] args) throws Exception {
4950

5051
grepJob.setCombinerClass(LongSumReducer.class);
5152
grepJob.setReducerClass(LongSumReducer.class);
52-
53+
// output to tempDir
5354
FileOutputFormat.setOutputPath(grepJob, tempDir);
5455
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
5556
grepJob.setOutputKeyClass(Text.class);
5657
grepJob.setOutputValueClass(LongWritable.class);
57-
58+
// result: matched string + count
5859
grepJob.waitForCompletion(true);
59-
60+
//the second job
61+
//sort
6062
Job sortJob = new Job(conf);
6163
sortJob.setJobName("grep-sort");
62-
64+
//use tempDir (the output of the first job) as the input
6365
FileInputFormat.setInputPaths(sortJob, tempDir);
6466
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
6567

6668
sortJob.setMapperClass(InverseMapper.class);
67-
69+
//just write the sorted data out
6870
sortJob.setNumReduceTasks(1); // write a single file
6971
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
7072
sortJob.setSortComparatorClass( // sort by decreasing freq

hadoop/src/main/java/cn/chinahadoop/mapreduce/InvertedIndex.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ public static void main(String[] args) throws Exception {
8080
job.setInputFormatClass(TextInputFormat.class);
8181
job.setOutputFormatClass(TextOutputFormat.class);
8282

83-
FileInputFormat.addInputPath(job, new Path(args[0]));
84-
FileOutputFormat.setOutputPath(job, new Path(args[1]));
83+
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
84+
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
8585

86-
Path outputPath = new Path(args[1]);
86+
Path outputPath = new Path(otherArgs[1]);
8787

8888
outputPath.getFileSystem(conf).delete(outputPath);
8989

hadoop/src/main/java/cn/chinahadoop/mapreduce/JobFailureTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritab
2525
private Text word = new Text();
2626

2727
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
28+
//get the task id from context
29+
//the first map task (task id = 0) always fails
2830
int id = context.getTaskAttemptID().getTaskID().getId();
2931
System.out.println("id:" + id);
3032
if (id == 0)

hadoop/src/main/java/cn/chinahadoop/mapreduce/TaskAttemptTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritab
2525
private Text word = new Text();
2626

2727
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
28+
//get the task attempt id
29+
//the first 4 attempts of the task (attempt id < 4) always fail
2830
int id = context.getTaskAttemptID().getId();
2931
System.out.println("id:" + id);
3032
if (id < 4)

0 commit comments

Comments
 (0)