倒排索引建立案例
阿新 • • 發佈:2018-12-14
重點注意:
-
1.FileSplit split = (FileSplit) context.getInputSplit();
String fileName = split.getPath().getName();
//獲取檔案的名字
-
2.context.write(new Text(split1[0]),new Text(split1[1].replaceAll("\t","-->")));
replaceAll 會替換所有符合的內容(此處把每個 \t 換成 -->)
-
3.StringBuffer stringBuffer = new StringBuffer();
stringBuffer.append(value.toString());
//字串追加
第一次編寫類:
public class InvertedOne { public static class OneMapper extends Mapper<LongWritable,Text,Text,IntWritable>{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { FileSplit split = (FileSplit) context.getInputSplit(); String fileName = split.getPath().getName(); String string = value.toString(); String[] split1 = string.split(" "); for (String s : split1) { context.write(new Text(s + "-"+fileName),new IntWritable(1)); } } } public static class OneReducer extends Reducer<Text,IntWritable,Text,IntWritable>{ @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int count = 0; for (IntWritable value : values) { count += value.get(); } context.write(new Text(key),new IntWritable(count)); } } }
第一次測試類:
/** Driver that configures and runs the first (word-per-file counting) job. */
public class OneTest {

    /**
     * Sets up the job with {@link InvertedOne}'s mapper and reducer, clears any
     * existing output directory, runs the job and exits with 0 on success, 1 otherwise.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(OneTest.class);

        // Map side: Text keys, IntWritable values.
        job.setMapperClass(InvertedOne.OneMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce side: same types, single reducer.
        job.setReducerClass(InvertedOne.OneReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        // Remove the output directory left by a previous run, if there is one.
        File previousOutput = new File("某盤output1");
        if (previousOutput.exists()) {
            FileUtils.deleteDirectory(previousOutput);
        }

        FileInputFormat.setInputPaths(job, new Path("某盤input"));
        FileOutputFormat.setOutputPath(job, new Path("某盤output1"));

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}
第二次編寫類:
/**
 * Second pass of the inverted-index build: regroups the per-file counts by word.
 *
 * <p>Each input line is expected to look like {@code word-fileName<TAB>count}.
 */
public class InvertedTwo {

    /** Re-keys each line by word and turns the rest into a {@code fileName-->count} posting. */
    public static class TwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Cuts the line at its first {@code '-'}: the left part becomes the key, the
         * right part (with every tab replaced by {@code "-->"}) becomes the value.
         * Lines without a {@code '-'}, or with nothing after it, are skipped.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of input text
         * @param context job context used to emit records
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();

            // Cut at the FIRST '-' only, so a '-' inside the file name does not
            // truncate the posting and lose the count.
            int dash = line.indexOf('-');
            if (dash < 0 || dash == line.length() - 1) {
                // Malformed line: indexing the split result would otherwise throw.
                return;
            }

            String word = line.substring(0, dash);
            // Literal replacement; no regular expression is needed here.
            String posting = line.substring(dash + 1).replace("\t", "-->");
            context.write(new Text(word), new Text(posting));
        }
    }

    /** Joins all postings of a word into a single output value. */
    public static class TwoReducer extends Reducer<Text, Text, Text, Text> {

        /** Separator placed between postings so that adjacent entries stay distinguishable. */
        private static final String POSTING_SEPARATOR = "\t";

        /**
         * Concatenates the postings for the key, separated by a tab, and writes
         * {@code (key, joinedPostings)}.
         *
         * @param key     the word
         * @param values  the {@code fileName-->count} postings for this word
         * @param context job context used to emit the result
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder postings = new StringBuilder();
            for (Text value : values) {
                if (postings.length() > 0) {
                    postings.append(POSTING_SEPARATOR);
                }
                postings.append(value.toString());
            }
            context.write(key, new Text(postings.toString()));
        }
    }
}
第二次測試類:
/** Driver that configures and runs the second (regroup-by-word) job. */
public class Twotest {

    /**
     * Sets up the job with {@link InvertedTwo}'s mapper and reducer, clears any
     * existing output directory, runs the job and exits with 0 on success, 1 otherwise.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(Twotest.class);

        // Map side: Text keys and Text values.
        job.setMapperClass(InvertedTwo.TwoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reduce side: same types, single reducer.
        job.setReducerClass(InvertedTwo.TwoReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);

        // Remove the output directory left by a previous run, if there is one.
        File previousOutput = new File("某盤output2");
        if (previousOutput.exists()) {
            FileUtils.deleteDirectory(previousOutput);
        }

        // This job reads what the first job wrote.
        FileInputFormat.setInputPaths(job, new Path("某盤output1"));
        FileOutputFormat.setOutputPath(job, new Path("某盤output2"));

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}