MapReduce學習寫測試
阿新 • • 發佈:2019-02-01
參考
測試方式
mrunit
MRUnit 已經退役(專案已移入 Apache Attic),很多人建議不要再使用它進行測試
千篇一律的WordCount
/**
 * MRUnit-based tests for the WordCount job.
 *
 * <p>The mapper and the reducer are each driven on their own, and then together
 * through a combined map/reduce driver. All three drivers share the mapper and
 * reducer instances and the {@link Configuration} created in {@link #setup()}.
 */
public class WordCountMRUnitTest {

    MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;
    MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
    ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;

    /** Builds fresh drivers before every test and wires the same mapper/reducer/config into them. */
    @Before
    public void setup() {
        WordCountMapper wordCountMapper = new WordCountMapper();
        WordCountReducer wordCountReducer = new WordCountReducer();
        // add config here as needed
        Configuration sharedConf = new Configuration();

        // Mapper-only driver.
        mapDriver = new MapDriver<LongWritable, Text, Text, IntWritable>();
        mapDriver.setMapper(wordCountMapper);
        mapDriver.setConfiguration(sharedConf);

        // Reducer-only driver.
        reduceDriver = new ReduceDriver<Text, IntWritable, Text, IntWritable>();
        reduceDriver.setReducer(wordCountReducer);
        reduceDriver.setConfiguration(sharedConf);

        // Full pipeline driver.
        mapReduceDriver =
                new MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable>();
        mapReduceDriver.setMapper(wordCountMapper);
        mapReduceDriver.setReducer(wordCountReducer);
        mapReduceDriver.setConfiguration(sharedConf);
    }

    /** One input line with three tokens is expected to yield one (token, 1) pair per token, in order. */
    @Test
    public void testMapper() throws IOException {
        mapDriver
                .withInput(new LongWritable(1), new Text("cat cat dog"))
                .withOutput(new Text("cat"), new IntWritable(1))
                .withOutput(new Text("cat"), new IntWritable(1))
                .withOutput(new Text("dog"), new IntWritable(1))
                .runTest();
    }

    /** Two counts of 1 for the key "cat" are expected to be reduced to ("cat", 2). */
    @Test
    public void testReducer() throws IOException {
        List<IntWritable> ones = new ArrayList<IntWritable>();
        for (int i = 0; i < 2; i++) {
            ones.add(new IntWritable(1));
        }
        reduceDriver
                .withInput(new Text("cat"), ones)
                .withOutput(new Text("cat"), new IntWritable(2))
                .runTest();
    }

    /** Runs mapper and reducer back to back on one line and checks the per-word totals. */
    @Test
    public void testMapReduce() throws IOException {
        LongWritable offset = new LongWritable(1);
        Text line = new Text("cat cat dog");
        mapReduceDriver.withInput(offset, line);

        mapReduceDriver.addOutput(new Text("cat"), new IntWritable(2));
        mapReduceDriver.addOutput(new Text("dog"), new IntWritable(1));

        mapReduceDriver.runTest();
    }
}
junit
對map,reduce和Driver分別測試,主要用到了Mockito
/**
 * Unit tests for {@code WordCountMapper} that use Mockito instead of a running job.
 *
 * <p>The Hadoop context and the mapper's {@code word} field are both replaced by
 * mocks, so each test can verify the exact sequence of {@code word.set(...)} and
 * {@code context.write(...)} calls made by {@code map}.
 */
public class WordCountMapperTest {
    private WordCountMapper mapper;
    private Mapper.Context context;
    private IntWritable one;

    /** Creates a fresh mapper and fresh mocks before every test. */
    @Before
    public void init() throws IOException, InterruptedException {
        mapper = new WordCountMapper();
        context = mock(Mapper.Context.class);
        // Swap the mapper's output key for a mock so calls to set(...) can be verified.
        mapper.word = mock(Text.class);
        one = new IntWritable(1);
    }

    /** A line with a single token must be emitted exactly as (token, 1). */
    @Test
    public void testSingleWord() throws IOException, InterruptedException {
        mapper.map(new LongWritable(1L), new Text("foo"), context);
        InOrder inOrder = inOrder(mapper.word, context);
        assertCountedOnce(inOrder, "foo");
    }

    /** Every token of a multi-word line must be emitted as (token, 1), in input order. */
    @Test
    public void testMultipleWords() throws IOException, InterruptedException {
        mapper.map(new LongWritable(1L), new Text("one two three four"), context);
        // Each mock only needs to be listed once; the order is checked across all
        // of their interactions, however many verifications follow.
        InOrder inOrder = inOrder(mapper.word, context);
        assertCountedOnce(inOrder, "one");
        assertCountedOnce(inOrder, "two");
        assertCountedOnce(inOrder, "three");
        assertCountedOnce(inOrder, "four");
    }

    /**
     * Verifies the next two ordered interactions: the mapper sets its key to
     * {@code w} and then writes that key with a count of one.
     *
     * @param inOrder ordered verifier covering {@code mapper.word} and {@code context}
     * @param w       the token expected next
     */
    private void assertCountedOnce(InOrder inOrder, String w) throws IOException, InterruptedException {
        inOrder.verify(mapper.word).set(eq(w));
        inOrder.verify(context).write(eq(mapper.word), eq(one));
    }
}
/**
 * Unit test for {@code WordCountReducer} that verifies the value written to a
 * mocked Hadoop reducer context.
 */
public class WordCountReducerTest {
    private WordCountReducer reducer;
    private Reducer.Context context;

    /** Creates a fresh context mock and reducer before every test. */
    @Before
    public void init() throws IOException, InterruptedException {
        context = mock(Reducer.Context.class);
        reducer = new WordCountReducer();
    }

    /** The counts 1, 4 and 7 for the key "foo" must be written out as ("foo", 12). */
    @Test
    public void testSingleWord() throws IOException, InterruptedException {
        IntWritable first = new IntWritable(1);
        IntWritable second = new IntWritable(4);
        IntWritable third = new IntWritable(7);
        List<IntWritable> counts = Arrays.asList(first, second, third);

        reducer.reduce(new Text("foo"), counts, context);

        Text expectedKey = new Text("foo");
        IntWritable expectedTotal = new IntWritable(12);
        verify(context).write(expectedKey, expectedTotal);
    }
}
/**
 * Runs the {@code WordCount} driver against the local file system and checks
 * the contents of the produced output file.
 */
public class WordCountDriverTest {
    private Configuration conf;
    private Path input;
    private Path output;
    private FileSystem fs;

    /** Points the configuration at the local file system and removes any previous output directory. */
    @Before
    public void setup() throws IOException {
        conf = new Configuration();
        // NOTE(review): these look like the older Hadoop property names; confirm
        // whether the Hadoop version in use prefers newer equivalents.
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.job.tracker", "local");
        input = new Path("src/test/resources/input");
        output = new Path("target/output");
        fs = FileSystem.getLocal(conf);
        // Start from a clean slate so results of an earlier run cannot interfere.
        fs.delete(output, true);
    }

    /** The job must exit with code 0 and produce the expected word counts. */
    @Test
    public void test() throws Exception {
        WordCount wordCount = new WordCount();
        wordCount.setConf(conf);
        int exitCode = wordCount.run(new String[] {input.toString(), output.toString()});
        assertEquals(0, exitCode);
        validateOutput();
    }

    /**
     * Reads the first lines of {@code part-r-00000} in the output directory and
     * compares them with the expected tab-separated word counts.
     *
     * @throws IOException if the result file cannot be opened or read
     */
    private void validateOutput() throws IOException {
        InputStream in = null;
        try {
            // Resolve the part file against the configured output directory so the
            // path stays in sync with the one cleaned in setup() and passed to the job.
            in = fs.open(new Path(output, "part-r-00000"));
            // Decode explicitly as UTF-8 rather than the platform default charset.
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            assertEquals("five\t1", br.readLine());
            assertEquals("four\t1", br.readLine());
            assertEquals("one\t3", br.readLine());
            assertEquals("six\t1", br.readLine());
            assertEquals("three\t1", br.readLine());
            assertEquals("two\t2", br.readLine());
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
在hadoop本身測試基礎上
Hadoop 在0.21版本之前,利用MiniDFSCluster模擬hdfs叢集環境來進行測試,看原始碼可以看到很多例子,比如TeraSort的tests;
之後,hadoop發行版中引入了一個新的測試框架,Large-Scale Automated Test Framework,該框架跟以前的測試框架不同之處在於,基於它之上的測試的開發是基於真正的叢集環境的系統層面的,取名叫做Herriot。
將TeraSort的程式碼和相關測試抽出來嘗試其測試過程,過程中發現很多類通過maven引不進來,比如MiniDFSCluster,後來就直接匯入相關jar包:hadoop-hdfs-2.8.3-tests.jar,該jar包在安裝的hadoop目錄的share目錄中可以找到。
package TeraSort;
import java.io.File;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapred.HadoopTestCase;
import org.apache.hadoop.util.ToolRunner;
import org.junit.After;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
/**
 * End-to-end test of the TeraGen, TeraSort and TeraValidate tools, run through
 * {@link HadoopTestCase} set up with {@code LOCAL_MR} and {@code LOCAL_FS}.
 */
public class TestTeraSort extends HadoopTestCase {
    private static final Log LOG = LogFactory.getLog(TestTeraSort.class);

    // Input/Output paths for sort
    private static final Path TEST_DIR = new Path(new File(
        System.getProperty("test.build.data", "/tmp"), "terasort")
        .getAbsoluteFile().toURI().toString());
    private static final Path SORT_INPUT_PATH = new Path(TEST_DIR, "sortin");
    private static final Path SORT_OUTPUT_PATH = new Path(TEST_DIR, "sortout");
    private static final Path TERA_OUTPUT_PATH = new Path(TEST_DIR, "validate");
    private static final String NUM_ROWS = "100";

    public TestTeraSort()
        throws IOException {
        super(LOCAL_MR, LOCAL_FS, 1, 1);
    }

    /** Removes the whole test directory after each test, then delegates to the base class. */
    @After
    public void tearDown() throws Exception {
        getFileSystem().delete(TEST_DIR, true);
        super.tearDown();
    }

    /**
     * Runs TeraGen to write {@code NUM_ROWS} rows into {@code sortInput} and
     * asserts a zero exit code.
     */
    private void runTeraGen(Configuration conf, Path sortInput) throws Exception {
        String[] genArgs = {NUM_ROWS, sortInput.toString()};
        // Run TeraGen
        assertEquals(0, ToolRunner.run(conf, new TeraGen(), genArgs));
    }

    /** Runs TeraSort from {@code sortInput} to {@code sortOutput} and asserts a zero exit code. */
    private void runTeraSort(Configuration conf, Path sortInput, Path sortOutput) throws Exception {
        // Setup command-line arguments to 'sort'
        String[] sortArgs = {sortInput.toString(), sortOutput.toString()};
        // Run Sort
        assertEquals(0, ToolRunner.run(conf, new TeraSort(), sortArgs));
    }

    /** Runs TeraValidate over {@code sortOutput}, writing to {@code valOutput}, and asserts a zero exit code. */
    private void runTeraValidator(Configuration job, Path sortOutput, Path valOutput) throws Exception {
        String[] svArgs = {sortOutput.toString(), valOutput.toString()};
        // Run Tera-Validator
        assertEquals(0, ToolRunner.run(job, new TeraValidate(), svArgs));
    }

    /**
     * Generates input, sorts it and validates the result. TeraGen and TeraSort
     * are each run a second time against the same output path; that second run
     * must fail with {@link FileAlreadyExistsException} instead of overwriting.
     */
    @Test
    public void testTeraSort() throws Exception {
        // Run TeraGen to generate input for 'terasort'
        runTeraGen(createJobConf(), SORT_INPUT_PATH);

        // Run teragen again to check for FAE
        try {
            runTeraGen(createJobConf(), SORT_INPUT_PATH);
            fail("Teragen output overwritten!");
        } catch (FileAlreadyExistsException fae) {
            LOG.info("Expected exception: ", fae);
        }

        // Run terasort
        runTeraSort(createJobConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);

        // Run terasort again to check for FAE
        try {
            runTeraSort(createJobConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);
            fail("Terasort output overwritten!");
        } catch (FileAlreadyExistsException fae) {
            LOG.info("Expected exception: ", fae);
        }

        // Run tera-validator to check if sort worked correctly
        runTeraValidator(createJobConf(), SORT_OUTPUT_PATH,
            TERA_OUTPUT_PATH);
    }

    /** Calling TeraSort with a single argument is expected to return exit code 2. */
    @Test
    public void testTeraSortWithLessThanTwoArgs() throws Exception {
        String[] args = new String[1];
        assertEquals(2, new TeraSort().run(args));
    }
}
測試覆蓋率
idea本身自帶有測試覆蓋率的,run test時選擇run “*Test*” with Coverage
學習過程中遇到的問題
編碼錯誤
@Override is not allowed when implementing interface method
Project Structure -> Modules -> Language level 選擇 6 或以上(Java 5 不允許在實作介面方法時使用 @Override)
powermockito和mockito版本問題
錯誤1:java.lang.NoSuchMethodError:
org.mockito.internal.creation.MockSettingsImpl.setMockName(Lorg/mockito/mock/MockName;)Lorg/mockito/internal/creation/settings/CreationSettings;
錯誤2:java.lang.NoSuchMethodError:
org.powermock.reflect.internal.WhiteboxImpl.getOriginalUnmockedType(Ljava/lang/Class;)Ljava/lang/Class;
Mockito | PowerMock |
---|---|
2.0.0-beta - 2.0.42-beta | 1.6.5+ |
1.10.19 | 1.6.4 |
1.10.8 - 1.10.x | 1.6.2+ |
1.9.5-rc1 - 1.9.5 | 1.5.0 - 1.5.6 |
1.9.0-rc1 & 1.9.0 | 1.4.10 - 1.4.12 |
1.8.5 | 1.3.9 - 1.4.9 |
1.8.4 | 1.3.7 & 1.3.8 |
1.8.3 | 1.3.6 |
1.8.1 & 1.8.2 | 1.3.5 |
1.8 | 1.3 |
1.7 | 1.2.5 |
自己建的目錄不能new java類
點選該目錄,右鍵 -> Mark Directory as -> Sources Root