今天大家做了一道比賽題:有6個檔案,每個檔案裡大約有200萬(200w)個整數,每行一個,要求找出所有檔案裡最大的一個數字。
有6個檔案,每個檔案裡大約200w整數,每行一個
找出所有檔案裡最大的一個數字
實現方法:
package com.yxie.test.data;
import com.magnanimityData.test.constant.MagnanimityDataConstant;
/**
 * Entry point of the exercise: starts one {@code ExecuteThread} per data file, waits for each
 * thread's per-file maximum, and prints the overall maximum together with the elapsed time.
 */
public class MainTest {
    /** Number of data files, and therefore of worker threads. */
    private static final int THREAD_COUNT = 6;

    /**
     * Runs the whole computation.
     *
     * <p>The original author noted a run time of roughly 2250 (presumably milliseconds, the
     * unit of the {@code totalTime} value printed at the end).
     *
     * @param args unused
     * @throws Exception declared by the original signature; nothing in this method throws a
     *     checked exception itself
     */
    public static void main(String[] args) throws Exception {
        long beginTime = System.currentTimeMillis();

        // Split the expensive job into one small task per file, run the tasks on separate
        // threads, then aggregate their results below.
        ExecuteThread[] exeThreads = new ExecuteThread[THREAD_COUNT];
        for (int i = 0; i < THREAD_COUNT; i++) {
            exeThreads[i] = new ExecuteThread();
            exeThreads[i].setFileName(MagnanimityDataConstant.READ_MAIN_FILE_PATH + i
                    + MagnanimityDataConstant.READ_MAIN_FILE_PATH_PREFIX);
            exeThreads[i].start();
        }

        // Seed with "no value yet" instead of "0" so that a negative overall maximum is
        // reported correctly.
        String maxTotalValue = null;
        for (int i = 0; i < exeThreads.length; i++) {
            // getResult() blocks until that worker has finished.
            String calResult = exeThreads[i].getResult();
            if (calResult == null) {
                continue;
            }
            calResult = calResult.trim();
            if (!isDecimalInteger(calResult)) {
                // Not a number at all; it must not take part in the comparison.
                continue;
            }
            if (maxTotalValue == null || compareDecimal(calResult, maxTotalValue) > 0) {
                maxTotalValue = calResult;
            }
        }

        // Prints "null" when no worker produced a usable value.
        System.out.println("maxTotalValue:" + maxTotalValue);
        System.out.println("totalTime:" + (System.currentTimeMillis() - beginTime));
    }

    /** Returns true if {@code s} is an optional '+'/'-' sign followed by one or more ASCII digits. */
    private static boolean isDecimalInteger(String s) {
        int start = (s.length() > 0 && (s.charAt(0) == '-' || s.charAt(0) == '+')) ? 1 : 0;
        if (start == s.length()) {
            return false; // empty, or a sign with no digits
        }
        for (int i = start; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two strings accepted by {@link #isDecimalInteger(String)} by numeric value
     * without parsing them, so arbitrarily long values are supported.
     *
     * @return a negative number, zero, or a positive number as {@code a} is numerically less
     *     than, equal to, or greater than {@code b}
     */
    private static int compareDecimal(String a, String b) {
        String magA = magnitude(a);
        String magB = magnitude(b);
        // "-0" equals "0", so zero is always treated as non-negative.
        boolean negA = a.charAt(0) == '-' && !magA.equals("0");
        boolean negB = b.charAt(0) == '-' && !magB.equals("0");
        if (negA != negB) {
            return negA ? -1 : 1;
        }
        int cmp;
        if (magA.length() != magB.length()) {
            // Without leading zeros, more digits always means a larger magnitude.
            cmp = magA.length() < magB.length() ? -1 : 1;
        } else {
            // Same digit count: lexicographic order equals numeric order.
            cmp = magA.compareTo(magB);
        }
        // For two negative values the larger magnitude is the smaller number.
        return negA ? -cmp : cmp;
    }

    /** Strips the sign and leading zeros, always leaving at least one digit. */
    private static String magnitude(String s) {
        int i = (s.charAt(0) == '-' || s.charAt(0) == '+') ? 1 : 0;
        while (i < s.length() - 1 && s.charAt(i) == '0') {
            i++;
        }
        return s.substring(i);
    }
}
/**
 * File-system locations of the data files read by the exercise.
 */
public class MagnanimityDataConstant {
    /** Common leading part of every data file path; the file index is appended directly to it. */
    public static final String READ_MAIN_FILE_PATH = "/home/xieyun/test/比賽/bigdata/bigdata";

    /** File extension that follows the file index (despite the name, it is used as a suffix). */
    public static final String READ_MAIN_FILE_PATH_PREFIX = ".txt";

    // Full paths of the six individual files. Each is a compile-time constant built from the
    // two parts above, so it has exactly the same value as the spelled-out literal.
    public static final String MAIN_FILE_PATH = READ_MAIN_FILE_PATH + 0 + READ_MAIN_FILE_PATH_PREFIX;
    public static final String MAIN_FILE_PATH1 = READ_MAIN_FILE_PATH + 1 + READ_MAIN_FILE_PATH_PREFIX;
    public static final String MAIN_FILE_PATH2 = READ_MAIN_FILE_PATH + 2 + READ_MAIN_FILE_PATH_PREFIX;
    public static final String MAIN_FILE_PATH3 = READ_MAIN_FILE_PATH + 3 + READ_MAIN_FILE_PATH_PREFIX;
    public static final String MAIN_FILE_PATH4 = READ_MAIN_FILE_PATH + 4 + READ_MAIN_FILE_PATH_PREFIX;
    public static final String MAIN_FILE_PATH5 = READ_MAIN_FILE_PATH + 5 + READ_MAIN_FILE_PATH_PREFIX;
}
package com.yxie.test.data;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
/**
 * Worker thread that scans one text file holding one decimal integer per line and records
 * the largest value found, kept as text.
 *
 * <p>Usage: call {@link #setFileName(String)}, then {@link #start()}, then
 * {@link #getResult()}, which blocks until the scan has finished.
 */
public class ExecuteThread extends Thread {
    /** True once {@link #run()} has finished, whether or not it produced a value. */
    private volatile boolean completed = false;
    /** Largest value found by the scan, or null if none; written before completion is signalled. */
    private String maxValue;
    private String fileName;

    /** Returns the path of the file this thread scans. */
    public String getFileName() {
        return fileName;
    }

    /** Sets the path of the file to scan; must be called before {@link #start()}. */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Blocks until the scan has finished and returns its result.
     *
     * <p>An interrupt received while waiting does not abort the wait; it is remembered and
     * the calling thread's interrupt flag is restored before returning.
     *
     * @return the text of the numerically largest integer line in the file, or {@code null}
     *     if the file contained no valid integer or could not be read
     */
    public synchronized String getResult() {
        boolean interrupted = false;
        try {
            while (!completed) {
                try {
                    this.wait();
                } catch (InterruptedException e) {
                    // Keep waiting for the result, but do not lose the interrupt.
                    interrupted = true;
                }
            }
            return maxValue;
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Marks the scan as finished and wakes every thread blocked in {@link #getResult()}. */
    private synchronized void markCompleted() {
        completed = true;
        this.notifyAll();
    }

    @Override
    public void run() {
        try {
            this.maxValue = readPartFile();
        } finally {
            // Always signal completion, otherwise an unchecked exception during the scan
            // (for example a missing file name) would leave getResult() blocked forever.
            markCompleted();
        }
    }

    /**
     * Reads the file line by line and returns the numerically largest integer line.
     *
     * <p>Lines are trimmed; blank lines and lines that are not an optionally signed run of
     * digits are skipped. On an I/O error the stack trace is printed and the best value seen
     * so far is returned.
     *
     * @return the largest value as text, or {@code null} if no valid integer was read
     */
    private String readPartFile() {
        BufferedReader in = null;
        // Start with "no value yet" rather than "0" so that a file containing only negative
        // numbers yields its real maximum.
        String maxVal = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            String line;
            while ((line = in.readLine()) != null) {
                String candidate = line.trim();
                if (!isDecimalInteger(candidate)) {
                    continue;
                }
                if (maxVal == null || compareDecimal(candidate, maxVal) > 0) {
                    maxVal = candidate;
                }
            }
        } catch (IOException e) {
            // Best effort: report the problem and fall through with whatever was read.
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a fully read file fails.
                }
            }
        }
        return maxVal;
    }

    /** Returns true if {@code s} is an optional '+'/'-' sign followed by one or more ASCII digits. */
    private static boolean isDecimalInteger(String s) {
        int start = (s.length() > 0 && (s.charAt(0) == '-' || s.charAt(0) == '+')) ? 1 : 0;
        if (start == s.length()) {
            return false; // empty, or a sign with no digits
        }
        for (int i = start; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two strings accepted by {@link #isDecimalInteger(String)} by numeric value
     * without parsing them, so arbitrarily long values are supported.
     *
     * @return a negative number, zero, or a positive number as {@code a} is numerically less
     *     than, equal to, or greater than {@code b}
     */
    private static int compareDecimal(String a, String b) {
        String magA = magnitude(a);
        String magB = magnitude(b);
        // "-0" equals "0", so zero is always treated as non-negative.
        boolean negA = a.charAt(0) == '-' && !magA.equals("0");
        boolean negB = b.charAt(0) == '-' && !magB.equals("0");
        if (negA != negB) {
            return negA ? -1 : 1;
        }
        int cmp;
        if (magA.length() != magB.length()) {
            // Without leading zeros, more digits always means a larger magnitude.
            cmp = magA.length() < magB.length() ? -1 : 1;
        } else {
            // Same digit count: lexicographic order equals numeric order.
            cmp = magA.compareTo(magB);
        }
        // For two negative values the larger magnitude is the smaller number.
        return negA ? -cmp : cmp;
    }

    /** Strips the sign and leading zeros, always leaving at least one digit. */
    private static String magnitude(String s) {
        int i = (s.charAt(0) == '-' || s.charAt(0) == '+') ? 1 : 0;
        while (i < s.length() - 1 && s.charAt(i) == '0') {
            i++;
        }
        return s.substring(i);
    }
}
但是,其中一個同事的程式執行效率比我們的都要高一些,後來分析發現:原來是我們的比較方式有問題。我們以為檔案裡儲存的是整數,所以就先將每行字串轉換為Integer,然後再進行比較(例如 Integer a = Integer.valueOf(line); if (a > max) ...)。其實在兩個整數字串都是非負數、沒有前導零且位數相同的前提下,直接用compareTo比較,效能比先轉換為Integer再比較好很多(位數不同時先比較長度即可,如上面的程式碼)。以200萬(200w)筆資料來說,大約能節約30ms。
從String.compareTo方法的原始碼來看,其實現原理為:逐一取出兩個字串中對應位置的字元(char),然後再對這些字元進行比較,也就是按照字元的Unicode編碼(對數字字元而言即ASCII碼)比較。
而Integer.valueOf則需要逐一取出字串中的每個字元,校驗每個字元是否為數字,再逐位累加計算出整數值,最後還要裝箱成一個Integer物件(必要時會新建物件)。
所以以後要注意:比較整數字串時,直接比較字串比先轉換為Integer更快;但前提是先比較長度,並且處理好負號與前導零,否則結果會不正確。