Hadoop 讀取 HDFS 並操作 HBase：將 HDFS 檔案內容按 group by 聚合後寫入 HBase

阿新 • • 發佈：2019-02-05

package org.ucas.hbase;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;

/**
 * Group-by aggregation job: reads a '|'-delimited text file from HDFS, groups
 * its lines by one column, computes the requested aggregates (count, avg, max,
 * min, sum) per group and writes one HBase row per group into the
 * {@code Result} table.
 *
 * <p>Command line, as parsed by {@link #main(String[])}:
 * {@code <anything>=<file> <anything>R<keyColumn> <anything>:<agg>[,<agg>...]}
 * where each {@code <agg>} is either {@code count} or {@code name(R<column>)}.
 */
public class Hw1Grp2 {

    /** Name of the HBase table that receives the results. */
    private static final String TABLE_NAME = "Result";

    /** Column family created in the result table and used for every cell written. */
    private static final String COLMUN_FAMILY = "res";

    /** Charset used for decoding the input file and encoding HBase keys and values. */
    private static final String CHARSET = "UTF-8";

    /** Table that {@link #insert} writes to; must be set before inserting. */
    private HTable table;

    /**
     * @return the table currently used by {@link #insert}, or {@code null} if none was set
     */
    public HTable getTable() {
        return table;
    }

    /**
     * @param table the table that subsequent {@link #insert} calls write to
     */
    public void setTable(HTable table) {
        this.table = table;
    }

    /**
     * Opens a file through the Hadoop {@link FileSystem} API for line-based reading.
     *
     * @param file URI or path of the file to open
     * @return a reader over the file content, decoded as UTF-8; the caller must close it
     * @throws IOException if the file system or the file cannot be opened
     * @throws URISyntaxException declared for backward compatibility with existing callers
     */
    public BufferedReader readHdfs(String file) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(file), conf);
        FSDataInputStream inStream = fs.open(new Path(file));
        // Explicit charset: the no-arg constructor would use the platform default.
        return new BufferedReader(new InputStreamReader(inStream, CHARSET));
    }

    /**
     * Creates a fresh table with the single column family {@code res}. If a
     * table with the same name already exists it is disabled and deleted first.
     *
     * @param tableName name of the table to (re)create
     * @return a handle on the newly created table; the caller must close it
     * @throws MasterNotRunningException propagated from {@link HBaseAdmin}
     * @throws ZooKeeperConnectionException propagated from {@link HBaseAdmin}
     * @throws IOException if any admin operation fails
     */
    public HTable createTable(String tableName) throws MasterNotRunningException,
            ZooKeeperConnectionException, IOException {
        Configuration configuration = HBaseConfiguration.create();
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);
        try {
            if (hAdmin.tableExists(tableName)) {
                System.out.println("table is exists, delete exists table");
                hAdmin.disableTable(tableName);
                hAdmin.deleteTable(tableName);
            } else {
                System.out.println("table not exists");
            }
            HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
            htd.addFamily(new HColumnDescriptor(COLMUN_FAMILY));
            hAdmin.createTable(htd);
        } finally {
            // Release the admin connection even when one of the calls above throws.
            hAdmin.close();
        }
        System.out.println("table create");
        return new HTable(configuration, tableName);
    }

    /**
     * Writes a single cell to the current table.
     *
     * @param rowKey row key of the cell
     * @param family column family of the cell
     * @param qualifier column qualifier of the cell
     * @param value cell value
     * @throws IOException if the put fails
     */
    public void insert(String rowKey, String family, String qualifier, String value)
            throws IOException {
        Put put = new Put(rowKey.getBytes(CHARSET));
        put.add(family.getBytes(CHARSET), qualifier.getBytes(CHARSET), value.getBytes(CHARSET));
        table.put(put);
    }

    /**
     * Reads {@code file}, groups its '|'-separated lines by column {@code rowKey}
     * and inserts the requested aggregates for every group into the current table.
     *
     * <p>Recognised keys of {@code args} are {@code count}, {@code avg},
     * {@code max}, {@code min} and {@code sum}; each value is the index of the
     * column to aggregate. Sum, max and min columns are parsed as {@code int},
     * the avg column as {@code float}; the average is rounded to two decimals.
     * Empty lines are skipped.
     *
     * @param file URI or path of the input file
     * @param rowKey index of the group-by column; its value becomes the HBase row key
     * @param args aggregate name to column index
     * @throws IOException if reading the file or writing to HBase fails
     * @throws URISyntaxException declared for backward compatibility with existing callers
     * @throws NumberFormatException if an aggregated column is not numeric
     */
    public void handleData(String file, int rowKey, Map<String, Integer> args)
            throws IOException, URISyntaxException {
        // Column index per aggregate, or -1 when that aggregate was not requested.
        int countCol = columnOf(args, "count");
        int avgCol = columnOf(args, "avg");
        int maxCol = columnOf(args, "max");
        int sumCol = columnOf(args, "sum");
        int minCol = columnOf(args, "min");

        // Per-group accumulators, keyed by the value of the group-by column.
        Map<String, Integer> mapCount = new HashMap<String, Integer>();
        Map<String, Integer> mapSum = new HashMap<String, Integer>();
        Map<String, Integer> mapMax = new HashMap<String, Integer>();
        Map<String, Integer> mapMin = new HashMap<String, Integer>();
        // Holds the running total of the avg column; divided by the count when written.
        Map<String, Float> mapAvg = new HashMap<String, Float>();

        BufferedReader buffer = readHdfs(file);
        try {
            String line;
            while ((line = buffer.readLine()) != null) {
                if (line.isEmpty()) {
                    // A blank line has no columns to group or aggregate.
                    continue;
                }
                String[] col = line.split("\\|");
                String group = col[rowKey];

                // The count is always tracked because the average needs it.
                Integer seen = mapCount.get(group);
                mapCount.put(group, seen == null ? 1 : seen + 1);

                if (sumCol != -1) {
                    int value = Integer.parseInt(col[sumCol]);
                    Integer total = mapSum.get(group);
                    mapSum.put(group, total == null ? value : total + value);
                }
                if (avgCol != -1) {
                    float value = Float.parseFloat(col[avgCol]);
                    Float total = mapAvg.get(group);
                    mapAvg.put(group, total == null ? value : total + value);
                }
                if (maxCol != -1) {
                    int value = Integer.parseInt(col[maxCol]);
                    Integer best = mapMax.get(group);
                    if (best == null || value > best) {
                        mapMax.put(group, value);
                    }
                }
                if (minCol != -1) {
                    int value = Integer.parseInt(col[minCol]);
                    Integer best = mapMin.get(group);
                    if (best == null || value < best) {
                        mapMin.put(group, value);
                    }
                }
            }
        } finally {
            // The reader was previously never closed.
            buffer.close();
        }

        // Write one row per group with one cell per requested aggregate.
        for (Map.Entry<String, Integer> entry : mapCount.entrySet()) {
            String key = entry.getKey();
            int count = entry.getValue();
            if (countCol != -1) {
                insert(key, COLMUN_FAMILY, "count", String.valueOf(count));
            }
            if (avgCol != -1) {
                // Round to two decimals: scale by 100, round, scale back.
                float avg = (float) Math.round(mapAvg.get(key) / count * 100) / 100;
                insert(key, COLMUN_FAMILY, "avg(R" + avgCol + ")", String.valueOf(avg));
            }
            if (maxCol != -1) {
                insert(key, COLMUN_FAMILY, "max(R" + maxCol + ")", String.valueOf(mapMax.get(key)));
            }
            if (minCol != -1) {
                insert(key, COLMUN_FAMILY, "min(R" + minCol + ")", String.valueOf(mapMin.get(key)));
            }
            if (sumCol != -1) {
                insert(key, COLMUN_FAMILY, "sum(R" + sumCol + ")", String.valueOf(mapSum.get(key)));
            }
        }
        System.out.println("handle data success");
    }

    /** Returns the column index registered for {@code name}, or -1 when absent. */
    private static int columnOf(Map<String, Integer> args, String name) {
        Integer column = args.get(name);
        return column == null ? -1 : column;
    }

    /**
     * Parses the command line (input file, group-by column, aggregates),
     * recreates the result table and runs the aggregation.
     *
     * <p>Exits with status 1 after printing a message when the arguments are
     * missing or malformed.
     *
     * @param args exactly three arguments, see the class documentation
     * @throws IOException if HDFS or HBase access fails
     * @throws URISyntaxException propagated from {@link #handleData}
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        if (args.length != 3) {
            System.out.println("input args length error");
            System.exit(1);
        }
        String file = StringUtils.substringAfter(args[0], "=");
        String keyNum = StringUtils.substringAfter(args[1], "R");
        String colsName = StringUtils.substringAfter(args[2], ":");
        // substringAfter returns "" (not null) when the separator is missing,
        // so emptiness is what has to be checked here.
        if (StringUtils.isEmpty(file) || StringUtils.isEmpty(keyNum)
                || StringUtils.isEmpty(colsName)) {
            System.out.println("args error");
            System.exit(1);
        }

        int rowKey = 0;
        Map<String, Integer> cmd = new HashMap<String, Integer>();
        try {
            rowKey = Integer.parseInt(keyNum);
            for (String spec : colsName.split(",")) {
                if (spec.equals("count")) {
                    // count has no column of its own; it is registered with the key column.
                    cmd.put(spec, rowKey);
                } else {
                    // "avg(R3)" -> name "avg", column 3. A spec without "R...)"
                    // yields null, which parseInt rejects with NumberFormatException.
                    cmd.put(StringUtils.substringBefore(spec, "("),
                            Integer.parseInt(StringUtils.substringBetween(spec, "R", ")")));
                }
            }
        } catch (NumberFormatException e) {
            System.out.println("args error");
            System.exit(1);
        }

        System.out.println("file:" + file);
        for (Map.Entry<String, Integer> entry : cmd.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }

        Hw1Grp2 h = new Hw1Grp2();
        h.setTable(h.createTable(TABLE_NAME));
        try {
            h.handleData(file, rowKey, cmd);
        } finally {
            // Release the table handle whether or not the aggregation succeeded.
            h.getTable().close();
        }
        System.out.println("program is over");
    }
}

hadoop讀寫hdfs和操作hbase，把hbase內容按group by排序

package org.ucas.hbase; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import jav

關於hadoop HDFS進行讀寫文件操作的問題

close config system inf () str on() exception oca 問題： java無法鏈接報錯顯示被拒絕鏈接剛開始一直以為是自己hadoop沒有配好（或者自己的jar包沒有導入好），開始就走偏了導致時間浪費原因是：hadoop沒有開

自旋鎖，讀寫鎖和順序鎖的實現原理

並且保護表達 min 返回 create creat rwlock ini 常用的同步原語鎖，到多核處理器時代鎖已經是必不可少的同步方式之一了。無論設計多優秀的多線程數據結構，都避不開有競爭的臨界區，此時高效的鎖顯得至關重要。鎖的顆粒度是框架/程序設計者所關註的，

hadoop讀寫操作

新建專案：匯入libs：再hadoop解壓下的庫 2.7.2： https://download.csdn.net/download/ssllkkyyaa/10758406 檔案api測試： package com.example.demo; import

Hbase讀寫流程和定址機制

寫操作流程 (1) Client通過Zookeeper的排程，向RegionServer發出寫資料請求，在Region中寫資料。 (2) 資料被寫入Region的MemStore，直到MemStore達到預設閾值。 (3) MemStore中的資料被Flush成一個StoreFile。 (4) 隨著S

Spark -14：spark Hadoop 高可用模式下讀寫hdfs

第一種，通過配置檔案 val sc = new SparkContext() sc.hadoopConfiguration.set("fs.defaultFS", "hdfs://cl

Spark WordCount 讀寫hdfs檔案 (read file from hadoop hdfs and write output to hdfs)

create a scala project and a WordCount class as follow: package com.qiurc.test import org.apache.spark._ import SparkContext._ o

HBase建表高階屬性，hbase應用案例看行鍵設計，HBase和mapreduce結合，從Hbase中讀取資料、分析，寫入hdfs，從hdfs中讀取資料寫入Hbase，協處理器和二級索引

1. Hbase高階應用 1.1建表高階屬性下面幾個shell 命令在hbase操作中可以起到很到的作用，且主要體現在建表的過程中，看下面幾個create 屬性 1、 BLOOMFILTER 預設是NONE 是否使用布隆過慮及使用何種方式布隆

同步與互斥，讀寫鎖和互斥鎖

原文出處：http://blog.csdn.NET/u012884354/article/details/46691761 相交程序之間的關係主要有兩種，同步與互斥。所謂互斥，是指散佈在不同程序之間的若干程式片斷，當某個程序執行其中一個程式片段時，其它程序就不能執行它們之

Spring配置動態數據源-讀寫分離和多數據源

brush ride 常開 resolve ttr 表達 customer 事務管理 cda 　　在現在互聯網系統中，隨著用戶量的增長，單數據源通常無法滿足系統的負載要求。因此為了解決用戶量增長帶來的壓力，在數據庫層面會采用讀寫分離技術和數據庫拆分等技術。讀寫分離就是就是一

Go(day7 [終端讀寫| 文件操作 | 命令行參數 | Json序列化])

命令行 family lar defer copy dal tput count trunc 終端讀寫操作終端相關文件句柄常量os.Stdin:標準輸入os.Stdout:標準輸出os.Stderr:標準錯誤輸出終端讀寫示例://Sscanf 是從變量中讀取值package

Python讀寫文件操作

Python讀寫文件操作linux系統代碼實例：import os #導入模塊f = open('/root/xxoo.txt','w+') #在'/root路徑下創建xxoo.txt文件，並加入讀寫模式 w是寫模式， +是讀/寫模式f.write

python中文件讀寫的相關操作及應用

img write {} list r+ 密碼 else data adl 1：既讀又寫 1 with open(‘數據‘,mode=‘r+‘,encoding=‘utf-8‘) as f: 2 print(f.read()) #輸出結

阿里P8架構師談：資料庫分庫分表、讀寫分離的原理實現，使用場景

為什麼要分庫分表和讀寫分離？類似淘寶網這樣的網站，海量資料的儲存和訪問成為了系統設計的瓶頸問題，日益增長的業務資料，無疑對資料庫造成了相當大的負載，同時對於系統的穩定性和擴充套件性提出很高的要求。隨著時間和業務的發展，資料庫中的表會越來越多，表中的資料量也會越來越大，相應地，

tp5.0主從資料庫讀寫分離和主從理解

在配置中設定 'hostname' => '伺服器地址,伺服器地址', 'database' => 'linux', // 使用者名稱 'username' => 'root,root', // 密碼 'password' => '123,123456',

檔案記憶體對映mmap解決大檔案快速讀寫問題和程序間共享記憶體

mmap函式主要用途有三個： 1、將一個普通檔案對映到記憶體中，通常在需要對檔案進行頻繁讀寫時使用，這樣用記憶體讀寫取代I/O讀寫，以獲得較高的效能； 2、將特殊檔案進行匿名記憶體對映，可以為關聯程序提供共享記憶體空間； 3、為無關聯的程序提供共享記憶體空間，一般也是將一個普通檔案對映到

系統學習 Java IO (十四)----字元讀寫快取和回退 BufferedReader/BufferedWriter & PushbackReader

目錄：系統學習 Java IO---- 目錄，概覽 BufferedReader BufferedReader 類構造器接收一個 Reader 物件，為 Reader 例項提供緩衝。緩衝可以加快 IO 的速度。 BufferedReader 不是一次從網路或磁碟讀取一個字元，而是一次讀取一個更大的塊。

Java 讀寫 hdfs檔案或者目錄

1.讀取單個檔案 [java] view plain copy Date date = DateUtil.getSpecifiedDayBefore(); String&

【C++】C++ 檔案讀寫 ofstream和ifstream詳細用法

此文貌似值得一讀：https://www.cnblogs.com/hdk1993/p/5853233.html 原文自：https://www.cnblogs.com/batman425/p/3179520.html 原文出自【位元網】，轉載請保留原文連結

ubuntu16.04下 python程式設計修改資料夾許可權、讀寫資料庫等操作

1. python程式設計修改資料夾許可權 import os os.system('chmod 777 ', file) 擴充套件： Linux系統中，每個使用者的角色和許可權劃分的很細緻也很嚴格，每個檔案（目錄）都設有訪問許可許可權，利用這種機制來決定某個使用者通過某種方

hadoop讀寫hdfs和操作hbase，把hbase內容按group by排序

相關推薦