本文共 4418 字,大约阅读时间需要 14 分钟。
一、前提条件
Hive表已经创建好,并且远程访问Hive的代码已写好。
Hive表结构如下:
-- External (unmanaged) table over HDFS dir '/clientdata'; rows are '|'-delimited text, time is a string column parsed later in Java.
create external table clientdata(screen string, model string, userID string, country string, province string, city string, network string, time string) row format delimited fields terminated by '|' location '/clientdata';
查询Hive的Dao如下:
package cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.utils.JDBCUtils;

/**
 * Read-only DAO for the external Hive table {@code clientdata}
 * (columns: screen, model, userID, country, province, city, network, time),
 * accessed over JDBC through the project's {@code JDBCUtils} helper.
 *
 * <p>All methods are static; on any failure they log the stack trace and
 * return an empty list rather than propagating the exception.</p>
 */
public class ClientDataDaoHiveImpl {

    /** Pattern of the {@code time} column as stored in Hive (a plain string). */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Utility-style DAO — prevent instantiation. */
    private ClientDataDaoHiveImpl() {
    }

    /**
     * Materializes an open result set over the clientdata columns into a list
     * of {@link ClientData} beans.
     *
     * @param rs result set positioned before its first row
     * @return one {@code ClientData} per row; empty list for an empty result
     * @throws SQLException   if a column cannot be read
     * @throws ParseException if a {@code time} value does not match
     *                        {@code yyyy-MM-dd HH:mm:ss}
     */
    private static List<ClientData> resultSetToList(ResultSet rs)
            throws SQLException, ParseException {
        List<ClientData> list = new ArrayList<>();
        // SimpleDateFormat is not thread-safe, so keep it method-local, but
        // hoist it out of the loop: the original rebuilt it for every row.
        SimpleDateFormat formatter = new SimpleDateFormat(TIME_PATTERN);
        while (rs.next()) {
            ClientData clientData = new ClientData();
            clientData.setScreen(rs.getString("screen"));
            clientData.setModel(rs.getString("model"));
            clientData.setUserID(rs.getString("userID"));
            clientData.setCountry(rs.getString("country"));
            clientData.setProvince(rs.getString("province"));
            clientData.setCity(rs.getString("city"));
            clientData.setNetwork(rs.getString("network"));
            clientData.setTime(formatter.parse(rs.getString("time")));
            list.add(clientData);
            // NOTE: the per-row System.out.println was removed — printing each
            // of up to 100000 rows dominated the cost of a bulk read.
        }
        return list;
    }

    /**
     * Returns every row of {@code clientdata}.
     *
     * @return all rows, or an empty list if the query fails
     */
    public static List<ClientData> list() {
        List<ClientData> list = new ArrayList<>();
        Connection conn = null;
        Statement st = null;
        ResultSet rs = null;
        String sql = "select * from clientdata";
        try {
            conn = JDBCUtils.getConnection();
            st = conn.createStatement();
            rs = st.executeQuery(sql);
            list = resultSetToList(rs);
        } catch (Exception e) {
            // Broad catch kept on purpose: JDBCUtils.getConnection's checked
            // exceptions are declared in project code not visible here.
            e.printStackTrace();
        } finally {
            JDBCUtils.release(conn, st, rs);
        }
        return list;
    }

    /**
     * Returns one page of {@code clientdata}.
     *
     * @param page     1-based page number
     * @param pageSize rows per page
     * @return the requested page, or an empty list if the query fails
     */
    public static List<ClientData> list(int page, int pageSize) {
        List<ClientData> list = new ArrayList<>();
        Connection conn = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        // NOTE(review): MySQL-style "limit offset,count" — confirm the target
        // HiveServer2 version accepts this form and bind variables in LIMIT.
        String sql = "select * from clientdata limit ?,?";
        try {
            conn = JDBCUtils.getConnection();
            ps = conn.prepareStatement(sql);
            ps.setInt(1, (page - 1) * pageSize); // offset of the first row
            ps.setInt(2, pageSize);
            rs = ps.executeQuery();
            list = resultSetToList(rs);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            JDBCUtils.release(conn, ps, rs);
        }
        return list;
    }

    /**
     * Returns every row belonging to one user.
     *
     * @param userID user identifier matched against the {@code userID} column
     * @return the user's rows, or an empty list if the query fails
     */
    public static List<ClientData> listByID(String userID) {
        List<ClientData> list = new ArrayList<>();
        Connection conn = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        // TODO: switch to the partitioned table clientdata_part once available
        // to avoid a full scan per user.
        String sql = "select * from clientdata where userID = ?";
        try {
            conn = JDBCUtils.getConnection();
            ps = conn.prepareStatement(sql);
            ps.setString(1, userID);
            rs = ps.executeQuery();
            list = resultSetToList(rs);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            JDBCUtils.release(conn, ps, rs);
        }
        return list;
    }

    /** Ad-hoc smoke test: dump the whole table. */
    public static void main(String[] args) {
        list();
    }
}
二、使用Java操作HBase的操作已写好
三、查询Hive后使用Java API向HBase中写入数据
package cn.edu.shu.ces.chenjie.tianyi.hbase.hive2hbase;

import java.io.IOException;
import java.util.List;

import cn.edu.shu.ces.chenjie.tianyi.hbase.dao.impl.ClientDataDaoHBaseImpl;
import cn.edu.shu.ces.chenjie.tianyi.hive.model.ClientData;
import cn.edu.shu.ces.chenjie.tianyi.hive.dao.impl.ClientDataDaoHiveImpl;

/**
 * One-shot migration driver: pages through the Hive table {@code clientdata}
 * and writes every row into HBase via {@link ClientDataDaoHBaseImpl}.
 */
public class ClientDataFromHive2Base {

    /** Rows fetched from Hive per page; sized to fit comfortably in heap. */
    private static final int PAGE_SIZE = 100000;

    /**
     * Creates the target HBase table, then copies Hive rows page by page
     * until an empty page signals the table has been fully traversed.
     *
     * @param args unused
     * @throws IOException if HBase table creation or writes fail
     */
    public static void main(String[] args) throws IOException {
        ClientDataDaoHBaseImpl.createTable();
        int page = 1;
        while (true) {
            List<ClientData> cds = ClientDataDaoHiveImpl.list(page, PAGE_SIZE);
            if (cds.isEmpty()) {
                break; // empty page => no more rows in Hive
            }
            // Fixed: original logged i * 1000000, overstating progress 10x
            // versus the 100000-row page size; cast avoids int overflow.
            System.out.println("查询完毕,正在保存-------------------------------------->"
                    + (long) page * PAGE_SIZE);
            ClientDataDaoHBaseImpl.saveList(cds);
            page++;
        }
    }
}
四、问题分析:
Hive与HBase若部署在同一个集群上,大批量的分页查询与写入会同时争用集群的计算、内存与磁盘IO资源,可能造成大量资源消耗,影响其他作业的运行。
转载地址:http://xuqrb.baihongyu.com/