Skip to content

Commit dfd7e28

Browse files
author
Zheng Tan
committed
add hive README
1 parent 34ca95b commit dfd7e28

File tree

8 files changed

+204
-4
lines changed

8 files changed

+204
-4
lines changed

hive/README.md

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# Hive实验手册
2+
3+
### 0. 下载本实验所需要的Git工程
4+
5+
```
6+
cd /home/bigdata
7+
git clone https://github.com/bigdataguide/hadooptraining.git
8+
cd hadooptraining/hive
9+
```
10+
hive目录下包含三个子目录:
11+
* conf: hive的配置
12+
* command: 启动Hive服务的命令和一些实验用的SQL语句
13+
* data中包含的是外部的一些数据,可以直接导入到Hive表中
14+
15+
**注意:本实验使用的用户主目录是bigdata, 你需要将目录替换为你自己的主目录,下同。**
16+
17+
18+
### 1. 下载安装Hadoop
19+
20+
参考之前的课程内容,下面假定我们的Hadoop目录安装在/home/bigdata/hadoop-2.7.3中,如果你不是安装在这个目录需要替换为你自己的目录
21+
22+
### 2. 配置Mysql
23+
```
24+
#Ubuntu
25+
sudo apt-get install mysql-server
26+
#CentOS
27+
sudo yum install mysql-server
28+
#启动Mysql
29+
sudo service mysqld start
30+
```
31+
32+
### 3. 安装Hive
33+
34+
#### 3.1 下载Hive二进制包
35+
```
36+
wget -P /tmp/ http://apache.mirrors.pair.com/hive/hive-2.1.1/apache-hive-2.1.1-bin.tar.gz
37+
#解压hive到工作目录
38+
tar -zxvf /tmp/apache-hive-2.1.1-bin.tar.gz -C /home/bigdata/
39+
cd /home/bigdata/apache-hive-2.1.1-bin/
40+
```
41+
42+
#### 3.2 配置Hive:拷贝Git工程中的配置到Hive目录
43+
```
44+
cp /home/bigdata/hadooptraining/hive/conf/hive-env.sh /home/bigdata/apache-hive-2.1.1-bin/conf/
45+
cp /home/bigdata/hadooptraining/hive/conf/hive-site.xml /home/bigdata/apache-hive-2.1.1-bin/conf/
46+
```
47+
**配置文件的说明见附1,根据实际情况选择自己的主目录**
48+
49+
#### 3.3 启动Hive组件
50+
```
51+
export HADOOP_HOME=/home/bigdata/hadoop-2.7.3
52+
export PATH=/home/bigdata/apache-hive-2.1.1-bin/bin:$PATH
53+
#启动MetaStore Server
54+
nohup hive --service metastore >> /home/bigdata/apache-hive-2.1.1-bin/logs/metastore.log 2>&1 &
55+
#启动HiveServer2
56+
nohup hive --service hiveserver2 >> /home/bigdata/apache-hive-2.1.1-bin/logs/hive.log 2>&1 &
57+
```
58+
### 4. 启动Hive
59+
#### 4.1 启动Hive CLI
60+
```
61+
hive
62+
```
63+
#### 4.2 启动Beeline CLI
64+
```
65+
beeline -n bigdata -pbigdata -u "jdbc:hive2://localhost:10000/default;auth=noSasl"
66+
#或者
67+
beeline
68+
beeline> !connect jdbc:hive2://localhost:10000/default bigdata bigdata
69+
```
70+
71+
### 附 配置文件说明
72+
在hive-env.sh中,我们配置了HADOOP_HOME目录,你需要将主目录替换为你自己的主目录
73+
HADOOP_HOME=/home/bigdata/hadoop-2.7.3
74+
75+
在hive-site.xml中,我们配置了:
76+
1)使用mysql存储元数据
77+
```xml
78+
<property>
79+
<name>javax.jdo.option.ConnectionURL</name>
80+
<value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
81+
</property>
82+
<property>
83+
<name>javax.jdo.option.ConnectionDriverName</name>
84+
<value>com.mysql.jdbc.Driver</value>
85+
</property>
86+
<property>
87+
<name>javax.jdo.option.ConnectionUserName</name>
88+
<value>root</value>
89+
</property>
90+
<property>
91+
<name>javax.jdo.option.ConnectionPassword</name>
92+
<value>root</value>
93+
</property>
94+
```
95+
2)hive在HDFS上的存储路径
96+
```xml
97+
<property>
98+
<name>hive.metastore.warehouse.dir</name>
99+
<value>/warehouse</value>
100+
</property>
101+
<property>
102+
<name>fs.defaultFS</name>
103+
<value>hdfs://bigdata:9000</value>
104+
</property>
105+
```
106+
3)metastore的端口
107+
```xml
108+
<property>
109+
        <name>hive.metastore.uris</name>
110+
        <value>thrift://bigdata:9083</value>
111+
</property>
112+
```
113+
4)HiveServer2的端口
114+
```xml
115+
<property>
116+
    <name>hive.server2.thrift.port</name>
117+
    <value>10000</value>
118+
</property>
119+
<property>
120+
<name>beeline.hs2.connection.user</name>
121+
<value>bigdata</value>
122+
</property>
123+
<property>
124+
<name>beeline.hs2.connection.password</name>
125+
<value>bigdata</value>
126+
</property>
127+
```
128+
5) 此外我们还配置自动创建Meta Store的数据库和表
129+
```xml
130+
<property>
131+
<name>datanucleus.autoCreateSchema</name>
132+
<value>true</value>
133+
</property>
134+
<property>
135+
<name>datanucleus.autoStartMechanism</name>
136+
<value>SchemaTable</value>
137+
</property>
138+
<property>
139+
<name>datanucleus.schema.autoCreateTables</name>
140+
<value>true</value>
141+
</property>
142+
```

hive/command/employees.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CREATE TABLE IF NOT EXISTS employees (
2+
name STRING,
3+
salary FLOAT,
4+
subordinates ARRAY<STRING>,
5+
deductions MAP<STRING, FLOAT>,
6+
address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
7+
)
8+
ROW FORMAT DELIMITED
9+
FIELDS TERMINATED BY '\001'
10+
COLLECTION ITEMS TERMINATED BY '\002'
11+
MAP KEYS TERMINATED BY '\003'
12+
LINES TERMINATED BY '\n'
13+
STORED AS TEXTFILE;
14+
15+
-- LOAD DATA LOCAL INPATH '/home/bigdata/hadooptraining/hive/data/employees.txt' OVERWRITE INTO TABLE employees;

hive/command/employees_part.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
CREATE TABLE IF NOT EXISTS employees_part (
2+
name STRING,
3+
salary FLOAT,
4+
subordinates ARRAY<STRING>,
5+
deductions MAP<STRING, FLOAT>,
6+
address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
7+
)
8+
PARTITIONED BY (state STRING)
9+
ROW FORMAT DELIMITED
10+
FIELDS TERMINATED BY '\001'
11+
COLLECTION ITEMS TERMINATED BY '\002'
12+
MAP KEYS TERMINATED BY '\003'
13+
LINES TERMINATED BY '\n'
14+
STORED AS TEXTFILE;
15+
16+
-- LOAD DATA LOCAL INPATH '/home/bigdata/hadooptraining/hive/data/employees.txt'
17+
-- OVERWRITE INTO TABLE employees_part PARTITION(state='IL');
18+
19+
--INSERT INTO TABLE employees_part PARTITION(state = 'IL')
20+
--SELECT * FROM employees where address.state='IL';
21+
22+
-- FROM employees e
23+
-- INSERT OVERWRITE TABLE employees_part PARTITION(state = 'IL') SELECT e.* where e.address.state='IL'
24+
-- INSERT OVERWRITE TABLE employees_part PARTITION(state = 'CA') SELECT e.* where e.address.state='CA'
25+
-- INSERT OVERWRITE TABLE employees_part PARTITION(state = 'NY') SELECT e.* where e.address.state='NY';
26+
27+
FROM employees e
28+
INSERT OVERWRITE TABLE employees_part PARTITION(state) SELECT e.*,e.address.state
29+
-- INSERT OVERWRITE TABLE employees_part PARTITION(state = 'CA') SELECT * where e.address.state='CA'
30+
-- INSERT OVERWRITE TABLE employees_part PARTITION(state = 'NY') SELECT * where e.address.state='NY';

hive/command/skewed.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
CREATE TABLE T1(key STRING, val STRING)
2+
SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE;

hive/command/weblog.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE TABLE IF NOT EXISTS weblog (
2+
user_id INT,
3+
url STRING,
4+
source_ip STRING
5+
) PARTITIONED BY (dt STRING)
6+
CLUSTERED BY (user_id) INTO 96 BUCKETS;
7+

hive/conf/hive-env.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export HADOOP_HEAPSIZE=512
4646

4747
# Set HADOOP_HOME to point to a specific hadoop install directory
4848
# HADOOP_HOME=${bin}/../../hadoop
49+
HADOOP_HOME=/home/bigdata/hadoop-2.7.3
4950

5051
# Hive Configuration Directory can be controlled by:
5152
# export HIVE_CONF_DIR=

hive/conf/hive-site.xml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818
<configuration>
1919
<property>
2020
        <name>hive.metastore.uris</name>
21-
        <value>thrift://bigdata:9083</value>
21+
        <value>thrift://localhost:9083</value>
2222
</property>
2323
<property>
2424
    <name>hive.server2.thrift.port</name>
2525
    <value>10000</value>
2626
</property>
2727
<property>
2828
<name>javax.jdo.option.ConnectionURL</name>
29-
<value>jdbc:mysql://bigdata/metastore?createDatabaseIfNotExist=true</value>
29+
<value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
3030
</property>
3131
<property>
3232
<name>javax.jdo.option.ConnectionDriverName</name>
@@ -46,7 +46,7 @@
4646
</property>
4747
<property>
4848
<name>fs.defaultFS</name>
49-
<value>hdfs://bigdata:9000</value>
49+
<value>hdfs://localhost:9000</value>
5050
</property>
5151
<property>
5252
<name>datanucleus.autoCreateSchema</name>
@@ -56,7 +56,6 @@
5656
<name>datanucleus.autoStartMechanism</name>
5757
<value>SchemaTable</value>
5858
</property>
59-
datanucleus.schema.autoCreateTables
6059
<property>
6160
<name>datanucleus.schema.autoCreateTables</name>
6261
<value>true</value>

hive/data/employees.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
John Doe100000.0Mary SmithTodd JonesFederal Taxes.2State Taxes.05Insurance.11 Michigan Ave.ChicagoIL60600
2+
Mary Smith80000.0Bill KingFederal Taxes.2State Taxes.05Insurance.1100 Ontario St.ChicagoIL60601
3+
Todd Jones70000.0Federal Taxes.15State Taxes.03Insurance.1200 Chicago Ave.Oak ParkIL60700
4+
Bill King60000.0Federal Taxes.15State Taxes.03Insurance.1300 Obscure Dr.ObscuriaIL60100

0 commit comments

Comments
 (0)