Thursday, July 22, 2010

Installing Hadoop on a CentOS AMI



cd /etc/yum.repos.d/
[root@ip-10-250-57-221 yum.repos.d]# ll
total 4
-rw-r--r-- 1 root root 2245 Oct 1 2009 CentOS-Base.repo
[root@ip-10-250-57-221 yum.repos.d]# wget http://archive.cloudera.com/redhat/cdh/cloudera-cdh3.repo
--2010-07-12 08:47:56-- http://archive.cloudera.com/redhat/cdh/cloudera-cdh3.repo
Resolving archive.cloudera.com... 184.73.170.21
Connecting to archive.cloudera.com|184.73.170.21|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 211 [text/plain]
Saving to: `cloudera-cdh3.repo'

100%[======================================>] 211 --.-K/s in 0s

2010-07-12 08:47:56 (40.2 MB/s) - `cloudera-cdh3.repo' saved [211/211]

[root@ip-10-250-57-221 yum.repos.d]# ls
CentOS-Base.repo cloudera-cdh3.repo
[root@ip-10-250-57-221 yum.repos.d]#


yum install java-1.6.0-openjdk hadoop-0.20
Loaded plugins: fastestmirror
Determining fastest mirrors
* addons: mirror.fdcservers.net
* base: centos.mirror.nac.net
* extras: mirror.vcu.edu
* updates: mirror.rackspace.com
addons | 951 B 00:00
addons/primary | 202 B 00:00
base | 2.1 kB 00:00
base/primary_db | 1.6 MB 00:00
cloudera-cdh3 | 951 B 00:00
cloudera-cdh3/primary | 18 kB 00:00
cloudera-cdh3 62/62
extras | 2.1 kB 00:00
extras/primary_db | 185 kB 00:06
updates | 1.9 kB 00:00
updates/primary_db | 296 kB 00:00
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package hadoop-0.20.noarch 0:0.20.2+320-1 set to be updated
---> Package java-1.6.0-openjdk.i386 1:1.6.0.0-1.11.b16.el5 set to be updated
--> Processing Dependency: jpackage-utils >= 1.7.3-1jpp.2 for package: java-1.6.0-openjdk
--> Processing Dependency: libgif.so.4 for package: java-1.6.0-openjdk
--> Processing Dependency: tzdata-java for package: java-1.6.0-openjdk
--> Running transaction check
---> Package giflib.i386 0:4.1.3-7.1.el5_3.1 set to be updated
---> Package jpackage-utils.noarch 0:1.7.3-1jpp.2.el5 set to be updated
---> Package tzdata-java.i386 0:2010i-1.el5 set to be updated
--> Finished Dependency Resolution

Dependencies Resolved

================================================================================
Package Arch Version Repository Size
================================================================================
Installing:
hadoop-0.20 noarch 0.20.2+320-1 cloudera-cdh3 21 M
java-1.6.0-openjdk i386 1:1.6.0.0-1.11.b16.el5 updates 37 M
Installing for dependencies:
giflib i386 4.1.3-7.1.el5_3.1 base 39 k
jpackage-utils noarch 1.7.3-1jpp.2.el5 base 61 k
tzdata-java i386 2010i-1.el5 updates 176 k

Transaction Summary
================================================================================
Install 5 Package(s)
Update 0 Package(s)
Remove 0 Package(s)

Total download size: 58 M
Is this ok [y/N]: y
Downloading Packages:
(1/5): giflib-4.1.3-7.1.el5_3.1.i386.rpm | 39 kB 00:00
(2/5): jpackage-utils-1.7.3-1jpp.2.el5.noarch.rpm | 61 kB 00:00
(3/5): tzdata-java-2010i-1.el5.i386.rpm | 176 kB 00:00
(4/5): hadoop-0.20-0.20.2+320-1.noarch.rpm | 21 MB 00:01
(5/5): java-1.6.0-openjdk-1.6.0.0-1.11.b16.el5.i386.rpm | 37 MB 00:03
--------------------------------------------------------------------------------
Total 12 MB/s | 58 MB 00:04
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : giflib 1/5
Installing : jpackage-utils 2/5
Installing : tzdata-java 3/5
Installing : hadoop-0.20 4/5
Installing : java-1.6.0-openjdk 5/5

Installed:
hadoop-0.20.noarch 0:0.20.2+320-1
java-1.6.0-openjdk.i386 1:1.6.0.0-1.11.b16.el5

Dependency Installed:
giflib.i386 0:4.1.3-7.1.el5_3.1 jpackage-utils.noarch 0:1.7.3-1jpp.2.el5
tzdata-java.i386 0:2010i-1.el5

Complete!
[root@ip-10-250-57-221 yum.repos.d]#

[root@ip-10-250-57-221 conf]# vi hadoop-env.sh
# The java implementation to use. Required.
export JAVA_HOME=/usr/java/jdk1.6.0_14/   # uncomment this line (remove the leading #) and set it to your JDK install path


[root@ip-10-250-57-221 java]# vi ~/.bash_profile
# .bash_profile
#
# Login-shell setup for the Hadoop host: loads ~/.bashrc, sets JAVA_HOME,
# and makes sure $HOME/bin and the JDK's bin directory are on PATH.

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
  . ~/.bashrc
fi

# User specific environment and startup programs

# JAVA_HOME has to be assigned BEFORE it is used to build PATH; otherwise the
# first login expands "$JAVA_HOME/bin" to just "/bin".
JAVA_HOME=/usr/java/jdk1.6.0_14/

# Append each directory only if it is not already present, so sourcing this
# file repeatedly does not keep growing PATH. No trailing ':' is added: an
# empty PATH entry means "current directory", which is unsafe for root.
for profile_dir in "$HOME/bin" "${JAVA_HOME%/}/bin"; do
  case ":$PATH:" in
    *":$profile_dir:"*) ;;
    *) PATH="${PATH:+$PATH:}$profile_dir" ;;
  esac
done
unset profile_dir

export PATH
export JAVA_HOME
unset USERNAME

[root@ip-10-250-57-221 ~]# . .bash_profile
[root@ip-10-250-57-221 ~]# . .bash_profile
[root@ip-10-250-57-221 ~]# . .bash_profile
[root@ip-10-250-57-221 ~]# . .bash_profile
[root@ip-10-250-57-221 ~]# java -version
java version "1.6.0_14"
Java(TM) SE Runtime Environment (build 1.6.0_14-b08)
Java HotSpot(TM) Client VM (build 14.0-b16, mixed mode)
[root@ip-10-250-57-221 ~]# echo $JAVA_HOME
/usr/java/jdk1.6.0_14/
[root@ip-10-250-57-221 ~]# echo $PATH
/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/home/ec2/bin:/home/ec2/bin:/root/bin:/home/ec2/bin:/root/bin:/usr/java/default/bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin:


[root@ip-10-250-57-221 java]# vi ~/.bash_profile
# .bash_profile
#
# Login-shell setup for the Hadoop host: loads ~/.bashrc, sets JAVA_HOME and
# HADOOP_HOME, and makes sure $HOME/bin and the JDK's bin directory are on PATH.

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
  . ~/.bashrc
fi

# User specific environment and startup programs

# JAVA_HOME has to be assigned BEFORE it is used to build PATH; otherwise the
# first login expands "$JAVA_HOME/bin" to just "/bin".
JAVA_HOME=/usr/java/jdk1.6.0_14/
HADOOP_HOME=/usr/lib/hadoop-0.20

# Append each directory only if it is not already present, so sourcing this
# file repeatedly does not keep growing PATH. No trailing ':' is added: an
# empty PATH entry means "current directory", which is unsafe for root.
for profile_dir in "$HOME/bin" "${JAVA_HOME%/}/bin"; do
  case ":$PATH:" in
    *":$profile_dir:"*) ;;
    *) PATH="${PATH:+$PATH:}$profile_dir" ;;
  esac
done
unset profile_dir

export PATH
export JAVA_HOME
export HADOOP_HOME
unset USERNAME

echo $HADOOP_HOME
/usr/lib/hadoop-0.20
[root@ip-10-250-57-221 hadoop-0.20]# echo $PATH
/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/home/ec2/bin:/home/ec2/bin:/root/bin:/home/ec2/bin:/root/bin:/usr/java/default/bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin::/home/ec2/bin:/root/bin:/usr/java/jdk1.6.0_14//bin:


[root@ip-10-250-57-221 conf]# pwd
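/usr/lib/hadoop-0.20/conf

Edit the following three files in this directory. The hostname was lost from the original listing; replace YOUR_HOSTNAME with the machine's hostname (e.g. localhost for a single-node setup).

conf/core-site.xml:
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://YOUR_HOSTNAME:9000</value>
  </property>
</configuration>

conf/hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

conf/mapred-site.xml:
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>YOUR_HOSTNAME:9001</value>
  </property>
</configuration>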


Setup passphraseless ssh
Now check that you can ssh to the localhost without a passphrase:
# ssh localhost
If you cannot ssh to localhost without a passphrase, execute the following commands:
# ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
# cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

Format a new distributed-filesystem:
# bin/hadoop namenode -format
Start the hadoop daemons:
# bin/start-all.sh

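Open ports 50070 (NameNode web UI) and 50030 (JobTracker web UI) so that the web interfaces can be reached from your browser.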

Browse the web interface for the NameNode and the JobTracker; by default they are
available at:
NameNode - http://YOUR_HOSTNAME:50070/
JobTracker - http://YOUR_HOSTNAME:50030/




No comments: