update 1.2 changes

pull/63/head
bennojoy 11 years ago
parent cb8c309a9c
commit ef6fc4213c
23 changed files:
  1. hadoop/README.md (30)
  2. hadoop/group_vars/all (49)
  3. hadoop/hadoop_vars/hadoop (47)
  4. hadoop/roles/common/tasks/common.yml (2)
  5. hadoop/roles/common/tasks/main.yml (2)
  6. hadoop/roles/common/templates/hadoop_conf/core-site.xml.j2 (2)
  7. hadoop/roles/common/templates/hadoop_conf/hdfs-site.xml.j2 (14)
  8. hadoop/roles/common/templates/hadoop_conf/mapred-site.xml.j2 (6)
  9. hadoop/roles/common/templates/hadoop_ha_conf/core-site.xml.j2 (2)
  10. hadoop/roles/common/templates/hadoop_ha_conf/hdfs-site.xml.j2 (32)
  11. hadoop/roles/common/templates/hadoop_ha_conf/mapred-site.xml.j2 (28)
  12. hadoop/roles/common/templates/iptables.j2 (34)
  13. hadoop/roles/hadoop_primary/tasks/hadoop_master.yml (52)
  14. hadoop/roles/hadoop_primary/tasks/hadoop_master_no_ha.yml (38)
  15. hadoop/roles/hadoop_primary/tasks/main.yml (6)
  16. hadoop/roles/hadoop_secondary/tasks/hadoop_secondary.yml (73)
  17. hadoop/roles/hadoop_secondary/tasks/main.yml (62)
  18. hadoop/roles/hadoop_slaves/tasks/main.yml (2)
  19. hadoop/roles/hadoop_slaves/tasks/slaves.yml (14)
  20. hadoop/roles/qjournal_servers/tasks/main.yml (20)
  21. hadoop/roles/zookeeper_servers/templates/zoo.cfg.j2 (4)
  22. hadoop/roles/zookeeper_servers/vars/main.yml (6)
  23. hadoop/site.yml (22)

@@ -68,11 +68,11 @@ The Playbooks have been tested using Ansible v1.2, and Centos 6.x (64 bit)
Modify group_vars/all to choose the interface for hadoop communication.
Optionally you can change the Hadoop-specific parameters, like ports or directories, by editing the hadoop_vars/hadoop file.
Optionally you can change the Hadoop-specific parameters, like ports or directories, by editing the group_vars/all file.
Before launching the deployment playbook, make sure the inventory file (hosts) has been set up properly. Here's a sample:
[hadoop_master_primary]15yy
[hadoop_master_primary]
zhadoop1
[hadoop_master_secondary]
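For orientation, a complete inventory for an HA deployment might look like the sketch below. Hostnames are illustrative; the hadoop_masters group referenced by the templates is assumed to be a children group of the primary and secondary groups, and zoo_id is assumed to be set as an inventory host variable (it is looked up via hostvars in zoo.cfg.j2):

[hadoop_master_primary]
zhadoop1

[hadoop_master_secondary]
zhadoop2

[hadoop_masters:children]
hadoop_master_primary
hadoop_master_secondary

[hadoop_slaves]
hadoop1
hadoop2
hadoop3

[qjournal_servers]
zhadoop1
zhadoop2
hadoop1

[zookeeper_servers]
zhadoop1 zoo_id=1
zhadoop2 zoo_id=2
hadoop1 zoo_id=3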
@@ -124,12 +124,22 @@ and you should get a result where the standby has been promoted to the active state
### Running a mapreduce job on the cluster.
To run a mapreduce job on the cluster, a sample playbook has been written; this playbook runs a job on the cluster which counts the occurrences of the word 'hello' in an input file. A sample input file has been created at playbooks/inputfile; modify the file to match your testing.
To deploy the mapreduce job, run the following command (below, -e server=<any of your hadoop master servers>):
To deploy the mapreduce job, run the following script from any of the hadoop master nodes as the user 'hdfs'. The job counts the number of occurrences of the word 'hello' in the given input file. E.g.: su - hdfs -c "/tmp/job.sh"
ansible-playbook -i hosts playbooks/job.yml -e server=zhadoop1
#!/bin/bash
# Create a sample input file on the local filesystem
cat > /tmp/inputfile << EOF
hello
sf
sdf
hello
sdf
sdf
EOF
# Copy the input file into HDFS
hadoop fs -put /tmp/inputfile /inputfile
# Run the grep example job, counting occurrences of the word 'hello'
hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar grep /inputfile /outputfile 'hello'
# Fetch the job output back to the local filesystem
hadoop fs -get /outputfile /tmp/outputfile/
To verify the result, read the file on your Ansible server located at /tmp/zhadoop1/tmp/outputfile/part-00000, which should give you the count.
To verify the result, read the file on the server located at /tmp/outputfile/part-00000, which should give you the count.
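With the sample input above (which contains 'hello' twice), part-00000 should contain something like the line below (tab-separated: count, then the matched pattern):

2       hello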
### Scale the Cluster
@@ -160,8 +170,12 @@ To deploy this cluster fill in the inventory file as follows:
hadoop2
hadoop3
and issue the following command:
and edit the group_vars/all file to disable HA:
ha_enabled: False
and run the following command:
ansible-playbook -i hosts site.yml -e ha_disabled=true --tags=no_ha
ansible-playbook -i hosts site.yml
The validity of the cluster can be checked by running the same mapreduce job that was documented above for an HA Hadoop cluster.

@@ -1 +1,50 @@
iface: eth1
ha_enabled: False
hadoop:
  #Variables for <core-site_xml> - common
  fs_default_FS_port: 8020
  nameservice_id: mycluster3
  #Variables for <hdfs-site_xml>
  dfs_permissions_superusergroup: hdfs
  dfs_namenode_name_dir:
    - /namedir1/
    - /namedir2/
  dfs_replication: 3
  dfs_namenode_handler_count: 50
  dfs_blocksize: 67108864
  dfs_datanode_data_dir:
    - /datadir1/
    - /datadir2/
  dfs_datanode_address_port: 50010
  dfs_datanode_http_address_port: 50075
  dfs_datanode_ipc_address_port: 50020
  dfs_namenode_http_address_port: 50070
  dfs_ha_zkfc_port: 8019
  qjournal_port: 8485
  qjournal_http_port: 8480
  dfs_journalnode_edits_dir: /journaldir/
  zookeeper_clientport: 2181
  zookeeper_leader_port: 2888
  zookeeper_election_port: 3888
  #Variables for <mapred-site_xml> - common
  mapred_job_tracker_ha_servicename: myjt3
  mapred_job_tracker_http_address_port: 50030
  mapred_task_tracker_http_address_port: 50060
  mapred_job_tracker_port: 8021
  mapred_ha_jobtracker_rpc-address_port: 8023
  mapred_ha_zkfc_port: 8018
  mapred_job_tracker_persist_jobstatus_dir: /jobdir/
  mapred_local_dir:
    - /mapred1/
    - /mapred2/
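A note on the rename from dotted to underscored keys: Jinja2 attribute access cannot contain dots, so a key like dfs.blocksize is reachable only through bracket syntax, while the underscored form works either way. A minimal illustration (hypothetical template snippet):

{# dotted key: bracket syntax is the only option #}
{{ hadoop['dfs.blocksize'] }}
{# underscored key: bracket and attribute access both work #}
{{ hadoop['dfs_blocksize'] }}
{{ hadoop.dfs_blocksize }}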

@@ -1,47 +0,0 @@
hadoop:
  #Variables for <core-site.xml> - common
  fs.default.FS.port: 8020
  nameservice.id: mycluster2
  #Variables for <hdfs-site.xml>
  dfs.permissions.superusergroup: hdfs
  dfs_namenode_name_dir:
    - /namedir1/
    - /namedir2/
  dfs.replication: 3
  dfs.namenode.handler.count: 50
  dfs.blocksize: 67108864
  dfs_datanode_data_dir:
    - /datadir1/
    - /datadir2/
  dfs.datanode.address.port: 50010
  dfs.datanode.http.address.port: 50075
  dfs.datanode.ipc.address.port: 50020
  dfs.namenode.http.address.port: 50070
  dfs.ha.zkfc.port: 8019
  qjournal.port: 8485
  qjournal.http.port: 8480
  dfs_journalnode_edits_dir: /journaldir/
  zookeeper.clientport: 2181
  zookeeper.leader_port: 2888
  zookeeper.election_port: 3888
  #Variables for <mapred-site.xml> - common
  mapred.job.tracker.ha.servicename: myjt2
  mapred.job.tracker.http.address.port: 50030
  mapred.task.tracker.http.address.port: 50060
  mapred.job.tracker.port: 8021
  mapred.ha.jobtracker.rpc-address.port: 8023
  mapred.ha.zkfc.port: 8018
  mapred_job_tracker_persist_jobstatus_dir: /jobdir/
  mapred_local_dir:
    - /mapred1/
    - /mapred2/

@@ -9,11 +9,9 @@
- name: create a directory for java
  file: state=directory path=/usr/java/
  tags: link
- name: create a link for java
  file: src=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre state=link path=/usr/java/default
  tags: link
- name: Create the hosts file for all machines
  template: src=etc/hosts.j2 dest=/etc/hosts

@@ -1,5 +1,5 @@
---
# The playbook for common tasks
- include: common.yml tags=slaves,no_ha
- include: common.yml tags=slaves

@@ -20,6 +20,6 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] + ':' ~ hadoop['fs.default.FS.port'] }}/</value>
<value>hdfs://{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] + ':' ~ hadoop['fs_default_FS_port'] }}/</value>
</property>
</configuration>
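As a sanity check on the renamed key: assuming the first host in hadoop_masters reports ansible_hostname zhadoop1 and fs_default_FS_port is left at 8020, the template above should render to something like:

<property>
<name>fs.defaultFS</name>
<value>hdfs://zhadoop1:8020/</value>
</property>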

@@ -20,31 +20,31 @@
<configuration>
<property>
<name>dfs.blocksize</name>
<value>{{ hadoop['dfs.blocksize'] }}</value>
<value>{{ hadoop['dfs_blocksize'] }}</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>{{ hadoop['dfs.permissions.superusergroup'] }}</value>
<value>{{ hadoop['dfs_permissions_superusergroup'] }}</value>
</property>
<property>
<name>dfs.namenode.http.address</name>
<value>0.0.0.0:{{ hadoop['dfs.namenode.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_namenode_http_address_port'] }}</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_address_port'] }}</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_http_address_port'] }}</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.ipc.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_ipc_address_port'] }}</value>
</property>
<property>
<name>dfs.replication</name>
<value>{{ hadoop['dfs.replication'] }}</value>
<value>{{ hadoop['dfs_replication'] }}</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>

@@ -2,7 +2,7 @@
<property>
<name>mapred.job.tracker</name>
<value>{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] }}:{{ hadoop['mapred.job.tracker.port'] }}</value>
<value>{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] }}:{{ hadoop['mapred_job_tracker_port'] }}</value>
</property>
<property>
@@ -12,11 +12,11 @@
<property>
<name>mapred.task.tracker.http.address</name>
<value>0.0.0.0:{{ hadoop['mapred.task.tracker.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['mapred_task_tracker_http_address_port'] }}</value>
</property>
<property>
<name>mapred.job.tracker.http.address</name>
<value>0.0.0.0:{{ hadoop['mapred.job.tracker.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['mapred_job_tracker_http_address_port'] }}</value>
</property>
</configuration>

@@ -20,6 +20,6 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://{{ hadoop['nameservice.id'] }}/</value>
<value>hdfs://{{ hadoop['nameservice_id'] }}/</value>
</property>
</configuration>

@@ -19,19 +19,19 @@
<configuration>
<property>
<name>dfs.nameservices</name>
<value>{{ hadoop['nameservice.id'] }}</value>
<value>{{ hadoop['nameservice_id'] }}</value>
</property>
<property>
<name>dfs.ha.namenodes.{{ hadoop['nameservice.id'] }}</name>
<name>dfs.ha.namenodes.{{ hadoop['nameservice_id'] }}</name>
<value>{{ groups.hadoop_masters | join(',') }}</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>{{ hadoop['dfs.blocksize'] }}</value>
<value>{{ hadoop['dfs_blocksize'] }}</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>{{ hadoop['dfs.permissions.superusergroup'] }}</value>
<value>{{ hadoop['dfs_permissions_superusergroup'] }}</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
@@ -39,31 +39,31 @@
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>{{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper.clientport'] + ',') }}:{{ hadoop['zookeeper.clientport'] }}</value>
<value>{{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper_clientport'] + ',') }}:{{ hadoop['zookeeper_clientport'] }}</value>
</property>
{% for host in groups['hadoop_masters'] %}
<property>
<name>dfs.namenode.rpc-address.{{ hadoop['nameservice.id'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['fs.default.FS.port'] }}</value>
<name>dfs.namenode.rpc-address.{{ hadoop['nameservice_id'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['fs_default_FS_port'] }}</value>
</property>
{% endfor %}
{% for host in groups['hadoop_masters'] %}
<property>
<name>dfs.namenode.http-address.{{ hadoop['nameservice.id'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['dfs.namenode.http.address.port'] }}</value>
<name>dfs.namenode.http-address.{{ hadoop['nameservice_id'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['dfs_namenode_http_address_port'] }}</value>
</property>
{% endfor %}
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{ groups.qjournal_servers | join(':' ~ hadoop['qjournal.port'] + ';') }}:{{ hadoop['qjournal.port'] }}/{{ hadoop['nameservice.id'] }}</value>
<value>qjournal://{{ groups.qjournal_servers | join(':' ~ hadoop['qjournal_port'] + ';') }}:{{ hadoop['qjournal_port'] }}/{{ hadoop['nameservice_id'] }}</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>{{ hadoop['dfs_journalnode_edits_dir'] }}</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.{{ hadoop['nameservice.id'] }}</name>
<name>dfs.client.failover.proxy.provider.{{ hadoop['nameservice_id'] }}</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
@@ -73,24 +73,24 @@
<property>
<name>dfs.ha.zkfc.port</name>
<value>{{ hadoop['dfs.ha.zkfc.port'] }}</value>
<value>{{ hadoop['dfs_ha_zkfc_port'] }}</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_address_port'] }}</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_http_address_port'] }}</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:{{ hadoop['dfs.datanode.ipc.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['dfs_datanode_ipc_address_port'] }}</value>
</property>
<property>
<name>dfs.replication</name>
<value>{{ hadoop['dfs.replication'] }}</value>
<value>{{ hadoop['dfs_replication'] }}</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
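The join expressions above are the least obvious part of this template. Assuming hypothetical groups zookeeper_servers = [zk1, zk2, zk3] and qjournal_servers = [jn1, jn2, jn3], with the default ports and nameservice_id from group_vars/all, they should render roughly as:

ha.zookeeper.quorum           -> zk1:2181,zk2:2181,zk3:2181
dfs.namenode.shared.edits.dir -> qjournal://jn1:8485;jn2:8485;jn3:8485/mycluster3

(the join filter inserts ':port,' or ':port;' between hosts, and the trailing ':port' for the last host is appended outside the filter).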

@@ -2,11 +2,11 @@
<property>
<name>mapred.job.tracker</name>
<value>{{ hadoop['mapred.job.tracker.ha.servicename'] }}</value>
<value>{{ hadoop['mapred_job_tracker_ha_servicename'] }}</value>
</property>
<property>
<name>mapred.jobtrackers.{{ hadoop['mapred.job.tracker.ha.servicename'] }}</name>
<name>mapred.jobtrackers.{{ hadoop['mapred_job_tracker_ha_servicename'] }}</name>
<value>{{ groups['hadoop_masters'] | join(',') }}</value>
<description>Comma-separated list of JobTracker IDs.</description>
</property>
@@ -18,7 +18,7 @@
<property>
<name>mapred.ha.zkfc.port</name>
<value>{{ hadoop['mapred.ha.zkfc.port'] }}</value>
<value>{{ hadoop['mapred_ha_zkfc_port'] }}</value>
</property>
<property>
@@ -28,31 +28,31 @@
<property>
<name>ha.zookeeper.quorum</name>
<value>{{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper.clientport'] + ',') }}:{{ hadoop['zookeeper.clientport'] }}</value>
<value>{{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper_clientport'] + ',') }}:{{ hadoop['zookeeper_clientport'] }}</value>
</property>
{% for host in groups['hadoop_masters'] %}
<property>
<name>mapred.jobtracker.rpc-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred.job.tracker.port'] }}</value>
<name>mapred.jobtracker.rpc-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred_job_tracker_port'] }}</value>
</property>
{% endfor %}
{% for host in groups['hadoop_masters'] %}
<property>
<name>mapred.job.tracker.http.address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }}</name>
<value>0.0.0.0:{{ hadoop['mapred.job.tracker.http.address.port'] }}</value>
<name>mapred.job.tracker.http.address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }}</name>
<value>0.0.0.0:{{ hadoop['mapred_job_tracker_http_address_port'] }}</value>
</property>
{% endfor %}
{% for host in groups['hadoop_masters'] %}
<property>
<name>mapred.ha.jobtracker.rpc-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred.ha.jobtracker.rpc-address.port'] }}</value>
<name>mapred.ha.jobtracker.rpc-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred_ha_jobtracker_rpc-address_port'] }}</value>
</property>
{% endfor %}
{% for host in groups['hadoop_masters'] %}
<property>
<name>mapred.ha.jobtracker.http-redirect-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred.job.tracker.http.address.port'] }}</value>
<name>mapred.ha.jobtracker.http-redirect-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }}</name>
<value>{{ host }}:{{ hadoop['mapred_job_tracker_http_address_port'] }}</value>
</property>
{% endfor %}
@@ -77,7 +77,7 @@
</property>
<property>
<name>mapred.client.failover.proxy.provider.{{ hadoop['mapred.job.tracker.ha.servicename'] }}</name>
<name>mapred.client.failover.proxy.provider.{{ hadoop['mapred_job_tracker_ha_servicename'] }}</name>
<value>org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider</value>
</property>
@@ -114,7 +114,7 @@
<property>
<name>mapred.task.tracker.http.address</name>
<value>0.0.0.0:{{ hadoop['mapred.task.tracker.http.address.port'] }}</value>
<value>0.0.0.0:{{ hadoop['mapred_task_tracker_http_address_port'] }}</value>
</property>
</configuration>

@@ -1,33 +1,33 @@
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
# Manual customization of this file is not recommended_
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
{% if 'hadoop_masters' in group_names %}
-A INPUT -p tcp --dport {{ hadoop['fs.default.FS.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs.namenode.http.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred.job.tracker.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred.job.tracker.http.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred.ha.jobtracker.rpc-address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred.ha.zkfc.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs.ha.zkfc.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['fs_default_FS_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs_namenode_http_address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred_job_tracker_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred_job_tracker_http_address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred_ha_jobtracker_rpc-address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred_ha_zkfc_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs_ha_zkfc_port'] }} -j ACCEPT
{% endif %}
{% if 'hadoop_slaves' in group_names %}
-A INPUT -p tcp --dport {{ hadoop['dfs.datanode.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs.datanode.http.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs.datanode.ipc.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred.task.tracker.http.address.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_http_address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_ipc_address_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['mapred_task_tracker_http_address_port'] }} -j ACCEPT
{% endif %}
{% if 'qjournal_servers' in group_names %}
-A INPUT -p tcp --dport {{ hadoop['qjournal.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['qjournal.http.port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['qjournal_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['qjournal_http_port'] }} -j ACCEPT
{% endif %}
{% if 'zookeeper_servers' in group_names %}
-A INPUT -p tcp --dport {{ hadoop['zookeeper.clientport'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['zookeeper.leader_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['zookeeper.election_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['zookeeper_clientport'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['zookeeper_leader_port'] }} -j ACCEPT
-A INPUT -p tcp --dport {{ hadoop['zookeeper_election_port'] }} -j ACCEPT
{% endif %}
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT

@@ -2,23 +2,15 @@
# Playbook for Hadoop master servers
- name: Install the namenode and jobtracker packages
  yum: name=${item} state=installed
  with_items:
    - hadoop-0.20-mapreduce-jobtracker
    - hadoop-hdfs-namenode
  when_set: $ha_disabled
- name: Install the namenode and jobtracker packages
  yum: name=${item} state=installed
  yum: name={{ item }} state=installed
  with_items:
    - hadoop-0.20-mapreduce-jobtrackerha
    - hadoop-hdfs-namenode
    - hadoop-hdfs-zkfc
    - hadoop-0.20-mapreduce-zkfc
  when_unset: $ha_disabled
- name: Copy the hadoop configuration files
  template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item}
  template: src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
@@ -29,48 +21,18 @@
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_unset: $ha_disabled
  notify: restart hadoopha master services
- name: Copy the hadoop configuration files for no ha
  template: src=roles/common/templates/hadoop_conf/${item}.j2 dest=/etc/hadoop/conf/${item}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
    - hadoop-metrics2.properties
    - hdfs-site.xml
    - log4j.properties
    - mapred-site.xml
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_set: $ha_disabled
  notify: restart hadoop master services
- name: Create the data directory for the namenode metadata
  file: path=${item} owner=hdfs group=hdfs state=directory
  with_items: ${hadoop.dfs_namenode_name_dir}
  file: path={{ item }} owner=hdfs group=hdfs state=directory
  with_items: hadoop.dfs_namenode_name_dir
- name: Create the data directory for the jobtracker ha
  file: path=${item} owner=mapred group=mapred state=directory
  with_items: ${hadoop.mapred_job_tracker_persist_jobstatus_dir}
  when_unset: $ha_disabled
  file: path={{ item }} owner=mapred group=mapred state=directory
  with_items: hadoop.mapred_job_tracker_persist_jobstatus_dir
- name: Format the namenode
  shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -format"; touch /usr/lib/hadoop/namenode.formatted
- name: start hadoop namenode services
  service: name=${item} state=started
  with_items:
    - hadoop-hdfs-namenode
- name: Give permissions for mapred users
  shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized
  when_set: $ha_disabled
- name: start hadoop jobtracker services
  service: name=${item} state=started
  with_items:
    - hadoop-0.20-mapreduce-jobtracker
  when_set: $ha_disabled
  service: name=hadoop-hdfs-namenode state=started
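The pattern running through these task changes is the Ansible 1.2 migration that the commit title refers to: ${var} interpolation becomes {{ var }}, and the when_set/when_unset conditionals collapse into a single when: expression. A minimal before/after sketch (illustrative task, not taken from this repo):

# old style
- name: install a package
  yum: name=${item} state=installed
  with_items:
    - some-package
  when_set: $ha_disabled

# Ansible 1.2 style
- name: install a package
  yum: name={{ item }} state=installed
  with_items:
    - some-package
  when: not ha_enabled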

@@ -0,0 +1,38 @@
---
# Playbook for Hadoop master servers
- name: Install the namenode and jobtracker packages
  yum: name={{ item }} state=installed
  with_items:
    - hadoop-0.20-mapreduce-jobtracker
    - hadoop-hdfs-namenode
- name: Copy the hadoop configuration files for no ha
  template: src=roles/common/templates/hadoop_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
    - hadoop-metrics2.properties
    - hdfs-site.xml
    - log4j.properties
    - mapred-site.xml
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  notify: restart hadoop master services
- name: Create the data directory for the namenode metadata
  file: path={{ item }} owner=hdfs group=hdfs state=directory
  with_items: hadoop.dfs_namenode_name_dir
- name: Format the namenode
  shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -format"; touch /usr/lib/hadoop/namenode.formatted
- name: start hadoop namenode services
  service: name=hadoop-hdfs-namenode state=started
- name: Give permissions for mapred users
  shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized
- name: start hadoop jobtracker services
  service: name=hadoop-0.20-mapreduce-jobtracker state=started

@@ -1,5 +1,9 @@
---
# Playbook for Hadoop master primary servers
- include: hadoop_master.yml tags=no_ha
- include: hadoop_master.yml
  when: ha_enabled
- include: hadoop_master_no_ha.yml
  when: not ha_enabled

@@ -1,73 +0,0 @@
---
# Playbook for Hadoop master secondary server
- name: Install the namenode and jobtracker packages
  yum: name=${item} state=installed
  with_items:
    - hadoop-0.20-mapreduce-jobtrackerha
    - hadoop-hdfs-namenode
    - hadoop-hdfs-zkfc
    - hadoop-0.20-mapreduce-zkfc
- name: Copy the hadoop configuration files
  template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
    - hadoop-metrics2.properties
    - hdfs-site.xml
    - log4j.properties
    - mapred-site.xml
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_unset: $ha_disabled
  notify: restart hadoopha master services
- name: Create the data directory for the namenode metadata
  file: path=${item} owner=hdfs group=hdfs state=directory
  with_items: ${hadoop.dfs_namenode_name_dir}
- name: Create the data directory for the jobtracker ha
  file: path=${item} owner=mapred group=mapred state=directory
  with_items: ${hadoop.mapred_job_tracker_persist_jobstatus_dir}
- name: Initialize the secondary namenode
  shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -bootstrapStandby"; touch /usr/lib/hadoop/namenode.formatted
- name: start hadoop namenode services
  service: name=${item} state=started
  with_items:
    - hadoop-hdfs-namenode
- name: Initialize the zkfc for namenode
  shell: creates=/usr/lib/hadoop/zkfc.formatted su - hdfs -c "hdfs zkfc -formatZK"; touch /usr/lib/hadoop/zkfc.formatted
  register: nn_result
- name: restart zkfc for namenode
  service: name=hadoop-hdfs-zkfc state=restarted
  delegate_to: ${item}
  with_items: ${groups.hadoop_masters}
  when_set: $nn_result and $nn_result.changed
- name: Give permissions for mapred users
  shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized
- name: Initialize the zkfc for jobtracker
  shell: creates=/usr/lib/hadoop/zkfcjob.formatted su - mapred -c "hadoop mrzkfc -formatZK"; touch /usr/lib/hadoop/zkfcjob.formatted
  register: jt_result
- name: restart zkfc for jobtracker
  service: name=hadoop-0.20-mapreduce-zkfc state=restarted
  delegate_to: ${item}
  with_items: ${groups.hadoop_masters}
  when_set: $jt_result and $jt_result.changed
- name: start hadoop Jobtracker services
  service: name=hadoop-0.20-mapreduce-jobtrackerha state=started
  delegate_to: ${item}
  with_items: ${groups.hadoop_masters}
  when_set: $jt_result and $jt_result.changed

@@ -1,4 +1,64 @@
---
# Playbook for Hadoop master secondary server
- include: hadoop_secondary.yml
- name: Install the namenode and jobtracker packages
  yum: name=${item} state=installed
  with_items:
    - hadoop-0.20-mapreduce-jobtrackerha
    - hadoop-hdfs-namenode
    - hadoop-hdfs-zkfc
    - hadoop-0.20-mapreduce-zkfc
- name: Copy the hadoop configuration files
  template: src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
    - hadoop-metrics2.properties
    - hdfs-site.xml
    - log4j.properties
    - mapred-site.xml
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  notify: restart hadoopha master services
- name: Create the data directory for the namenode metadata
  file: path={{ item }} owner=hdfs group=hdfs state=directory
  with_items: hadoop.dfs_namenode_name_dir
- name: Create the data directory for the jobtracker ha
  file: path={{ item }} owner=mapred group=mapred state=directory
  with_items: hadoop.mapred_job_tracker_persist_jobstatus_dir
- name: Initialize the secondary namenode
  shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -bootstrapStandby"; touch /usr/lib/hadoop/namenode.formatted
- name: start hadoop namenode services
  service: name=hadoop-hdfs-namenode state=started
- name: Initialize the zkfc for namenode
  shell: creates=/usr/lib/hadoop/zkfc.formatted su - hdfs -c "hdfs zkfc -formatZK"; touch /usr/lib/hadoop/zkfc.formatted
- name: start zkfc for namenodes
  service: name=hadoop-hdfs-zkfc state=started
  delegate_to: ${item}
  with_items: groups.hadoop_masters
- name: Give permissions for mapred users
  shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized
- name: Initialize the zkfc for jobtracker
  shell: creates=/usr/lib/hadoop/zkfcjob.formatted su - mapred -c "hadoop mrzkfc -formatZK"; touch /usr/lib/hadoop/zkfcjob.formatted
- name: start zkfc for jobtracker
  service: name=hadoop-0.20-mapreduce-zkfc state=started
  delegate_to: ${item}
  with_items: groups.hadoop_masters
- name: start hadoop Jobtracker services
  service: name=hadoop-0.20-mapreduce-jobtrackerha state=started
  delegate_to: ${item}
  with_items: groups.hadoop_masters

@@ -1,4 +1,4 @@
---
# Playbook for Hadoop slave servers
- include: slaves.yml tags=slaves,no_ha
- include: slaves.yml tags=slaves

@@ -19,7 +19,7 @@
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_unset: $ha_disabled
  when: ha_enabled
  notify: restart hadoop slave services
- name: Copy the hadoop configuration files for non ha
@@ -34,19 +34,19 @@
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_set: $ha_disabled
  when: not ha_enabled
  notify: restart hadoop slave services
- name: Create the data directory for the slave nodes to store the data
  file: path=${item} owner=hdfs group=hdfs state=directory
  with_items: ${hadoop.dfs_datanode_data_dir}
  file: path={{ item }} owner=hdfs group=hdfs state=directory
  with_items: hadoop.dfs_datanode_data_dir
- name: Create the data directory for the slave nodes for mapreduce
  file: path=${item} owner=mapred group=mapred state=directory
  with_items: ${hadoop.mapred_local_dir}
  file: path={{ item }} owner=mapred group=mapred state=directory
  with_items: hadoop.mapred_local_dir
- name: start hadoop slave services
  service: name=${item} state=restarted
  service: name={{ item }} state=started
  with_items:
    - hadoop-0.20-mapreduce-tasktracker
    - hadoop-hdfs-datanode
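Note that the with_items lines above now take a bare variable name (e.g. hadoop.dfs_datanode_data_dir): Ansible 1.2 looks the name up and iterates over the list it yields. Later Ansible versions prefer the explicit Jinja form, which would read roughly as this illustrative sketch:

- name: create the datanode data directories
  file: path={{ item }} owner=hdfs group=hdfs state=directory
  with_items: "{{ hadoop.dfs_datanode_data_dir }}"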

@@ -5,10 +5,10 @@
  yum: name=hadoop-hdfs-journalnode state=installed
- name: Create folder for Journaling
  file: path=${hadoop.dfs_journalnode_edits_dir} state=directory owner=hdfs group=hdfs
  file: path={{ hadoop.dfs_journalnode_edits_dir }} state=directory owner=hdfs group=hdfs
- name: Copy the hadoop configuration files
  template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item}
  template: src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
@@ -19,20 +19,4 @@
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_unset: $ha_disabled
  notify: restart qjournal services
- name: Copy the non ha hadoop configuration files
  template: src=roles/common/templates/hadoop_conf/${item}.j2 dest=/etc/hadoop/conf/${item}
  with_items:
    - core-site.xml
    - hadoop-metrics.properties
    - hadoop-metrics2.properties
    - hdfs-site.xml
    - log4j.properties
    - mapred-site.xml
    - slaves
    - ssl-client.xml.example
    - ssl-server.xml.example
  when_set: $ha_disabled
  notify: restart qjournal services

@@ -1,9 +1,9 @@
tickTime=2000
dataDir=/var/lib/zookeeper/
clientPort={{ hadoop['zookeeper.clientport'] }}
clientPort={{ hadoop['zookeeper_clientport'] }}
initLimit=5
syncLimit=2
{% for host in groups['zookeeper_servers'] %}
server.{{ hostvars[host].zoo_id }}={{ host }}:{{ hadoop['zookeeper.leader_port'] }}:{{ hadoop['zookeeper.election_port'] }}
server.{{ hostvars[host].zoo_id }}={{ host }}:{{ hadoop['zookeeper_leader_port'] }}:{{ hadoop['zookeeper_election_port'] }}
{% endfor %}
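Rendered against a hypothetical zookeeper_servers group of zk1/zk2/zk3 with zoo_id 1-3 and the default ports from group_vars/all, this template should produce something like:

tickTime=2000
dataDir=/var/lib/zookeeper/
clientPort=2181
initLimit=5
syncLimit=2
server.1=zk1:2888:3888
server.2=zk2:2888:3888
server.3=zk3:2888:3888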

@@ -1,6 +0,0 @@
---
# Vars for Zookeeper
clientport: 2181
leader_port: 2888
election_port: 3888

@@ -3,39 +3,27 @@
- hosts: all
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - common
- hosts: zookeeper_servers
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - zookeeper_servers
    - { role: zookeeper_servers, when: ha_enabled }
- hosts: qjournal_servers
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - qjournal_servers
    - { role: qjournal_servers, when: ha_enabled }
- hosts: hadoop_master_primary
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - hadoop_primary
    - { role: hadoop_primary }
- hosts: hadoop_master_secondary
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - hadoop_secondary
    - { role: hadoop_secondary, when: ha_enabled }
- hosts: hadoop_slaves
  vars_files:
    - hadoop_vars/hadoop
  roles:
    - hadoop_slaves
    - { role: hadoop_slaves }
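A closing note on the site.yml change: a role-level when in Ansible 1.2 attaches the condition to every task the role contains, so with ha_enabled: False the zookeeper, qjournal and secondary-master plays become no-ops and a single site.yml run covers both topologies, replacing the old -e ha_disabled=true --tags=no_ha invocation. Conceptually:

# a role-level condition like
- { role: zookeeper_servers, when: ha_enabled }
# behaves as if each task inside the role carried its own
# when: ha_enabled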