diff --git a/hadoop/README.md b/hadoop/README.md index 985ef88..601a3df 100644 --- a/hadoop/README.md +++ b/hadoop/README.md @@ -68,11 +68,11 @@ The Playbooks have been tested using Ansible v1.2, and Centos 6.x (64 bit) Modify group_vars/all to choose the interface for hadoop communication. -Optionally you change the hadoop specific parameter like port's or directories by editing hadoop_vars/hadoop file. +Optionally, you can change hadoop-specific parameters such as ports or directories by editing the group_vars/all file. Before launching the deployment playbook make sure the inventory file ( hosts ) have be setup properly, Here's a sample: - [hadoop_master_primary]15yy + [hadoop_master_primary] zhadoop1 [hadoop_master_secondary] @@ -124,12 +124,22 @@ and you should get a result where the standby has been promoted to the active st ### Running a mapreduce job on the cluster. -To run a mapreduce job on the cluster a sample playbook has been written, this playbook runs a job on the cluster which counts the occurance of the word 'hello' on an inputfile. A sample inputfile file has been created in the playbooks/inputfile file, modify the file to match your testing. -To deploy the mapreduce job run the following command.( Below -e server= +To deploy the mapreduce job, run the following script from any of the hadoop master nodes as user 'hdfs'. The job counts the number of occurrences of the word 'hello' in the given inputfile. E.g.: su - hdfs -c "/tmp/job.sh" - ansible-playbook -i hosts playbooks/job.yml -e server=zhadoop1 + #!/bin/bash + cat > /tmp/inputfile << EOF + hello + sf + sdf + hello + sdf + sdf + EOF + hadoop fs -put /tmp/inputfile /inputfile + hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar grep /inputfile /outputfile 'hello' + hadoop fs -get /outputfile /tmp/outputfile/ -to verify the result read the file on your ansible server located at /tmp/zhadoop1/tmp/outputfile/part-00000, which should give you the count. 
+To verify the result, read the file on the server located at /tmp/outputfile/part-00000, which should give you the count. ###Scale the Cluster @@ -160,8 +170,12 @@ To deploy this cluster fill in the inventory file as follows: hadoop2 hadoop3 -and issue the following command: +and edit the group_vars/all file to disable HA: + + ha_enabled: False + +and run the following command: - ansible-playbook -i hosts site.yml -e ha_disabled=true --tags=no_ha + ansible-playbook -i hosts site.yml The validity of the cluster can be checked by running the same mapreduce job that has documented above for an HA Hadoop Cluster diff --git a/hadoop/group_vars/all b/hadoop/group_vars/all index 3ad1763..8e902a1 100644 --- a/hadoop/group_vars/all +++ b/hadoop/group_vars/all @@ -1 +1,50 @@ iface: eth1 + +ha_enabled: False + +hadoop: + +#Variables for - common + + fs_default_FS_port: 8020 + nameservice_id: mycluster3 + +#Variables for + + dfs_permissions_superusergroup: hdfs + dfs_namenode_name_dir: + - /namedir1/ + - /namedir2/ + dfs_replication: 3 + dfs_namenode_handler_count: 50 + dfs_blocksize: 67108864 + dfs_datanode_data_dir: + - /datadir1/ + - /datadir2/ + dfs_datanode_address_port: 50010 + dfs_datanode_http_address_port: 50075 + dfs_datanode_ipc_address_port: 50020 + dfs_namenode_http_address_port: 50070 + dfs_ha_zkfc_port: 8019 + qjournal_port: 8485 + qjournal_http_port: 8480 + dfs_journalnode_edits_dir: /journaldir/ + zookeeper_clientport: 2181 + zookeeper_leader_port: 2888 + zookeeper_election_port: 3888 + +#Variables for - common + mapred_job_tracker_ha_servicename: myjt3 + mapred_job_tracker_http_address_port: 50030 + mapred_task_tracker_http_address_port: 50060 + mapred_job_tracker_port: 8021 + mapred_ha_jobtracker_rpc-address_port: 8023 + mapred_ha_zkfc_port: 8018 + mapred_job_tracker_persist_jobstatus_dir: /jobdir/ + mapred_local_dir: + - /mapred1/ + - /mapred2/ + + + + diff --git a/hadoop/hadoop_vars/hadoop b/hadoop/hadoop_vars/hadoop deleted file mode 100644 index 
7b011e4..0000000 --- a/hadoop/hadoop_vars/hadoop +++ /dev/null @@ -1,47 +0,0 @@ - -hadoop: - -#Variables for - common - - fs.default.FS.port: 8020 - nameservice.id: mycluster2 - -#Variables for - - dfs.permissions.superusergroup: hdfs - dfs_namenode_name_dir: - - /namedir1/ - - /namedir2/ - dfs.replication: 3 - dfs.namenode.handler.count: 50 - dfs.blocksize: 67108864 - dfs_datanode_data_dir: - - /datadir1/ - - /datadir2/ - dfs.datanode.address.port: 50010 - dfs.datanode.http.address.port: 50075 - dfs.datanode.ipc.address.port: 50020 - dfs.namenode.http.address.port: 50070 - dfs.ha.zkfc.port: 8019 - qjournal.port: 8485 - qjournal.http.port: 8480 - dfs_journalnode_edits_dir: /journaldir/ - zookeeper.clientport: 2181 - zookeeper.leader_port: 2888 - zookeeper.election_port: 3888 - -#Variables for - common - mapred.job.tracker.ha.servicename: myjt2 - mapred.job.tracker.http.address.port: 50030 - mapred.task.tracker.http.address.port: 50060 - mapred.job.tracker.port: 8021 - mapred.ha.jobtracker.rpc-address.port: 8023 - mapred.ha.zkfc.port: 8018 - mapred_job_tracker_persist_jobstatus_dir: /jobdir/ - mapred_local_dir: - - /mapred1/ - - /mapred2/ - - - - diff --git a/hadoop/roles/common/tasks/common.yml b/hadoop/roles/common/tasks/common.yml index 5b4c9d1..08a8bed 100644 --- a/hadoop/roles/common/tasks/common.yml +++ b/hadoop/roles/common/tasks/common.yml @@ -9,11 +9,9 @@ - name: create a directory for java file: state=directory path=/usr/java/ - tags: link - name: create a link for java file: src=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre state=link path=/usr/java/default - tags: link - name: Create the hosts file for all machines template: src=etc/hosts.j2 dest=/etc/hosts diff --git a/hadoop/roles/common/tasks/main.yml b/hadoop/roles/common/tasks/main.yml index 1fe5398..ef6677f 100644 --- a/hadoop/roles/common/tasks/main.yml +++ b/hadoop/roles/common/tasks/main.yml @@ -1,5 +1,5 @@ --- # The playbook for common tasks -- include: common.yml tags=slaves,no_ha +- 
include: common.yml tags=slaves diff --git a/hadoop/roles/common/templates/hadoop_conf/core-site.xml.j2 b/hadoop/roles/common/templates/hadoop_conf/core-site.xml.j2 index 6f837c5..75ac8f6 100644 --- a/hadoop/roles/common/templates/hadoop_conf/core-site.xml.j2 +++ b/hadoop/roles/common/templates/hadoop_conf/core-site.xml.j2 @@ -20,6 +20,6 @@ fs.defaultFS - hdfs://{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] + ':' ~ hadoop['fs.default.FS.port'] }}/ + hdfs://{{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] + ':' ~ hadoop['fs_default_FS_port'] }}/ diff --git a/hadoop/roles/common/templates/hadoop_conf/hdfs-site.xml.j2 b/hadoop/roles/common/templates/hadoop_conf/hdfs-site.xml.j2 index 022118e..0c537fc 100644 --- a/hadoop/roles/common/templates/hadoop_conf/hdfs-site.xml.j2 +++ b/hadoop/roles/common/templates/hadoop_conf/hdfs-site.xml.j2 @@ -20,31 +20,31 @@ dfs.blocksize - {{ hadoop['dfs.blocksize'] }} + {{ hadoop['dfs_blocksize'] }} dfs.permissions.superusergroup - {{ hadoop['dfs.permissions.superusergroup'] }} + {{ hadoop['dfs_permissions_superusergroup'] }} dfs.namenode.http.address - 0.0.0.0:{{ hadoop['dfs.namenode.http.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_namenode_http_address_port'] }} dfs.datanode.address - 0.0.0.0:{{ hadoop['dfs.datanode.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_address_port'] }} dfs.datanode.http.address - 0.0.0.0:{{ hadoop['dfs.datanode.http.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_http_address_port'] }} dfs.datanode.ipc.address - 0.0.0.0:{{ hadoop['dfs.datanode.ipc.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_ipc_address_port'] }} dfs.replication - {{ hadoop['dfs.replication'] }} + {{ hadoop['dfs_replication'] }} dfs.namenode.name.dir diff --git a/hadoop/roles/common/templates/hadoop_conf/mapred-site.xml.j2 b/hadoop/roles/common/templates/hadoop_conf/mapred-site.xml.j2 index b684fb5..0941698 100644 --- a/hadoop/roles/common/templates/hadoop_conf/mapred-site.xml.j2 +++ 
b/hadoop/roles/common/templates/hadoop_conf/mapred-site.xml.j2 @@ -2,7 +2,7 @@ mapred.job.tracker - {{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] }}:{{ hadoop['mapred.job.tracker.port'] }} + {{ hostvars[groups['hadoop_masters'][0]]['ansible_hostname'] }}:{{ hadoop['mapred_job_tracker_port'] }} @@ -12,11 +12,11 @@ mapred.task.tracker.http.address - 0.0.0.0:{{ hadoop['mapred.task.tracker.http.address.port'] }} + 0.0.0.0:{{ hadoop['mapred_task_tracker_http_address_port'] }} mapred.job.tracker.http.address - 0.0.0.0:{{ hadoop['mapred.job.tracker.http.address.port'] }} + 0.0.0.0:{{ hadoop['mapred_job_tracker_http_address_port'] }} diff --git a/hadoop/roles/common/templates/hadoop_ha_conf/core-site.xml.j2 b/hadoop/roles/common/templates/hadoop_ha_conf/core-site.xml.j2 index 2db7dff..62f355d 100644 --- a/hadoop/roles/common/templates/hadoop_ha_conf/core-site.xml.j2 +++ b/hadoop/roles/common/templates/hadoop_ha_conf/core-site.xml.j2 @@ -20,6 +20,6 @@ fs.defaultFS - hdfs://{{ hadoop['nameservice.id'] }}/ + hdfs://{{ hadoop['nameservice_id'] }}/ diff --git a/hadoop/roles/common/templates/hadoop_ha_conf/hdfs-site.xml.j2 b/hadoop/roles/common/templates/hadoop_ha_conf/hdfs-site.xml.j2 index 64168f3..7dadd91 100644 --- a/hadoop/roles/common/templates/hadoop_ha_conf/hdfs-site.xml.j2 +++ b/hadoop/roles/common/templates/hadoop_ha_conf/hdfs-site.xml.j2 @@ -19,19 +19,19 @@ dfs.nameservices - {{ hadoop['nameservice.id'] }} + {{ hadoop['nameservice_id'] }} - dfs.ha.namenodes.{{ hadoop['nameservice.id'] }} + dfs.ha.namenodes.{{ hadoop['nameservice_id'] }} {{ groups.hadoop_masters | join(',') }} dfs.blocksize - {{ hadoop['dfs.blocksize'] }} + {{ hadoop['dfs_blocksize'] }} dfs.permissions.superusergroup - {{ hadoop['dfs.permissions.superusergroup'] }} + {{ hadoop['dfs_permissions_superusergroup'] }} dfs.ha.automatic-failover.enabled @@ -39,31 +39,31 @@ ha.zookeeper.quorum - {{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper.clientport'] + ',') }}:{{ 
hadoop['zookeeper.clientport'] }} + {{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper_clientport'] + ',') }}:{{ hadoop['zookeeper_clientport'] }} {% for host in groups['hadoop_masters'] %} - dfs.namenode.rpc-address.{{ hadoop['nameservice.id'] }}.{{ host }} - {{ host }}:{{ hadoop['fs.default.FS.port'] }} + dfs.namenode.rpc-address.{{ hadoop['nameservice_id'] }}.{{ host }} + {{ host }}:{{ hadoop['fs_default_FS_port'] }} {% endfor %} {% for host in groups['hadoop_masters'] %} - dfs.namenode.http-address.{{ hadoop['nameservice.id'] }}.{{ host }} - {{ host }}:{{ hadoop['dfs.namenode.http.address.port'] }} + dfs.namenode.http-address.{{ hadoop['nameservice_id'] }}.{{ host }} + {{ host }}:{{ hadoop['dfs_namenode_http_address_port'] }} {% endfor %} dfs.namenode.shared.edits.dir - qjournal://{{ groups.qjournal_servers | join(':' ~ hadoop['qjournal.port'] + ';') }}:{{ hadoop['qjournal.port'] }}/{{ hadoop['nameservice.id'] }} + qjournal://{{ groups.qjournal_servers | join(':' ~ hadoop['qjournal_port'] + ';') }}:{{ hadoop['qjournal_port'] }}/{{ hadoop['nameservice_id'] }} dfs.journalnode.edits.dir {{ hadoop['dfs_journalnode_edits_dir'] }} - dfs.client.failover.proxy.provider.{{ hadoop['nameservice.id'] }} + dfs.client.failover.proxy.provider.{{ hadoop['nameservice_id'] }} org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider @@ -73,24 +73,24 @@ dfs.ha.zkfc.port - {{ hadoop['dfs.ha.zkfc.port'] }} + {{ hadoop['dfs_ha_zkfc_port'] }} dfs.datanode.address - 0.0.0.0:{{ hadoop['dfs.datanode.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_address_port'] }} dfs.datanode.http.address - 0.0.0.0:{{ hadoop['dfs.datanode.http.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_http_address_port'] }} dfs.datanode.ipc.address - 0.0.0.0:{{ hadoop['dfs.datanode.ipc.address.port'] }} + 0.0.0.0:{{ hadoop['dfs_datanode_ipc_address_port'] }} dfs.replication - {{ hadoop['dfs.replication'] }} + {{ hadoop['dfs_replication'] }} dfs.namenode.name.dir diff --git 
a/hadoop/roles/common/templates/hadoop_ha_conf/mapred-site.xml.j2 b/hadoop/roles/common/templates/hadoop_ha_conf/mapred-site.xml.j2 index fc46214..4a839c9 100644 --- a/hadoop/roles/common/templates/hadoop_ha_conf/mapred-site.xml.j2 +++ b/hadoop/roles/common/templates/hadoop_ha_conf/mapred-site.xml.j2 @@ -2,11 +2,11 @@ mapred.job.tracker - {{ hadoop['mapred.job.tracker.ha.servicename'] }} + {{ hadoop['mapred_job_tracker_ha_servicename'] }} - mapred.jobtrackers.{{ hadoop['mapred.job.tracker.ha.servicename'] }} + mapred.jobtrackers.{{ hadoop['mapred_job_tracker_ha_servicename'] }} {{ groups['hadoop_masters'] | join(',') }} Comma-separated list of JobTracker IDs. @@ -18,7 +18,7 @@ mapred.ha.zkfc.port - {{ hadoop['mapred.ha.zkfc.port'] }} + {{ hadoop['mapred_ha_zkfc_port'] }} @@ -28,31 +28,31 @@ ha.zookeeper.quorum - {{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper.clientport'] + ',') }}:{{ hadoop['zookeeper.clientport'] }} + {{ groups.zookeeper_servers | join(':' ~ hadoop['zookeeper_clientport'] + ',') }}:{{ hadoop['zookeeper_clientport'] }} {% for host in groups['hadoop_masters'] %} - mapred.jobtracker.rpc-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }} - {{ host }}:{{ hadoop['mapred.job.tracker.port'] }} + mapred.jobtracker.rpc-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }} + {{ host }}:{{ hadoop['mapred_job_tracker_port'] }} {% endfor %} {% for host in groups['hadoop_masters'] %} - mapred.job.tracker.http.address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }} - 0.0.0.0:{{ hadoop['mapred.job.tracker.http.address.port'] }} + mapred.job.tracker.http.address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }} + 0.0.0.0:{{ hadoop['mapred_job_tracker_http_address_port'] }} {% endfor %} {% for host in groups['hadoop_masters'] %} - mapred.ha.jobtracker.rpc-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }} - {{ host }}:{{ hadoop['mapred.ha.jobtracker.rpc-address.port'] }} + 
mapred.ha.jobtracker.rpc-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }} + {{ host }}:{{ hadoop['mapred_ha_jobtracker_rpc-address_port'] }} {% endfor %} {% for host in groups['hadoop_masters'] %} - mapred.ha.jobtracker.http-redirect-address.{{ hadoop['mapred.job.tracker.ha.servicename'] }}.{{ host }} - {{ host }}:{{ hadoop['mapred.job.tracker.http.address.port'] }} + mapred.ha.jobtracker.http-redirect-address.{{ hadoop['mapred_job_tracker_ha_servicename'] }}.{{ host }} + {{ host }}:{{ hadoop['mapred_job_tracker_http_address_port'] }} {% endfor %} @@ -77,7 +77,7 @@ - mapred.client.failover.proxy.provider.{{ hadoop['mapred.job.tracker.ha.servicename'] }} + mapred.client.failover.proxy.provider.{{ hadoop['mapred_job_tracker_ha_servicename'] }} org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider @@ -114,7 +114,7 @@ mapred.task.tracker.http.address - 0.0.0.0:{{ hadoop['mapred.task.tracker.http.address.port'] }} + 0.0.0.0:{{ hadoop['mapred_task_tracker_http_address_port'] }} diff --git a/hadoop/roles/common/templates/iptables.j2 b/hadoop/roles/common/templates/iptables.j2 index 2a2b368..f9814fc 100644 --- a/hadoop/roles/common/templates/iptables.j2 +++ b/hadoop/roles/common/templates/iptables.j2 @@ -1,33 +1,33 @@ # Firewall configuration written by system-config-firewall -# Manual customization of this file is not recommended. 
+# Manual customization of this file is not recommended. *filter :INPUT ACCEPT [0:0] :FORWARD ACCEPT [0:0] :OUTPUT ACCEPT [0:0] {% if 'hadoop_masters' in group_names %} --A INPUT -p tcp --dport {{ hadoop['fs.default.FS.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['dfs.namenode.http.address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['mapred.job.tracker.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['mapred.job.tracker.http.address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['mapred.ha.jobtracker.rpc-address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['mapred.ha.zkfc.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['dfs.ha.zkfc.port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['fs_default_FS_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['dfs_namenode_http_address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['mapred_job_tracker_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['mapred_job_tracker_http_address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['mapred_ha_jobtracker_rpc-address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['mapred_ha_zkfc_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['dfs_ha_zkfc_port'] }} -j ACCEPT {% endif %} {% if 'hadoop_slaves' in group_names %} --A INPUT -p tcp --dport {{ hadoop['dfs.datanode.address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['dfs.datanode.http.address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['dfs.datanode.ipc.address.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['mapred.task.tracker.http.address.port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_http_address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['dfs_datanode_ipc_address_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['mapred_task_tracker_http_address_port'] }} -j ACCEPT {% endif %} {% if 
'qjournal_servers' in group_names %} --A INPUT -p tcp --dport {{ hadoop['qjournal.port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['qjournal.http.port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['qjournal_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['qjournal_http_port'] }} -j ACCEPT {% endif %} {% if 'zookeeper_servers' in group_names %} --A INPUT -p tcp --dport {{ hadoop['zookeeper.clientport'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['zookeeper.leader_port'] }} -j ACCEPT --A INPUT -p tcp --dport {{ hadoop['zookeeper.election_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['zookeeper_clientport'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['zookeeper_leader_port'] }} -j ACCEPT +-A INPUT -p tcp --dport {{ hadoop['zookeeper_election_port'] }} -j ACCEPT {% endif %} -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT -A INPUT -p icmp -j ACCEPT diff --git a/hadoop/roles/hadoop_primary/tasks/hadoop_master.yml b/hadoop/roles/hadoop_primary/tasks/hadoop_master.yml index 44c13c0..67632ec 100644 --- a/hadoop/roles/hadoop_primary/tasks/hadoop_master.yml +++ b/hadoop/roles/hadoop_primary/tasks/hadoop_master.yml @@ -2,23 +2,15 @@ # Playbook for Hadoop master servers - name: Install the namenode and jobtracker packages - yum: name=${item} state=installed - with_items: - - hadoop-0.20-mapreduce-jobtracker - - hadoop-hdfs-namenode - when_set: $ha_disabled - -- name: Install the namenode and jobtracker packages - yum: name=${item} state=installed + yum: name={{ item }} state=installed with_items: - hadoop-0.20-mapreduce-jobtrackerha - hadoop-hdfs-namenode - hadoop-hdfs-zkfc - hadoop-0.20-mapreduce-zkfc - when_unset: $ha_disabled - name: Copy the hadoop configuration files - template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item} + template: src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} with_items: - core-site.xml - hadoop-metrics.properties @@ -29,48 +21,18 
@@ - slaves - ssl-client.xml.example - ssl-server.xml.example - when_unset: $ha_disabled notify: restart hadoopha master services -- name: Copy the hadoop configuration files for no ha - template: src=roles/common/templates/hadoop_conf/${item}.j2 dest=/etc/hadoop/conf/${item} - with_items: - - core-site.xml - - hadoop-metrics.properties - - hadoop-metrics2.properties - - hdfs-site.xml - - log4j.properties - - mapred-site.xml - - slaves - - ssl-client.xml.example - - ssl-server.xml.example - when_set: $ha_disabled - notify: restart hadoop master services - - name: Create the data directory for the namenode metadata - file: path=${item} owner=hdfs group=hdfs state=directory - with_items: ${hadoop.dfs_namenode_name_dir} + file: path={{ item }} owner=hdfs group=hdfs state=directory + with_items: hadoop.dfs_namenode_name_dir - name: Create the data directory for the jobtracker ha - file: path=${item} owner=mapred group=mapred state=directory - with_items: ${hadoop.mapred_job_tracker_persist_jobstatus_dir} - when_unset: $ha_disabled - + file: path={{ item }} owner=mapred group=mapred state=directory + with_items: hadoop.mapred_job_tracker_persist_jobstatus_dir - name: Format the namenode shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -format"; touch /usr/lib/hadoop/namenode.formatted - name: start hadoop namenode services - service: name=${item} state=started - with_items: - - hadoop-hdfs-namenode - -- name: Give permissions for mapred users - shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized - when_set: $ha_disabled - -- name: start hadoop jobtracker services - service: name=${item} state=started - with_items: - - hadoop-0.20-mapreduce-jobtracker - when_set: $ha_disabled + service: name=hadoop-hdfs-namenode state=started diff --git a/hadoop/roles/hadoop_primary/tasks/hadoop_master_no_ha.yml 
b/hadoop/roles/hadoop_primary/tasks/hadoop_master_no_ha.yml new file mode 100644 index 0000000..3508c92 --- /dev/null +++ b/hadoop/roles/hadoop_primary/tasks/hadoop_master_no_ha.yml @@ -0,0 +1,38 @@ +--- +# Playbook for Hadoop master servers + +- name: Install the namenode and jobtracker packages + yum: name={{ item }} state=installed + with_items: + - hadoop-0.20-mapreduce-jobtracker + - hadoop-hdfs-namenode + +- name: Copy the hadoop configuration files for no ha + template: src=roles/common/templates/hadoop_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} + with_items: + - core-site.xml + - hadoop-metrics.properties + - hadoop-metrics2.properties + - hdfs-site.xml + - log4j.properties + - mapred-site.xml + - slaves + - ssl-client.xml.example + - ssl-server.xml.example + notify: restart hadoop master services + +- name: Create the data directory for the namenode metadata + file: path={{ item }} owner=hdfs group=hdfs state=directory + with_items: hadoop.dfs_namenode_name_dir + +- name: Format the namenode + shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -format"; touch /usr/lib/hadoop/namenode.formatted + +- name: start hadoop namenode services + service: name=hadoop-hdfs-namenode state=started + +- name: Give permissions for mapred users + shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized + +- name: start hadoop jobtracker services + service: name=hadoop-0.20-mapreduce-jobtracker state=started diff --git a/hadoop/roles/hadoop_primary/tasks/main.yml b/hadoop/roles/hadoop_primary/tasks/main.yml index f8eba61..bfc20ca 100644 --- a/hadoop/roles/hadoop_primary/tasks/main.yml +++ b/hadoop/roles/hadoop_primary/tasks/main.yml @@ -1,5 +1,9 @@ --- # Playbook for Hadoop master primary servers -- include: hadoop_master.yml tags=no_ha +- include: hadoop_master.yml + when: ha_enabled + +- include: 
hadoop_master_no_ha.yml + when: not ha_enabled diff --git a/hadoop/roles/hadoop_secondary/tasks/hadoop_secondary.yml b/hadoop/roles/hadoop_secondary/tasks/hadoop_secondary.yml deleted file mode 100644 index 18de1ae..0000000 --- a/hadoop/roles/hadoop_secondary/tasks/hadoop_secondary.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- -# Playbook for Hadoop master secondary server - - -- name: Install the namenode and jobtracker packages - yum: name=${item} state=installed - with_items: - - hadoop-0.20-mapreduce-jobtrackerha - - hadoop-hdfs-namenode - - hadoop-hdfs-zkfc - - hadoop-0.20-mapreduce-zkfc - -- name: Copy the hadoop configuration files - template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item} - with_items: - - core-site.xml - - hadoop-metrics.properties - - hadoop-metrics2.properties - - hdfs-site.xml - - log4j.properties - - mapred-site.xml - - slaves - - ssl-client.xml.example - - ssl-server.xml.example - when_unset: $ha_disabled - notify: restart hadoopha master services - -- name: Create the data directory for the namenode metadata - file: path=${item} owner=hdfs group=hdfs state=directory - with_items: ${hadoop.dfs_namenode_name_dir} - -- name: Create the data directory for the jobtracker ha - file: path=${item} owner=mapred group=mapred state=directory - with_items: ${hadoop.mapred_job_tracker_persist_jobstatus_dir} - - -- name: Initialize the secodary namenode - shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -bootstrapStandby"; touch /usr/lib/hadoop/namenode.formatted - -- name: start hadoop namenode services - service: name=${item} state=started - with_items: - - hadoop-hdfs-namenode - -- name: Initialize the zkfc for namenode - shell: creates=/usr/lib/hadoop/zkfc.formatted su - hdfs -c "hdfs zkfc -formatZK"; touch /usr/lib/hadoop/zkfc.formatted - register: nn_result - -- name: restart zkfc for namenode - service: name=hadoop-hdfs-zkfc state=restarted - delegate_to: ${item} - with_items: 
${groups.hadoop_masters} - when_set: $nn_result and $nn_result.changed - -- name: Give permissions for mapred users - shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized - -- name: Initialize the zkfc for jobtracker - shell: creates=/usr/lib/hadoop/zkfcjob.formatted su - mapred -c "hadoop mrzkfc -formatZK"; touch /usr/lib/hadoop/zkfcjob.formatted - register: jt_result - -- name: restart zkfc for jobtracker - service: name=hadoop-0.20-mapreduce-zkfc state=restarted - delegate_to: ${item} - with_items: ${groups.hadoop_masters} - when_set: $jt_result and $jt_result.changed - -- name: start hadoop Jobtracker services - service: name=hadoop-0.20-mapreduce-jobtrackerha state=started - delegate_to: ${item} - with_items: ${groups.hadoop_masters} - when_set: $jt_result and $jt_result.changed - diff --git a/hadoop/roles/hadoop_secondary/tasks/main.yml b/hadoop/roles/hadoop_secondary/tasks/main.yml index d58cdb9..c16d0fe 100644 --- a/hadoop/roles/hadoop_secondary/tasks/main.yml +++ b/hadoop/roles/hadoop_secondary/tasks/main.yml @@ -1,4 +1,64 @@ --- # Playbook for Hadoop master secondary server -- include: hadoop_secondary.yml + +- name: Install the namenode and jobtracker packages + yum: name=${item} state=installed + with_items: + - hadoop-0.20-mapreduce-jobtrackerha + - hadoop-hdfs-namenode + - hadoop-hdfs-zkfc + - hadoop-0.20-mapreduce-zkfc + +- name: Copy the hadoop configuration files + template: src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} + with_items: + - core-site.xml + - hadoop-metrics.properties + - hadoop-metrics2.properties + - hdfs-site.xml + - log4j.properties + - mapred-site.xml + - slaves + - ssl-client.xml.example + - ssl-server.xml.example + notify: restart hadoopha master services + +- name: Create the data directory for the namenode metadata + file: path={{ item }} owner=hdfs group=hdfs 
state=directory + with_items: hadoop.dfs_namenode_name_dir + +- name: Create the data directory for the jobtracker ha + file: path={{ item }} owner=mapred group=mapred state=directory + with_items: hadoop.mapred_job_tracker_persist_jobstatus_dir + + +- name: Initialize the secodary namenode + shell: creates=/usr/lib/hadoop/namenode.formatted su - hdfs -c "hadoop namenode -bootstrapStandby"; touch /usr/lib/hadoop/namenode.formatted + +- name: start hadoop namenode services + service: name=hadoop-hdfs-namenode state=started + +- name: Initialize the zkfc for namenode + shell: creates=/usr/lib/hadoop/zkfc.formatted su - hdfs -c "hdfs zkfc -formatZK"; touch /usr/lib/hadoop/zkfc.formatted + +- name: start zkfc for namenodes + service: name=hadoop-hdfs-zkfc state=started + delegate_to: ${item} + with_items: groups.hadoop_masters + +- name: Give permissions for mapred users + shell: creates=/usr/lib/hadoop/fs.initialized su - hdfs -c "hadoop fs -chown hdfs:hadoop /"; su - hdfs -c "hadoop fs -chmod 0774 /"; touch /usr/lib/hadoop/namenode.initialized + +- name: Initialize the zkfc for jobtracker + shell: creates=/usr/lib/hadoop/zkfcjob.formatted su - mapred -c "hadoop mrzkfc -formatZK"; touch /usr/lib/hadoop/zkfcjob.formatted + +- name: start zkfc for jobtracker + service: name=hadoop-0.20-mapreduce-zkfc state=started + delegate_to: ${item} + with_items: groups.hadoop_masters + +- name: start hadoop Jobtracker services + service: name=hadoop-0.20-mapreduce-jobtrackerha state=started + delegate_to: ${item} + with_items: groups.hadoop_masters diff --git a/hadoop/roles/hadoop_slaves/tasks/main.yml b/hadoop/roles/hadoop_slaves/tasks/main.yml index 5294ab4..9056ea2 100644 --- a/hadoop/roles/hadoop_slaves/tasks/main.yml +++ b/hadoop/roles/hadoop_slaves/tasks/main.yml @@ -1,4 +1,4 @@ --- # Playbook for Hadoop slave servers -- include: slaves.yml tags=slaves,no_ha +- include: slaves.yml tags=slaves diff --git a/hadoop/roles/hadoop_slaves/tasks/slaves.yml 
b/hadoop/roles/hadoop_slaves/tasks/slaves.yml index 0856f0c..2807bac 100644 --- a/hadoop/roles/hadoop_slaves/tasks/slaves.yml +++ b/hadoop/roles/hadoop_slaves/tasks/slaves.yml @@ -19,7 +19,7 @@ - slaves - ssl-client.xml.example - ssl-server.xml.example - when_unset: $ha_disabled + when: ha_enabled notify: restart hadoop slave services - name: Copy the hadoop configuration files for non ha @@ -34,19 +34,19 @@ - slaves - ssl-client.xml.example - ssl-server.xml.example - when_set: $ha_disabled + when: not ha_enabled notify: restart hadoop slave services - name: Create the data directory for the slave nodes to store the data - file: path=${item} owner=hdfs group=hdfs state=directory - with_items: ${hadoop.dfs_datanode_data_dir} + file: path={{ item }} owner=hdfs group=hdfs state=directory + with_items: hadoop.dfs_datanode_data_dir - name: Create the data directory for the slave nodes for mapreduce - file: path=${item} owner=mapred group=mapred state=directory - with_items: ${hadoop.mapred_local_dir} + file: path={{ item }} owner=mapred group=mapred state=directory + with_items: hadoop.mapred_local_dir - name: start hadoop slave services - service: name=${item} state=restarted + service: name={{ item }} state=started with_items: - hadoop-0.20-mapreduce-tasktracker - hadoop-hdfs-datanode diff --git a/hadoop/roles/qjournal_servers/tasks/main.yml b/hadoop/roles/qjournal_servers/tasks/main.yml index 347bb10..86fa9e3 100644 --- a/hadoop/roles/qjournal_servers/tasks/main.yml +++ b/hadoop/roles/qjournal_servers/tasks/main.yml @@ -5,10 +5,10 @@ yum: name=hadoop-hdfs-journalnode state=installed - name: Create folder for Journaling - file: path=${hadoop.dfs_journalnode_edits_dir} state=directory owner=hdfs group=hdfs + file: path={{ hadoop.dfs_journalnode_edits_dir }} state=directory owner=hdfs group=hdfs - name: Copy the hadoop configuration files - template: src=roles/common/templates/hadoop_ha_conf/${item}.j2 dest=/etc/hadoop/conf/${item} + template: 
src=roles/common/templates/hadoop_ha_conf/{{ item }}.j2 dest=/etc/hadoop/conf/{{ item }} with_items: - core-site.xml - hadoop-metrics.properties @@ -19,20 +19,4 @@ - slaves - ssl-client.xml.example - ssl-server.xml.example - when_unset: $ha_disabled - notify: restart qjournal services - -- name: Copy the non ha hadoop configuration files - template: src=roles/common/templates/hadoop_conf/${item}.j2 dest=/etc/hadoop/conf/${item} - with_items: - - core-site.xml - - hadoop-metrics.properties - - hadoop-metrics2.properties - - hdfs-site.xml - - log4j.properties - - mapred-site.xml - - slaves - - ssl-client.xml.example - - ssl-server.xml.example - when_set: $ha_disabled notify: restart qjournal services diff --git a/hadoop/roles/zookeeper_servers/templates/zoo.cfg.j2 b/hadoop/roles/zookeeper_servers/templates/zoo.cfg.j2 index 540f894..a5e3f9f 100644 --- a/hadoop/roles/zookeeper_servers/templates/zoo.cfg.j2 +++ b/hadoop/roles/zookeeper_servers/templates/zoo.cfg.j2 @@ -1,9 +1,9 @@ tickTime=2000 dataDir=/var/lib/zookeeper/ -clientPort={{ hadoop['zookeeper.clientport'] }} +clientPort={{ hadoop['zookeeper_clientport'] }} initLimit=5 syncLimit=2 {% for host in groups['zookeeper_servers'] %} -server.{{ hostvars[host].zoo_id }}={{ host }}:{{ hadoop['zookeeper.leader_port'] }}:{{ hadoop['zookeeper.election_port'] }} +server.{{ hostvars[host].zoo_id }}={{ host }}:{{ hadoop['zookeeper_leader_port'] }}:{{ hadoop['zookeeper_election_port'] }} {% endfor %} diff --git a/hadoop/roles/zookeeper_servers/vars/main.yml b/hadoop/roles/zookeeper_servers/vars/main.yml deleted file mode 100644 index 32486ad..0000000 --- a/hadoop/roles/zookeeper_servers/vars/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -# Vars for Zookeeper - -clientport: 2181 -leader_port: 2888 -election_port: 3888 diff --git a/hadoop/site.yml b/hadoop/site.yml index 3f3f532..59a075f 100644 --- a/hadoop/site.yml +++ b/hadoop/site.yml @@ -3,39 +3,27 @@ - hosts: all - vars_files: - - hadoop_vars/hadoop roles: - common - hosts: 
zookeeper_servers - vars_files: - - hadoop_vars/hadoop roles: - - zookeeper_servers + - { role: zookeeper_servers, when: ha_enabled } - hosts: qjournal_servers - vars_files: - - hadoop_vars/hadoop roles: - - qjournal_servers + - { role: qjournal_servers, when: ha_enabled } - hosts: hadoop_master_primary - vars_files: - - hadoop_vars/hadoop roles: - - hadoop_primary + - { role: hadoop_primary } - hosts: hadoop_master_secondary - vars_files: - - hadoop_vars/hadoop roles: - - hadoop_secondary + - { role: hadoop_secondary, when: ha_enabled } - hosts: hadoop_slaves - vars_files: - - hadoop_vars/hadoop roles: - - hadoop_slaves + - { role: hadoop_slaves }