RHEL 5.7 HA-LVM cluster setup guide
version 20160508, created by Seamus Murray
After manually building the server over multiple RDP sessions and KVM consoles it is easy to end up with typos or duplicated keystrokes in config files. Before going further, make sure the hostname, IP addresses, gateways etc. are correct (see the example after this checklist).
- ensure the hostname is correct and is a FQDN in /etc/sysconfig/network
- ensure the hostname is not set in any of the ifcfg-ethX files
- ensure the hostname is not listed against the loopback address in /etc/hosts # the Red Hat installer puts this in by default
- restart network services, run the hostname command manually, or reboot if necessary
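As a minimal sketch (using the hostname1 node from the cluster section later in this guide), the files should end up looking something like:
/etc/sysconfig/network
NETWORKING=yes
HOSTNAME=hostname1.local
/etc/hosts
127.0.0.1   localhost.localdomain localhost   # no real hostname on the loopback line
192.168.0.1 hostname1.local hostname1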
Ensure NTP is working correctly. You may need to manually sync the time, then verify as shown below:
service ntpd stop
ntpdate 192.168.100.1
ntpdate 192.168.100.2
service ntpd start
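A quick way to confirm the daemon is actually syncing afterwards (standard ntp tooling, nothing site-specific): run ntpq and look for a peer marked with '*' and a small offset.
ntpq -p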
Important: due to application vendor support requirements, the cluster servers need to remain on RHEL 5.7, i.e. they cannot be updated to RHEL 5.8 or 5.9. To keep the hosts on RHEL 5.7, a dedicated software channel has been created for this specific project, and the hosts have to be registered against that channel. If they subscribe to the main RHEL 5.x channel they will be updated to the latest package versions, which will make them unsupportable from the application vendor's perspective. The channels are:
- rhel-x86_64-server-5_7
- Clone Red Hat Network Tools for RHEL Server (v.5.7 64-bit x86_64)
- rhel-x86_64-server-cluster-5_7
Download and install the certificates for the signed RPM packages
wget --no-check-certificate https://satellite1.local/pub/rhn-org-trusted-ssl-cert-1.0-1.noarch.rpm
wget --no-check-certificate https://satellite1.local/pub/rpm-gpg-key-1.0-7.noarch.rpm
rpm -Uvh rhn-org-trusted-ssl-cert-1.0-1.noarch.rpm rpm-gpg-key-1.0-7.noarch.rpm
sed -i 's/^\(sslCACert=\).*/\1\/usr\/share\/rhn\/RHN-ORG-TRUSTED-SSL-CERT/' /etc/sysconfig/rhn/up2date
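A quick sanity check that the sed edit landed (it only touches the file shown):
grep sslCACert /etc/sysconfig/rhn/up2date
# expect: sslCACert=/usr/share/rhn/RHN-ORG-TRUSTED-SSL-CERT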
Ensure that the Red Hat GPG key is installed # not imported by default
rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release
If it is not, you may get errors like:
Public key for mkinitrd-5.1.19.6-79.el5.i386.rpm is not installed
Register the server with the Red Hat Satellite server using one of the following activation keys:
rhnreg_ks --activationkey 1-bc5fb717c87abcdefghijklm --serverUrl https://satellite1.local/XMLRPC
rhnreg_ks --activationkey 1-5bbf25c4abcdefghijklm123 --serverUrl https://satellite1.local/XMLRPC
Check that repository access is working:
yum clean all
yum list
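To confirm the host landed on the 5.7-only clone channels rather than the main RHEL 5 channel, rhn-channel (part of the RHN tools) can list the current subscriptions; a hedged check:
rhn-channel --list
# expect only the rhel-x86_64-server-5_7 / cluster-5_7 channels listed above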
Install the additional packages:
yum install java-1.6.0-openjdk
yum install samba3x-winbind iptables lsof krb5-workstation pam_passwdqc xauth
yum install ipv6-disable
Fixing inconsistent path ordering when using user_friendly_names and a separate /var partition on RHEL 5
cause
- the RHEL 5 multipath bindings file lives under /var
- /var is not mounted before multipath is configured
- Red Hat does not include /var/lib/ in the initial RAM disk, which breaks multipath on boot
solution (workaround)
Edit /etc/multipath.conf and add the following lines to set the NetApp-specific settings, then reload and verify as shown after the block:
defaults {
user_friendly_names yes
bindings_file /etc/multipath/bindings ##changed
flush_on_last_del yes
max_fds max
pg_prio_calc avg
queue_without_daemon no
}
devices {
device {
vendor "NETAPP"
product "LUN"
path_checker tur
path_selector "round-robin 0"
getuid_callout "/sbin/scsi_id -g -u -s /block/%n"
# prio_callout "/sbin/mpath_prio_ontap /dev/%n"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
# features "1 queue_if_no_path"
features "3 queue_if_no_path pg_init_retries 50"
# hardware_handler "0"
hardware_handler "1 alua"
path_grouping_policy group_by_prio
failback immediate
rr_weight uniform
rr_min_io 128
}
}
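Before rebuilding the initrd it is worth restarting multipathd and confirming the NetApp LUNs pick up the new settings (standard device-mapper-multipath commands, nothing site-specific):
service multipathd restart
multipath -ll
# each LUN should report the NETAPP vendor/product, ALUA priority groups and round-robin paths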
mkdir /etc/multipath
cp /var/lib/multipath/bindings /etc/multipath/bindings
cd /boot
cp initrd*img initrd*img.multipath-bak
mkinitrd -f initrd-`uname -r`.img `uname -r`
ls -ltr # make sure the new initrd is the correct size; there appears to be a bug where mkinitrd intermittently creates an image roughly half the expected size
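To double-check that the relocated bindings file actually made it into the new image (RHEL 5 initrds are gzipped cpio archives; a hedged check):
zcat /boot/initrd-`uname -r`.img | cpio -it | grep -i multipath
# you should see multipath-related entries such as etc/multipath.conf and, if the workaround worked, the bindings file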
Edit the multipath bindings file and ensure the shared LUNs have the same friendly names across all nodes, then recreate the initial RAM disk image, reboot all nodes, and confirm that every node sees the LUNs under the same friendly names.
cat /etc/multipath/bindings
# Multipath bindings, Version : 1.0
# NOTE: this file is automatically maintained by the multipath program.
# You should not need to edit this file in normal circumstances.
#
# Format:
# alias wwid
#
mpsys 360a9800037542d73442443123456755
mpath1 360a9800037542d73442443123456757
mpath2 360a9800037542d73442443123456759
mpath3 360a9800037542d7344244312345672f
mpath4 360a9800037542d73442443123456762
mpath5 360a9800037542d73442443123456764
mpath6 360a9800037542d73442443123456766
Final multipath.conf file should look something like:
defaults {
user_friendly_names yes
bindings_file /etc/multipath/bindings
flush_on_last_del yes
max_fds max
pg_prio_calc avg
queue_without_daemon no
}
blacklist {
devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
devnode "^(hd|xvd|vd)[a-z]*"
# wwid "*"
}
# Make sure our multipath devices are enabled.
blacklist_exceptions {
wwid "360a9800037542d73442443123456755"
}
multipaths {
multipath {
wwid 360a9800037542d73442443123456755
alias mpsys
}
}
devices {
device {
vendor "NETAPP"
product "LUN"
path_checker tur
path_selector "round-robin 0"
getuid_callout "/sbin/scsi_id -g -u -s /block/%n"
# prio_callout "/sbin/mpath_prio_ontap /dev/%n"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
# features "1 queue_if_no_path"
features "3 queue_if_no_path pg_init_retries 50"
# hardware_handler "0"
hardware_handler "1 alua"
path_grouping_policy group_by_prio
failback immediate
rr_weight uniform
rr_min_io 128
}
}
Update /etc/fstab to use the new friendly name (verify after the next reboot as shown below):
#/dev/mapper/mpath0p1 /boot ext3 defaults 1 2
/dev/mapper/mpsysp1 /boot ext3 defaults 1 2
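A hedged sanity check once the node has been rebooted, to confirm the aliased partition exists and /boot is mounted from it:
ls -l /dev/mapper/mpsysp1
mount | grep /boot
df -h /boot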
Cluster preconfiguration...
Ensure NTP is working correctly; you may need to manually sync the time:
service ntpd stop
ntpdate 192.168.0.1
ntpdate 192.168.0.2
service ntpd start
Install cluster software and fencing tools
yum install OpenIPMI-tools.x86_64 cman rgmanager luci ricci
Disable the cluster services from starting at boot until you have finished the configuration
chkconfig --level 2345 cman off
chkconfig --level 2345 rgmanager off
chkconfig --level 2345 ricci off
chkconfig --level 2345 luci off
Edit firewall rules to allow cluster traffic
iptables -I INPUT --protocol tcp --dport 22 -j ACCEPT
# Cluster
iptables -I INPUT -p udp -m state --state NEW -m multiport --dports 5404,5405 -j ACCEPT
iptables -I INPUT -p tcp -m state --state NEW -m multiport --dports 11111 -j ACCEPT
iptables -I INPUT -p udp -m state --state NEW -m multiport --dports 50007 -j ACCEPT
iptables -I INPUT -p tcp -m state --state NEW -m multiport --dports 50008 -j ACCEPT
Turn off the firewall for the private NIC:
iptables -I INPUT -i eth1 -j ACCEPT
Save the iptables rules so that they are persistent across reboots.
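On RHEL 5 the usual way to do this is via the iptables init script (a sketch; adjust if the rules are managed some other way at your site):
service iptables save
# writes the running rules to /etc/sysconfig/iptables, which is loaded at boot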
Configure the private/heartbeat nic
- add a multicast route for the private/heartbeat interface by creating /etc/sysconfig/network-scripts/route-eth1 containing the line below (apply it as shown after the route):
239.0.0.0/4 dev eth1
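The route file is only read when the interface is brought up, so to apply and check it without a reboot (standard network-script / iproute2 commands):
ifdown eth1 && ifup eth1
ip route show | grep 239.0.0.0
# expect: 239.0.0.0/4 dev eth1 ...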
Define which multicast address to use for the cluster
239.192.0.1 for App 1
239.192.0.2 for App 2
239.192.0.3 for App 3
Add the private hostnames/domains to /etc/hosts
# cluster nodes
192.168.0.1 hostname1.local hostname1
10.10.10.1 hostname1.private hostname1-priv
192.168.0.2 hostname2.local hostname2
10.10.10.2 hostname2.private hostname2-priv
192.168.0.3 hostname-vip.local hostname-vip
If you want to use luci, a warning: due to the way the Red Hat Satellite server is configured you cannot perform rpm or yum group installs, you have to specify the packages individually. Because of this you cannot use ricci/luci to create a cluster configuration from scratch; you can, however, use ricci/luci to import an existing cluster. Therefore, if you really need the GUI, create a simple cluster with no resources or services first and then import it into luci.
Starting ricci/luci
If you have not previously configured a luci account, run "/usr/sbin/luci_admin init" and set a password. Start ricci on both servers and luci on one server, then set up PuTTY forwarding from the RDP jump host local port 80 to 127.0.0.1:8084 on the host that will run luci.
Open a browser window on the jump host at https://127.0.0.1:80
Simple cluster.conf file to get you started
<?xml version="1.0"?>
<cluster alias="cluster1" config_version="1" name="cluster1">
<fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
<clusternodes>
<clusternode name="hostname1.private" nodeid="1" votes="1">
<fence>
<method name="1">
<device name="ipmi-hostname1"/>
</method>
</fence>
</clusternode>
<clusternode name="hostname2.private" nodeid="2" votes="1">
<fence>
<method name="1">
<device name="ipmi-hostname2"/>
</method>
</fence>
</clusternode>
</clusternodes>
<cman expected_votes="1" two_node="1">
<multicast addr="239.192.0.1"/>
</cman>
<fencedevices>
<fencedevice agent="fence_ipmilan" auth="password" ipaddr="192.168.1.1" login="test-user" name="ipmi-hostname1" passwd="test-password" delay="30"/>
<fencedevice agent="fence_ipmilan" auth="password" ipaddr="192.168.1.2" login="test-user" name="ipmi-hostname2" passwd="test-password"/>
</fencedevices>
<rm>
<failoverdomains>
<failoverdomain name="FailDomain" ordered="1" restricted="1">
<failoverdomainnode name="hostname1.private" priority="1"/>
<failoverdomainnode name="hostname2.private" priority="2"/>
</failoverdomain>
</failoverdomains>
<resources>
</resources>
<service autostart="1" domain="FailDomain" exclusive="0" name="service1">
</service>
</rm>
</cluster>
scp cluster.conf hostname1:/etc/cluster/cluster.conf
ccs_tool update cluster.conf
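Note that the other nodes only pick up the new file if config_version has been incremented; the usual cycle when editing in place is roughly (hedged, standard cman tooling):
# edit /etc/cluster/cluster.conf and bump config_version, e.g. 1 -> 2
ccs_tool update /etc/cluster/cluster.conf
cman_tool version -r 2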
Configure fencing
- set up the IPMI user/profile on the hardware management controller
Test to see if fencing works
echo -e "ipaddr=192.168.1.1\nlogin=\npasswd=\naction=status" | fence_ipmilan
Add the fencing details to the cluster.conf file
Start the cluster services and check the status (cman must be running before rgmanager):
service cman start
service rgmanager start
clustat
cman_tool status
tail -f /var/log/messages
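Once cman is up on both nodes you can also exercise fencing through the cluster itself rather than calling the agent by hand; be aware this genuinely power-cycles the target node (hedged example):
fence_node hostname2.private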
- manual fence override: fence_ack_manual -e -n hostname1.private
- manual service relocation: clusvcadm -r ServiceName
- ifconfig will not display a VIP; to see it you have to run:
ip address show
You may want to disable the ACPI daemon, otherwise your server may not switch off fast enough when it is fenced:
chkconfig --level 2345 acpid off
Set up HA-LVM
Refer to section F.2, "Configuring HA-LVM Failover with Tagging", in the RHEL 5 Cluster Administration guide.
Edit /etc/lvm/lvm.conf:
- Ensure that the locking_type parameter in the global section of the /etc/lvm/lvm.conf file is set to the value '1'.
- Edit the volume_list field in the /etc/lvm/lvm.conf file. Include the name of your root volume group and your hostname as listed in the /etc/cluster/cluster.conf file, preceded by @. The hostname to include here is the machine on which you are editing the lvm.conf file, not any remote hostname. Note that this string MUST match the node name given in the cluster.conf file. Below is a sample entry from the /etc/lvm/lvm.conf file (afterwards rebuild the initrd as shown after the sample):
volume_list = [ "vgsys", "@hostname1.private" ]
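The tagging procedure in the cluster admin guide also has you rebuild the initial RAM disk on every node after changing lvm.conf, so that the volume_list filter is honoured at boot, and then reboot; roughly:
mkinitrd -f /boot/initrd-`uname -r`.img `uname -r`
reboot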
Create the PVs, VGs, LVs and filesystems from one of the nodes (the matching cluster.conf resources are sketched after the mkdir commands):
pvcreate /dev/mapper/mpath1
pvcreate /dev/mapper/mpath2
pvcreate /dev/mapper/mpath3
pvcreate /dev/mapper/mpath4
pvcreate /dev/mapper/mpath5
pvcreate /dev/mapper/mpath6
vgcreate vg_shared_pc /dev/mapper/mpath1
vgcreate vg_shared_db /dev/mapper/mpath2
vgcreate vg_shared_arch /dev/mapper/mpath3
vgcreate vg_shared_logs /dev/mapper/mpath4
vgcreate vg_shared_data /dev/mapper/mpath5
vgcreate vg_shared_backup /dev/mapper/mpath6
lvcreate -l 100%FREE -n lv_shared_pc vg_shared_pc
lvcreate -l 100%FREE -n lv_shared_db vg_shared_db
lvcreate -l 100%FREE -n lv_shared_arch vg_shared_arch
lvcreate -l 100%FREE -n lv_shared_logs vg_shared_logs
lvcreate -l 100%FREE -n lv_shared_data vg_shared_data
lvcreate -l 100%FREE -n lv_shared_backup vg_shared_backup
mkfs.ext3 /dev/vg_shared_pc/lv_shared_pc
mkfs.ext3 /dev/vg_shared_db/lv_shared_db
mkfs.ext3 /dev/vg_shared_arch/lv_shared_arch
mkfs.ext3 /dev/vg_shared_logs/lv_shared_logs
mkfs.ext3 /dev/vg_shared_data/lv_shared_data
mkfs.ext3 /dev/vg_shared_backup/lv_shared_backup
mkdir /opt/pc
mkdir /opt/pc_db
mkdir /opt/pc_arch
mkdir /opt/pc_logs
mkdir /opt/pc_data
mkdir /opt/pc_backup
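To have the shared storage follow the cluster service, each volume group eventually needs an lvm resource (and an fs resource for its mount point) inside the service definition in cluster.conf. A minimal sketch for one of the volume groups, using the names created above and the VIP from /etc/hosts; the resource names ha_lvm_pc and fs_pc are placeholders, and the remaining five VG/LV pairs would be added the same way:
<service autostart="1" domain="FailDomain" exclusive="0" name="service1">
<lvm name="ha_lvm_pc" vg_name="vg_shared_pc" lv_name="lv_shared_pc"/>
<fs name="fs_pc" device="/dev/vg_shared_pc/lv_shared_pc" mountpoint="/opt/pc" fstype="ext3" force_unmount="1"/>
<ip address="192.168.0.3" monitor_link="1"/>
</service>
Remember to bump config_version and push the file with ccs_tool update as before.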