From 7a7f801aad7b13b81833b4417b0ff5ee17694ffc Mon Sep 17 00:00:00 2001 From: Jonathan Brassow <jbrassow@redhat.com> Date: Thu, 29 Apr 2010 14:07:04 -0500 Subject: [PATCH] HA LVM: Use CLVM with local machine kernel targets (bz 585229) When a logical volume is activated in a cluster exclusively, the kernel targets used are single machine targets. This means we can use CLVM to protect the LVM metadata and still better align ourselves with active/passive application stacks. Making this change also simplifies HA LVM setup. There is no more setting up tags, volume_list entries, or updating the initrd. Updating HA-LVM in this way also addresses the following bugs: - 509368 - 583769 - 572311 --- rgmanager/src/resources/lvm.sh | 52 ++----------- rgmanager/src/resources/lvm_by_lv.sh | 75 +++++++++++++++++- rgmanager/src/resources/lvm_by_vg.sh | 147 +++++++++++++++++++++++++++++++++- 3 files changed, 221 insertions(+), 53 deletions(-) diff --git a/rgmanager/src/resources/lvm.sh b/rgmanager/src/resources/lvm.sh index ffb0b21..a972aaf 100755 --- a/rgmanager/src/resources/lvm.sh +++ b/rgmanager/src/resources/lvm.sh @@ -36,19 +36,6 @@ export LC_ALL LANG PATH rv=0 ################################################################################ -# clvm_check -# -################################################################################ -function clvm_check -{ - if [[ $(vgs -o attr --noheadings $1) =~ .....c ]]; then - return 1 - fi - - return 0 -} - -################################################################################ # ha_lvm_proper_setup_check # ################################################################################ @@ -106,18 +93,10 @@ function ha_lvm_proper_setup_check case $1 in start) - ## - # We can safely ignore clustered volume groups (VGs handled by CLVM) - ## - if ! clvm_check $OCF_RESKEY_vg_name; then - ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." - exit 0 + if ! 
[[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + ha_lvm_proper_setup_check || exit 1 fi - ha_lvm_proper_setup_check || exit 1 - - rv=0 - if [ -z $OCF_RESKEY_lv_name ]; then vg_start || exit 1 else @@ -133,20 +112,13 @@ status|monitor) else lv_status || exit 1 fi - rv=0 ;; stop) - ## - # We can safely ignore clustered volume groups (VGs handled by CLVM) - ## - if ! clvm_check $OCF_RESKEY_vg_name; then - ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." - exit 0 - fi - - if ! ha_lvm_proper_setup_check; then - ocf_log err "WARNING: An improper setup can cause data corruption!" + if ! [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + if ! ha_lvm_proper_setup_check; then + ocf_log err "WARNING: An improper setup can cause data corruption!" + fi fi if [ -z $OCF_RESKEY_lv_name ]; then @@ -154,35 +126,23 @@ stop) else lv_stop || exit 1 fi - rv=0 ;; recover|restart) $0 stop || exit $OCF_ERR_GENERIC $0 start || exit $OCF_ERR_GENERIC - rv=0 ;; meta-data) cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'` - rv=0 ;; validate-all|verify-all) - ## - # We can safely ignore clustered volume groups (VGs handled by CLVM) - ## - if ! clvm_check $OCF_RESKEY_vg_name; then - ocf_log notice "$OCF_RESKEY_vg_name is a cluster volume. Ignoring..." - exit 0 - fi - if [ -z $OCF_RESKEY_lv_name ]; then vg_verify || exit 1 else lv_verify || exit 1 fi - rv=0 ;; *) echo "usage: $0 {start|status|monitor|stop|restart|meta-data|verify-all}" diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh index 1937b9f..da15a15 100644 --- a/rgmanager/src/resources/lvm_by_lv.sh +++ b/rgmanager/src/resources/lvm_by_lv.sh @@ -101,10 +101,22 @@ lv_activate_resilient() fi } +lv_status_clustered() +{ + # + # Check if device is active + # + if [[ ! $(lvs -o attr --noheadings $lv_path) =~ ....a. ]]; then + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + # lv_status # # Is the LV active? 
-lv_status() +lv_status_single() { declare lv_path="$OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" declare dev="/dev/$lv_path" @@ -167,6 +179,16 @@ lv_status() return $OCF_SUCCESS } +function lv_status +{ + # We pass in the VG name to see if the logical volume is clustered + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + lv_status_clustered + else + lv_status_single + fi +} + # lv_activate_and_tag lv_activate_and_tag() { @@ -334,7 +356,29 @@ lv_activate() return $OCF_SUCCESS } -function lv_start +function lv_start_clustered +{ + if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + ocf_log err "Failed to activate logical volume, $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + + if ! lvconvert --repair --use-policies $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + ocf_log err "Failed to cleanup $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + return $OCF_ERR_GENERIC + fi + + if ! lvchange -aey $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name; then + ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name successful" + return $OCF_SUCCESS + fi + return $OCF_SUCCESS +} + +function lv_start_single { if ! lvs $OCF_RESKEY_vg_name >& /dev/null; then lv_count=0 @@ -355,7 +399,22 @@ function lv_start return 0 } -function lv_stop +function lv_start +{ + # We pass in the VG name to see if the logical volume is clustered + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + lv_start_clustered + else + lv_start_single + fi +} + +function lv_stop_clustered +{ + lvchange -aln $OCF_RESKEY_vg_name/$OCF_RESKEY_lv_name +} + +function lv_stop_single { if ! 
lv_activate stop; then return 1 @@ -363,3 +422,13 @@ function lv_stop return 0 } + +function lv_stop +{ + # We pass in the VG name to see if the logical volume is clustered + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + lv_stop_clustered + else + lv_stop_single + fi +} diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh index 014dcbf..08d8c98 100755 --- a/rgmanager/src/resources/lvm_by_vg.sh +++ b/rgmanager/src/resources/lvm_by_vg.sh @@ -85,14 +85,18 @@ function strip_and_add_tag return $OCF_SUCCESS } +function vg_status_clustered +{ + return $OCF_SUCCESS +} + # vg_status # # Are all the LVs active? -function vg_status +function vg_status_single { local i local dev - local readdev local my_name=$(local_node_name) # @@ -144,13 +148,88 @@ function vg_status return $OCF_SUCCESS } +## +# Main status function for volume groups +## +function vg_status +{ + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + vg_status_clustered + else + vg_status_single + fi +} + function vg_verify { # Anything to verify? return $OCF_SUCCESS } -function vg_start +function vg_start_clustered +{ + local a + local results + local all_pvs + local resilience + + ocf_log info "Starting volume group, $OCF_RESKEY_vg_name" + + if ! vgchange -aey $OCF_RESKEY_vg_name; then + ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name" + ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name" + + if ! vgreduce --removemissing $OCF_RESKEY_vg_name; then + ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent" + return $OCF_ERR_GENERIC + fi + + if ! vgchange -aey $OCF_RESKEY_vg_name; then + ocf_log err "Failed second attempt to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + + ocf_log notice "Second attempt to activate $OCF_RESKEY_vg_name successful" + return $OCF_SUCCESS + else + # The activation commands succeeded, but did they do anything? 
+ # Make sure all the logical volumes are active + results=(`lvs -o name,attr --noheadings 2> /dev/null $OCF_RESKEY_vg_name`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + all_pvs=(`pvs --noheadings -o name 2> /dev/null`) + resilience=" --config devices{filter=[" + for i in ${all_pvs[*]}; do + resilience=$resilience'"a|'$i'|",' + done + resilience=$resilience"\"r|.*|\"]}" + + vgchange -aey $OCF_RESKEY_vg_name $resilience + break + fi + a=$(($a + 2)) + done + + # We need to check the LVs again if we made the command resilient + if [ ! -z $resilience ]; then + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name $resilience 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ! ${results[$(($a + 1))]} =~ ....a. ]]; then + ocf_log err "Failed to activate $OCF_RESKEY_vg_name" + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + ocf_log err "Orphan storage device in $OCF_RESKEY_vg_name slowing operations" + fi + fi + + return $OCF_SUCCESS +} + +function vg_start_single { local a local results @@ -238,7 +317,55 @@ function vg_start return $OCF_SUCCESS } -function vg_stop +## +# Main start function for volume groups +## +function vg_start +{ + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + vg_start_clustered + else + vg_start_single + fi +} + +function vg_stop_clustered +{ + local a + local results + typeset self_fence="" + + case ${OCF_RESKEY_self_fence} in + "yes") self_fence=1 ;; + 1) self_fence=1 ;; + *) self_fence="" ;; + esac + + # Shut down the volume group + # Do we need to make this resilient? + vgchange -aln $OCF_RESKEY_vg_name + + # Make sure all the logical volumes are inactive + results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`) + a=0 + while [ ! -z ${results[$a]} ]; do + if [[ ${results[$(($a + 1))]} =~ ....a. 
]]; then + if [ "$self_fence" ]; then + ocf_log err "Unable to deactivate $lv_path REBOOT" + sync + reboot -fn + else + ocf_log err "Logical volume $OCF_RESKEY_vg_name/${results[$a]} failed to shutdown" + fi + return $OCF_ERR_GENERIC + fi + a=$(($a + 2)) + done + + return $OCF_SUCCESS +} + +function vg_stop_single { local a local results @@ -279,3 +406,15 @@ function vg_stop return $OCF_SUCCESS } + +## +# Main stop function for volume groups +## +function vg_stop +{ + if [[ $(vgs -o attr --noheadings $OCF_RESKEY_vg_name) =~ .....c ]]; then + vg_stop_clustered + else + vg_stop_single + fi +} -- 1.6.2.5