66
77set -u
88
# Exit codes returned by this script.
EXT_SUCCESS=0               # normal completion
EXT_GENERAL=1               # generic failure (also set by handle_error)
EXT_LOCKFAIL=2              # another live instance holds the lock
EXT_RECVSIG=3               # terminated by a trapped signal
EXT_RETRY=4                 # lock was removed; caller may retry
EXT_TAR_FAILED=5            # tar append operation failed
EXT_PROCFS_SAVE_FAILED=6    # saving proc files failed
EXT_INVALID_ARGUMENT=10     # bad argument / insufficient privileges
1217
1318TAR=tar
1419MKDIR=mkdir
@@ -39,21 +44,38 @@ USER=${USER:-root}
# Default settings; TIMEOUT_MIN is overridden by the -t option.
TIMEOUT_MIN="5"
SKIP_BCMCMD=0
SAVE_STDERR=true
# Overall script status; handle_error downgrades it to EXT_GENERAL on failure.
RETURN_CODE=$EXT_SUCCESS
DEBUG_DUMP=false
4449
# lock dirs/files
# LOCKDIR is created with mkdir to take the lock; PIDFILE inside it records
# the PID of the instance holding the lock.
LOCKDIR="/tmp/techsupport-lock"
PIDFILE="${LOCKDIR}/PID"
53+
# Remove lock directory and exit, let user decide if they want to retry.
# Globals:  RM, V, LOCKDIR, EXT_RETRY (read)
# Returns:  never returns; exits the shell with EXT_RETRY
rm_lock_and_exit() {
    # $RM and $V are left unquoted, matching their use elsewhere in the
    # script; only the path operand is quoted.
    $RM $V -rf -- "${LOCKDIR}"
    exit $EXT_RETRY
}
60+
# EXIT trap handler: report the exit status and release the lock.
# Globals:  RM, V, LOCKDIR (read); ECODE (written)
# Outputs:  one status line on stderr
handle_exit() {
    # Must be the first statement so $? still holds the status the script
    # is exiting with.
    ECODE=$?
    echo "Removing lock. Exit: $ECODE" >&2
    # NOTE(review): the lock is created with plain mkdir but removed via $RM;
    # if $RM is ever overridden to a non-deleting command the lock would be
    # left behind -- confirm.
    $RM $V -rf -- "${LOCKDIR}"
}
67+
# Signal trap handler: discard the partially built dump directory and exit.
# Globals:  RM, V, TARDIR, EXT_RECVSIG (read)
# Outputs:  one notice line on stderr
# Returns:  never returns; exits the shell with EXT_RECVSIG
handle_signal() {
    echo "Generate Dump received interrupt" >&2
    # The signal may arrive before TARDIR is assigned; with `set -u` an
    # unguarded expansion would abort here and lose the intended exit code.
    if [ -n "${TARDIR:-}" ]; then
        $RM $V -rf -- "${TARDIR}"
    fi
    exit $EXT_RECVSIG
}
51- trap ' handle_signal' SIGINT
5274
# ERR trap handler: log a failing command and mark the run as failed.
# Arguments: $1 - exit status of the failed command
#            $2 - line number where it was observed
# Globals:   EXT_GENERAL (read); RETURN_CODE (written when $1 is non-zero)
# Outputs:   one error line on stderr when $1 is non-zero
handle_error() {
    if [ "$1" != "0" ]; then
        echo "ERR: RC:-$1 observed on line $2" >&2
        RETURN_CODE=$EXT_GENERAL
    fi
}
5981
@@ -103,7 +125,7 @@ save_bcmcmd() {
103125 filepath=" ${filepath} .gz"
104126 fi
105127 ($TAR $V -rhf $TARFILE -C $DUMPDIR " $tarpath " \
106- || abort " ${ERROR_TAR_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
128+ || abort " ${EXT_TAR_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
107129 && $RM $V -rf " $filepath "
108130 end_t=$( date +%s%3N)
109131 echo " [ save_bcmcmd:$cmd ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
@@ -213,7 +235,7 @@ save_cmd() {
213235 fi
214236
215237 ($TAR $V -rhf $TARFILE -C $DUMPDIR " $tarpath " \
216- || abort " ${ERROR_TAR_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
238+ || abort " ${EXT_TAR_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
217239 && $RM $V -rf " $filepath "
218240 end_t=$( date +%s%3N)
219241 echo " [ save_cmd:$cmd ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
@@ -766,7 +788,7 @@ save_file() {
766788
767789 if $do_tar_append ; then
768790 ($TAR $V -rhf $TARFILE -C $DUMPDIR " $tar_path " \
769- || abort " ${ERROR_PROCFS_SAVE_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
791+ || abort " ${EXT_PROCFS_SAVE_FAILED } " " tar append operation failed. Aborting to prevent data loss." ) \
770792 && $RM $V -f " $gz_path "
771793 fi
772794 end_t=$( date +%s%3N)
@@ -975,7 +997,7 @@ save_log_files() {
975997
976998 # Append the log folder to the main tarball
977999 ($TAR $V -rhf $TARFILE -C $DUMPDIR ${BASE} /log \
978- || abort " ${ERROR_TAR_FAILED } " " tar append operation failed. Aborting for safety" ) \
1000+ || abort " ${EXT_TAR_FAILED } " " tar append operation failed. Aborting for safety" ) \
9791001 && $RM $V -rf $TARDIR /log
9801002 end_t=$( date +%s%3N)
9811003 echo " [ TAR /var/log Files ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
@@ -1004,7 +1026,7 @@ save_warmboot_files() {
10041026
10051027 ($TAR $V --warning=no-file-removed -rhf $TARFILE -C $DUMPDIR --mode=+rw \
10061028 $BASE /warmboot \
1007- || abort " ${ERROR_TAR_FAILED } " " Tar append operation failed. Aborting for safety." ) \
1029+ || abort " ${EXT_TAR_FAILED } " " Tar append operation failed. Aborting for safety." ) \
10081030 && $RM $V -rf $TARDIR
10091031 fi
10101032 end_t=$( date +%s%3N)
@@ -1144,11 +1166,6 @@ main() {
11441166 trap ' handle_error $? $LINENO' ERR
11451167 local start_t=0
11461168 local end_t=0
1147- if [ ` whoami` != root ] && ! $NOOP ;
1148- then
1149- echo " $0 : must be run as root (or in sudo)" >&2
1150- exit 10
1151- fi
11521169 NUM_ASICS=$( get_asic_count)
11531170 ${CMD_PREFIX} renice +5 -p $$ >> /dev/null
11541171 ${CMD_PREFIX} ionice -c 2 -n 5 -p $$ >> /dev/null
@@ -1174,7 +1191,7 @@ main() {
11741191 /proc/softirqs /proc/stat /proc/swaps /proc/sysvipc /proc/timer_list \
11751192 /proc/uptime /proc/version /proc/vmallocinfo /proc/vmstat \
11761193 /proc/zoneinfo \
1177- || abort " ${ERROR_PROCFS_SAVE_FAILED } " " Proc saving operation failed. Aborting for safety."
1194+ || abort " ${EXT_PROCFS_SAVE_FAILED } " " Proc saving operation failed. Aborting for safety."
11781195 end_t=$( date +%s%3N)
11791196 echo " [ Capture Proc State ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
11801197
@@ -1307,7 +1324,7 @@ main() {
13071324 --exclude=" */etc/ssl/certs/*" \
13081325 --exclude=" */etc/ssl/private/*" \
13091326 $BASE /etc \
1310- || abort " ${ERROR_TAR_FAILED } " " Tar append operation failed. Aborting for safety." ) \
1327+ || abort " ${EXT_TAR_FAILED } " " Tar append operation failed. Aborting for safety." ) \
13111328 && $RM $V -rf $TARDIR
13121329 end_t=$( date +%s%3N)
13131330 echo " [ TAR /etc Files ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
@@ -1473,7 +1490,7 @@ while getopts ":xnvhzas:t:r:d" opt; do
14731490 ;;
14741491 h)
14751492 usage
1476- exit 0
1493+ exit $EXT_SUCCESS
14771494 ;;
14781495 v)
14791496 # echo commands about to be run to stderr
@@ -1501,7 +1518,7 @@ while getopts ":xnvhzas:t:r:d" opt; do
15011518 s)
15021519 SINCE_DATE=" ${OPTARG} "
15031520 # validate date expression
1504- date --date=" ${SINCE_DATE} " & > /dev/null || abort " ${ERROR_INVALID_ARGUMENT } " " Invalid date expression passed: '${SINCE_DATE} '"
1521+ date --date=" ${SINCE_DATE} " & > /dev/null || abort " ${EXT_INVALID_ARGUMENT } " " Invalid date expression passed: '${SINCE_DATE} '"
15051522 ;;
15061523 t)
15071524 TIMEOUT_MIN=" ${OPTARG} "
@@ -1514,9 +1531,47 @@ while getopts ":xnvhzas:t:r:d" opt; do
15141531 ;;
15151532 /? )
15161533 echo " Invalid option: -$OPTARG " >&2
1517- exit 1
1534+ exit $EXT_GENERAL
15181535 ;;
15191536 esac
15201537done
15211538
1522- main
# Check permissions before proceeding further.
# Runs before any lock is taken; skipped when $NOOP evaluates to true.
if [ "$(whoami)" != root ] && ! $NOOP; then
    echo "$0: must be run as root (or in sudo)" >&2
    exit $EXT_INVALID_ARGUMENT
fi
1545+
##
## Attempt Locking
##

# Creating LOCKDIR is the lock: mkdir fails if the directory already exists.
if mkdir "${LOCKDIR}" &> /dev/null; then
    # Release the lock on every exit path from here on.
    trap 'handle_exit' EXIT
    echo "$$" > "${PIDFILE}"
    # This handler will exit the script upon receiving these interrupts
    # Trap configured on EXIT will be triggered by the exit from handle_signal function
    trap 'handle_signal' SIGINT SIGHUP SIGQUIT SIGTERM
    echo "Lock succesfully accquired and installed signal handlers"
    # Proceed with the actual code
    main
else
    # lock failed, check if the other PID is alive
    if ! PID_PROG="$(cat "${PIDFILE}")"; then
        # Another instance is probably about to remove the lock or PIDfile doesn't exist
        rm_lock_and_exit
    fi

    # kill -0 sends no signal; it only tests whether the PID can be signalled.
    if ! kill -0 "${PID_PROG}" &> /dev/null; then
        # Lock is stale
        echo "Removing stale lock of nonexistant PID ${PID_PROG}"
        rm_lock_and_exit
    else
        # Lock is valid and the other instance is active. Exit Now
        echo "Accquiring lock failed, PID ${PID_PROG} is active" >&2
        exit $EXT_LOCKFAIL
    fi
fi
0 commit comments