Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions script_tools/agent_tools/agent2/setup_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ get_cpu_arch () {
fi
}

get_cpu_arch "uname -p" || get_cpu_arch "uname -m" || arch || fail get_cpu_arch "Failed to get CPU arch, please contact the developer."
get_cpu_arch "uname -p" || get_cpu_arch "uname -m" || get_cpu_arch "arch" || fail get_cpu_arch "Failed to get CPU arch, please contact the developer."


get_os_info () {
Expand Down Expand Up @@ -81,6 +81,9 @@ get_os_type () {
elif [[ "${OS_INFO,,}" =~ "hat" ]]; then
OS_TYPE="redhat"
RC_LOCAL_FILE="/etc/rc.d/rc.local"
else
OS_TYPE="other"
RC_LOCAL_FILE="/etc/rc.d/rc.local"
fi
}

Expand Down Expand Up @@ -313,19 +316,17 @@ check_heathz_by_gse () {
}

remove_crontab () {
if [ $IS_SUPER == false ]; then
return
fi

local tmpcron
tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)

crontab -l | grep -v "bin/gsectl" >"$tmpcron"
crontab "$tmpcron" && rm -f "$tmpcron"

# 下面这段代码是为了确保修改的crontab能立即生效
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
# 下面这段代码是为了确保修改的crontab立即生效
if [ $IS_SUPER == true ]; then
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
fi
fi
}

Expand Down Expand Up @@ -470,6 +471,16 @@ stop_agent () {
done
}

# Remove every existing directory passed as an argument, logging each step.
# Arguments: $@ - directory paths; empty arguments and paths that are not
#                 directories are silently skipped.
# Outputs:   progress messages through the script's `log` helper.
# Returns:   0 always; a failed removal is logged instead of propagated so the
#            caller's control flow is the same as before.
remove_directory () {
    local dir
    for dir in "$@"; do
        # [ -d "" ] is false, so an unset/empty path never reaches rm.
        [ -d "$dir" ] || continue
        log remove_directory - "trying to remove directory [${dir}]"
        # Report success only when rm really succeeded; "--" keeps a path
        # that starts with "-" from being parsed as an option.
        if rm -rf -- "$dir"; then
            log remove_directory - "directory [${dir}] removed"
        else
            log remove_directory - "failed to remove directory [${dir}]"
        fi
    done
    return 0
}

clean_up_agent_directory () {
for dir in "${AGENT_CLEAN_UP_DIRS[@]}"; do
rm -rf "${AGENT_SETUP_PATH}"/"${dir}"
Expand All @@ -490,7 +501,8 @@ remove_agent () {

if [[ "$REMOVE" == "TRUE" ]]; then
unregister_agent_id
clean_up_agent_directory
remove_directory "$AGENT_SETUP_PATH" "$GSE_AGENT_RUN_DIR" "$GSE_AGENT_DATA_DIR" "$GSE_AGENT_LOG_DIR"

log remove_agent DONE "agent removed"
exit 0
fi
Expand Down
28 changes: 18 additions & 10 deletions script_tools/agent_tools/agent2/setup_agent.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ get_cpu_arch () {
elif [[ "${CPU_ARCH}" =~ "x86" || "${CPU_ARCH}" =~ ^i[3456]86 ]]; then
CPU_ARCH="x86"
return 0
elif [[ "${CPU_ARCH}" =~ "aarch" ]]; then
elif [[ "${CPU_ARCH}" =~ "aarch" || "${CPU_ARCH}" =~ "arm64" ]]; then
return 0
else
return 1
fi
}

get_cpu_arch "uname -m" || fail get_cpu_arch "Failed to get CPU arch, please contact the developer."
get_cpu_arch "uname -m" || get_cpu_arch "arch" || fail get_cpu_arch "Failed to get CPU arch, please contact the developer."


get_os_info () {
Expand Down Expand Up @@ -313,19 +313,17 @@ check_heathz_by_gse () {
}

remove_crontab () {
if [ $IS_SUPER == false ]; then
return
fi

local tmpcron
tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)

crontab -l | grep -v "bin/gsectl" >"$tmpcron"
crontab "$tmpcron" && rm -f "$tmpcron"

# 下面这段代码是为了确保修改的crontab能立即生效
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
# 下面这段代码是为了确保修改的crontab立即生效
if [ $IS_SUPER == true ]; then
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
fi
fi
}

Expand Down Expand Up @@ -482,6 +480,16 @@ stop_agent () {
done
}

# Remove every existing directory passed as an argument, logging each step.
# Arguments: $@ - directory paths; empty arguments and paths that are not
#                 directories are silently skipped.
# Outputs:   progress messages through the script's `log` helper.
# Returns:   0 always; a failed removal is logged instead of propagated so the
#            caller's control flow is the same as before.
remove_directory () {
    local dir
    for dir in "$@"; do
        # [ -d "" ] is false, so an unset/empty path never reaches rm.
        [ -d "$dir" ] || continue
        log remove_directory - "trying to remove directory [${dir}]"
        # Report success only when rm really succeeded; "--" keeps a path
        # that starts with "-" from being parsed as an option.
        if rm -rf -- "$dir"; then
            log remove_directory - "directory [${dir}] removed"
        else
            log remove_directory - "failed to remove directory [${dir}]"
        fi
    done
    return 0
}

clean_up_agent_directory () {
for dir in "${AGENT_CLEAN_UP_DIRS[@]}"; do
rm -rf "${AGENT_SETUP_PATH}"/"${dir}"
Expand All @@ -501,7 +509,7 @@ remove_agent () {

if [[ "$REMOVE" == "TRUE" ]]; then
unregister_agent_id
clean_up_agent_directory
remove_directory "$AGENT_SETUP_PATH" "$GSE_AGENT_RUN_DIR" "$GSE_AGENT_DATA_DIR" "$GSE_AGENT_LOG_DIR"
log remove_agent DONE "agent removed"
exit 0
fi
Expand Down
27 changes: 19 additions & 8 deletions script_tools/agent_tools/agent2/setup_proxy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ get_os_type () {
elif [[ "${OS_INFO,,}" =~ "hat" ]]; then
OS_TYPE="redhat"
RC_LOCAL_FILE="/etc/rc.d/rc.local"
else
OS_TYPE="other"
RC_LOCAL_FILE="/etc/rc.d/rc.local"
fi
}

Expand Down Expand Up @@ -311,19 +314,17 @@ report_mkdir () {
}

remove_crontab () {
if [ $IS_SUPER == false ]; then
return
fi

local tmpcron
tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)

crontab -l | grep -v "bin/gsectl" >"$tmpcron"
crontab "$tmpcron" && rm -f "$tmpcron"

# 下面这段代码是为了确保修改的crontab能立即生效
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
# 下面这段代码是为了确保修改的crontab立即生效
if [ $IS_SUPER == true ]; then
if pgrep -x crond &>/dev/null; then
pkill -HUP -x crond
fi
fi
}

Expand Down Expand Up @@ -514,6 +515,16 @@ stop_proxy () {
done
}

# Remove every existing directory passed as an argument, logging each step.
# Arguments: $@ - directory paths; empty arguments and paths that are not
#                 directories are silently skipped.
# Outputs:   progress messages through the script's `log` helper.
# Returns:   0 always; a failed removal is logged instead of propagated so the
#            caller's control flow is the same as before.
remove_directory () {
    local dir
    for dir in "$@"; do
        # [ -d "" ] is false, so an unset/empty path never reaches rm.
        [ -d "$dir" ] || continue
        log remove_directory - "trying to remove directory [${dir}]"
        # Report success only when rm really succeeded; "--" keeps a path
        # that starts with "-" from being parsed as an option.
        if rm -rf -- "$dir"; then
            log remove_directory - "directory [${dir}] removed"
        else
            log remove_directory - "failed to remove directory [${dir}]"
        fi
    done
    return 0
}

clean_up_proxy_directory () {
for dir in "${PROXY_CLEAN_UP_DIRS[@]}"; do
rm -rf "${AGENT_SETUP_PATH}"/"${dir}"
Expand All @@ -528,7 +539,7 @@ remove_proxy () {

if [[ "$REMOVE" == "TRUE" ]]; then
unregister_agent_id SKIP
clean_up_proxy_directory
remove_directory "$AGENT_SETUP_PATH" "$GSE_AGENT_RUN_DIR" "$GSE_AGENT_DATA_DIR" "$GSE_AGENT_LOG_DIR"
log remove_proxy DONE "proxy removed"
exit 0
else
Expand Down
68 changes: 32 additions & 36 deletions script_tools/gsectl/agent/aix/gsectl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ log () {

local opt=

if [ "${1:0:1}" == "-" ]; then
first_char=$(expr substr "$1" 1 1)
if [ "$first_char" = "-" ]; then
opt=$1
shift 1
else
Expand All @@ -201,6 +202,9 @@ watch_by_binary () {
# 如果文件存在,则读取文件中记录的次数
if [ -f $LAST_RUN_FILE ]; then
run_count=$(cat $LAST_RUN_FILE)
if [ -z "$run_count" ]; then
run_count=0
fi
else
run_count=0
fi
Expand All @@ -223,16 +227,16 @@ watch_by_binary () {
# 检查上一次脚本是否存在
if [ -f $VAR_RUN_DIR/gsectl_check_agent_status.pid ]; then
pid=`cat $VAR_RUN_DIR/gsectl_check_agent_status.pid`
if [ -d "/proc/$pid" ]; then
log "`date +'%F %T.%N'` Last Script: $0 Detection status: PID:$pid is until running , no longer checking the status of the module: ${module}"
if ps -p $pid >/dev/null 2>&1; then
log "`date +'%F %T'` Last Script: $0 Detection status: PID:$pid is until running , no longer checking the status of the module: ${module}"
return
else
# 如果超过阈值,则发出告警
if [ $run_count -ge $THRESHOLD ]; then
log "`date +'%F %T.%N'` Script: $0 Detection status: Failed to start the process, exceeded $run_count cycles, no longer checking the status of the module: ${module}"
log "`date +'%F %T'` Script: $0 Detection status: Failed to start the process, exceeded $run_count cycles, no longer checking the status of the module: ${module}"
return
else
log "`date +'%F %T.%N'` The previous script: $0 watch has ended, starting a new detection"
log "`date +'%F %T'` The previous script: $0 watch has ended, starting a new detection"
fi
fi
fi
Expand All @@ -249,13 +253,13 @@ watch_by_binary () {
stop_by_binary
start_by_binary
if [ $? -ne 0 ];then
log "`date +'%F %T.%N'` Process failed to start, increment counter"
log "`date +'%F %T'` Process failed to start, increment counter"
run_count=$((run_count + 1))
echo $run_count > $LAST_RUN_FILE
fi
else
if [ $run_count -ne 0 ];then
log "`date +'%F %T.%N'` The previous script: $0 Detection ${module} status is Running , then reset the count"
log "`date +'%F %T'` The previous script: $0 Detection ${module} status is Running , then reset the count"
echo 0 > $LAST_RUN_FILE
fi
fi
Expand Down Expand Up @@ -346,21 +350,6 @@ healthz_by_rclocal () {
return
}

# Choose the boot script that startup entries are written to and store its
# path in the global RC_LOCAL_FILE.
# Globals:   RC_LOCAL_FILE (written)
# Arguments: none
# Returns:   0 always
#
# The first existing regular file among the known locations wins. When none
# exists, a symlink at /etc/rc.local is resolved to its target; otherwise the
# default /etc/rc.local is kept, so RC_LOCAL_FILE is never left empty.
check_rc_file () {
    local candidate
    local link_target

    for candidate in \
        /etc/rc.local \
        /etc/rc.d/rc.local \
        /etc/init.d/rc.local \
        /etc/init.d/boot.local
    do
        if [ -f "$candidate" ]; then
            RC_LOCAL_FILE="$candidate"
            return 0
        fi
    done

    RC_LOCAL_FILE="/etc/rc.local"
    if [ -L "$RC_LOCAL_FILE" ]; then
        # The link exists but does not point at a regular file; take the
        # target from the last field of the `ls -l` output.
        link_target=$(ls -l "$RC_LOCAL_FILE" 2>/dev/null | awk '{print $NF}')
        if [ -n "$link_target" ]; then
            RC_LOCAL_FILE="$link_target"
        fi
    fi
    return 0
}

add_startup_to_boot () {

# 非root用户无法操作rclocal
Expand All @@ -374,15 +363,18 @@ add_startup_to_boot () {
# 添加启动项到 rc.local
echo "Check startup items, and if not existing, add the [${module}] startup item to rc.local"

check_rc_file
local rcfile=$RC_LOCAL_FILE
local rcfile=/etc/rc.local

chmod +x $rcfile
if [ -f $rcfile ];then
# 先删后加,避免重复
grep -v "${WORK_HOME}/bin/gsectl" "$rcfile" > "${rcfile}.tmp"
# 用临时文件覆盖原文件
mv "${rcfile}.tmp" "$rcfile"
else
touch "$rcfile" && chmod +x "$rcfile"
fi

# 先删后加,避免重复
rm ${rcfile}.bak && cp ${rcfile} ${rcfile}.bak
sed "\|${WORK_HOME}/bin/gsectl start ${module}|d" $rcfile > ${rcfile}.tmp
rm $rcfile && mv ${rcfile}.tmp ${rcfile}
chmod +x $rcfile

echo "[ -f ${WORK_HOME}/bin/gsectl ] && ${WORK_HOME}/bin/gsectl start ${module} 1>>/var/log/${INSTALL_ENV}_${node_type}.log 2>&1" >>$rcfile
}
Expand Down Expand Up @@ -411,14 +403,12 @@ remove_crontab () {
local tmpcron
local datatemp=$(date +%s)

crontab -l | grep -v "$AGENT_SETUP_PATH/bin/gsectl" > /tmp/cron.$datatemp
crontab -l | grep -v "$WORK_HOME/bin/gsectl" > /tmp/cron.$datatemp
crontab /tmp/cron.$datatemp && rm -f /tmp/cron.$datatemp

# 下面这段代码是为了确保修改的crontab能立即生效
if [ $IS_SUPER == true ]; then
ps -eo pid,comm | grep cron |awk '{print$1}' | xargs kill -9
else
crontab -l | crontab -
fi
}

Expand All @@ -428,7 +418,8 @@ get_process_runtime () {

sleep 3

for i in {1..20}
local i=1
while [ $i -le 20 ];
do
set -A tmp_gse_agent_master_pids $(ps -eo ppid,pid,args | awk '$1 == 1 && $3 ~ /gse_agent/ {print $2}' | xargs)

Expand All @@ -453,6 +444,7 @@ get_process_runtime () {
fi
fi
done
i=`expr $i + 1`
done
return $p_status
}
Expand Down Expand Up @@ -493,7 +485,8 @@ __status (){
local action=$1

# 最多等待20s来判断是否真正启动成功
for i in {0..20}; do
local i=0
while [ $i -le 20 ]; do
if [ "$action" == "stop" ];then
if [ $(ps -eo pid,comm,args | grep gse_agent |egrep "${WORK_HOME}" |wc -l) -eq 0 ];then
echo "gse_agent $action $action success"
Expand All @@ -516,16 +509,18 @@ __status (){
return 3
fi
elif [ "$action" == "reload" ];then
for i in {0..5}; do
local wait_reload=0
while [ $wait_reload -le 5 ]; do
get_process_runtime
if [ $? -eq 0 ];then
break
elif [ $? -ne 0 ];then
sleep 2
elif [ $i -eq 5 ];then
elif [ $wait_reload -eq 5 ];then
echo "gse_agent $action failed"
return 3
fi
wait_reload=`expr $wait_reload + 1`
done
fi

Expand All @@ -538,6 +533,7 @@ __status (){
sleep 2
fi
fi
i=`expr $i + 1`
done
}

Expand Down
Loading
Loading