启动 emqx 卡在 relx_nodetool "ping" 直到超时

环境信息

  • EMQX 版本:5.0.4
  • 操作系统及版本:ubuntu 16.04
  • 其他:压缩包安装

问题描述

启动成功过 emqx,暂未明因何操作导致启动不成功。用 ./emqx start 会长时间卡住不动,用 bash -x emqx start 会看到一直在调用 ping 的操作。中断 start 后可在 dashboard 登录(已经启动成功?)。

配置文件及日志

lemon@lemon-NUC8i3BEH:~/robot_ros_application/scripts/emqx$ env DEBUG=1 bin/emqx start
++++ readlink bin/emqx
++++ echo bin/emqx
+++ dirname bin/emqx
++ cd bin/..
++ pwd -P
+ RUNNER_ROOT_DIR=/home/lemon/robot_ros_application/scripts/emqx
+ . /home/lemon/robot_ros_application/scripts/emqx/releases/emqx_vars
++ REL_VSN=5.0.4
++ ERTS_VSN=12.2.1
++ ERL_OPTS=
++ RUNNER_BIN_DIR=/home/lemon/robot_ros_application/scripts/emqx/bin
++ RUNNER_LOG_DIR=/home/lemon/robot_ros_application/scripts/emqx/log
++ RUNNER_LIB_DIR=/home/lemon/robot_ros_application/scripts/emqx/lib
++ EMQX_ETC_DIR=/home/lemon/robot_ros_application/scripts/emqx/etc
++ RUNNER_USER=
++ IS_ELIXIR=no
++ SCHEMA_MOD=emqx_conf_schema
++ IS_ENTERPRISE=no
++ export EMQX_DESCRIPTION=EMQX
++ EMQX_DESCRIPTION=EMQX
++ REL_NAME=emqx
+ export RUNNER_ROOT_DIR
+ export EMQX_ETC_DIR
+ export REL_VSN
+ export SCHEMA_MOD
+ RUNNER_SCRIPT=/home/lemon/robot_ros_application/scripts/emqx/bin/emqx
+ CODE_LOADING_MODE=embedded
+ REL_DIR=/home/lemon/robot_ros_application/scripts/emqx/releases/5.0.4
++ whoami
+ WHOAMI=lemon
+ mkdir -p /home/lemon/robot_ros_application/scripts/emqx/log
+ export HOCON_ENV_OVERRIDE_PREFIX=EMQX_
+ HOCON_ENV_OVERRIDE_PREFIX=EMQX_
+ export ERTS_DIR=/home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1
+ ERTS_DIR=/home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1
+ export BINDIR=/home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1/bin
+ BINDIR=/home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1/bin
+ export EMU=beam
+ EMU=beam
+ export PROGNAME=erl
+ PROGNAME=erl
+ export ERTS_LIB_DIR=/home/lemon/robot_ros_application/scripts/emqx/lib
+ ERTS_LIB_DIR=/home/lemon/robot_ros_application/scripts/emqx/lib
+ DYNLIBS_DIR=/home/lemon/robot_ros_application/scripts/emqx/dynlibs
+ COMMAND=start
+ '[' -z start ']'
+ '[' start = help ']'
+ '[' '' = help ']'
+ case "${COMMAND}" in
+ IS_BOOT_COMMAND=yes
+ '[' -d /home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1/lib ']'
+ check_user start
+ '[' '' ']'
+ ES=0
+ '[' 0 -ne 0 ']'
+ COMPATIBILITY_CHECK='
    io:format("BEAM_OK~n", []),
    try
        [_|_] = L = crypto:info_lib(),
        io:format("CRYPTO_OK ~0p~n", [L])
    catch
        _ : _ ->
            %% so logger has the chance to log something
            timer:sleep(100),
            halt(1)

    end,
    try
        mnesia_hook:module_info(),
        io:format("MNESIA_OK~n", [])
    catch
        _ : _ ->
            io:format("WARNING: Mnesia app has no post-coommit hook support~n", []),
            halt(2)
    end,
    halt(0).
'
+ '[' yes = yes ']'
++ cat /home/lemon/robot_ros_application/scripts/emqx/releases/5.0.4/BUILD_INFO
+ BUILD_INFO='arch: "x86_64-pc-linux-gnu"
wordsize: 64
os: "ubuntu16.04"
erlang: "24.2.1-1"
elixir: "none"
relform: "tgz"'
++ compatiblity_info
+ COMPATIBILITY_INFO='BEAM_OK
CRYPTO_OK [{<<"OpenSSL">>,268443775,<<"OpenSSL 1.0.2g  1 Mar 2016">>}]
MNESIA_OK'
+ echo -e 'BEAM_OK
CRYPTO_OK [{<<"OpenSSL">>,268443775,<<"OpenSSL 1.0.2g  1 Mar 2016">>}]
MNESIA_OK'
+ grep -q CRYPTO_OK
+ '[' 1 -eq 1 ']'
+ set -x
++ ulimit -n
+ ULIMIT_F=1048576
+ '[' 1048576 -lt 1024 ']'
+ SED_REPLACE='sed -i '
+ case $(sed --help 2>&1) in
++ sed --help
+ SED_REPLACE='sed -i '
+ CONF_KEYS=('node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' 'cluster.proto_dist')
+ '[' no = yes ']'
+ '[' yes = yes ']'
+ '[' '' = '' ']'
++ call_hocon -s emqx_conf_schema -c /home/lemon/robot_ros_application/scripts/emqx/etc/emqx.conf multi_get node.data_dir node.name node.cookie node.db_backend cluster.proto_dist
++ call_nodetool hocon -s emqx_conf_schema -c /home/lemon/robot_ros_application/scripts/emqx/etc/emqx.conf multi_get node.data_dir node.name node.cookie node.db_backend cluster.proto_dist
++ /home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1/bin/escript /home/lemon/robot_ros_application/scripts/emqx/bin/nodetool hocon -s emqx_conf_schema -c /home/lemon/robot_ros_application/scripts/emqx/etc/emqx.conf multi_get node.data_dir node.name node.cookie node.db_backend cluster.proto_dist
[debug]: ConfFiles: {["/home/lemon/robot_ros_application/scripts/emqx/etc/emqx.conf"],[]}
+ EMQX_BOOT_CONFIGS='node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
+ export EMQX_BOOT_CONFIGS
+ EPMD_ARGS='-start_epmd false -epmd_module ekka_epmd -proto_dist ekka'
++ get_boot_config cluster.proto_dist
++ path_to_value=cluster.proto_dist
++ echo -e 'node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
++ grep cluster.proto_dist=
++ sed -e s/cluster.proto_dist=//g
++ tr -d '"'
+ PROTO_DIST=inet_tcp
+ export EKKA_PROTO_DIST_MOD=inet_tcp
+ EKKA_PROTO_DIST_MOD=inet_tcp
+ '[' inet_tcp = inet_tls ']'
++ get_boot_config node.data_dir
++ path_to_value=node.data_dir
++ echo -e 'node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
++ grep node.data_dir=
++ tr -d '"'
++ sed -e s/node.data_dir=//g
+ DATA_DIR=data
+ DATA_DIR=data
+ [[ data != /* ]]
+ DATA_DIR=/home/lemon/robot_ros_application/scripts/emqx/data
+ CONFIGS_DIR=/home/lemon/robot_ros_application/scripts/emqx/data/configs
+ mkdir -p /home/lemon/robot_ros_application/scripts/emqx/data/configs
+ '[' -n '' ']'
+ NAME=
+ '[' -z '' ']'
+ '[' yes = yes ']'
++ get_boot_config node.name
++ path_to_value=node.name
++ echo -e 'node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
++ grep node.name=
++ tr -d '"'
++ sed -e s/node.name=//g
+ NAME=emqx@127.0.0.1
+ '[' -z emqx@127.0.0.1 ']'
+ case "$NAME" in
+ NAME_TYPE=-name
++ echo emqx@127.0.0.1
++ awk -F@ '{print $1}'
+ SHORT_NAME=emqx
+ echo emqx
+ grep -q '^[0-9A-Za-z_\-]\+$'
+ export ESCRIPT_NAME=emqx
+ ESCRIPT_NAME=emqx
+ PIPE_DIR=//home/lemon/robot_ros_application/scripts/emqx/data/lemon_erl_pipes/emqx@127.0.0.1/
+ '[' -n '' ']'
+ COOKIE=
+ '[' -z '' ']'
+ '[' yes = yes ']'
++ get_boot_config node.cookie
++ path_to_value=node.cookie
++ echo -e 'node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
++ grep node.cookie=
++ tr -d '"'
++ sed -e s/node.cookie=//g
+ COOKIE=emqxsecretcookie
+ '[' -z emqxsecretcookie ']'
+ [[ yes == \y\e\s ]]
++ get_boot_config node.db_backend
++ path_to_value=node.db_backend
++ echo -e 'node.data_dir="data"
node.name="emqx@127.0.0.1"
node.cookie="emqxsecretcookie"
node.db_backend=rlog
cluster.proto_dist=inet_tcp'
++ grep node.db_backend=
++ tr -d '"'
++ sed -e s/node.db_backend=//g
+ [[ rlog == \r\l\o\g ]]
+ echo -e 'BEAM_OK
CRYPTO_OK [{<<"OpenSSL">>,268443775,<<"OpenSSL 1.0.2g  1 Mar 2016">>}]
MNESIA_OK'
+ grep -q MNESIA_OK
+ cd /home/lemon/robot_ros_application/scripts/emqx
+ case "${COMMAND}" in
+ relx_nodetool ping
+ export _EMQX_START_DAEMON_MODE=1
+ _EMQX_START_DAEMON_MODE=1
+ case "$COMMAND" in
+ shift
+ START_OPTION=console
+ HEART_OPTION=start
+ RUN_PARAM=
+ set -- /home/lemon/robot_ros_application/scripts/emqx/bin/emqx start
+ '[' '' ']'
+ HEART_COMMAND='/home/lemon/robot_ros_application/scripts/emqx/bin/emqx start'
+ export HEART_COMMAND
+ export RUN_ERL_LOG_GENERATIONS=5
+ RUN_ERL_LOG_GENERATIONS=5
+ export RUN_ERL_LOG_MAXSIZE=10485760
+ RUN_ERL_LOG_MAXSIZE=10485760
+ mkdir -p //home/lemon/robot_ros_application/scripts/emqx/data/lemon_erl_pipes/emqx@127.0.0.1/
++ relx_start_command
++ printf 'exec "%s" "%s"' /home/lemon/robot_ros_application/scripts/emqx/bin/emqx console
+ /home/lemon/robot_ros_application/scripts/emqx/erts-12.2.1/bin/run_erl -daemon //home/lemon/robot_ros_application/scripts/emqx/data/lemon_erl_pipes/emqx@127.0.0.1/ /home/lemon/robot_ros_application/scripts/emqx/log 'exec "/home/lemon/robot_ros_application/scripts/emqx/bin/emqx" "console"'
+ WAIT_TIME=120
+ wait_until_return_val true 120 relx_nodetool eval 'emqx:is_running()'
+ local RESULT
+ local WAIT_TIME
+ local CMD
+ RESULT=true
+ WAIT_TIME=120
+ shift 2
+ CMD='relx_nodetool eval emqx:is_running()'
+ true
++ relx_nodetool eval 'emqx:is_running()'
+ '[' '=ERROR REPORT==== 29-Jul-2022::14:03:18.306451 ===
Too short cookie string
=SUPERVISOR REPORT==== 29-Jul-2022::14:03:18.308584 ===
    supervisor: {local,net_sup}
    errorContext: start_error
    reason: {"Too short cookie string",
             [{auth,init_no_setcookie,0,[{file,"auth.erl"},{line,293}]},
              {auth,init,1,[{file,"auth.erl"},{line,144}]},
              {gen_server,init_it,2,[{file,"gen_server.erl"},{line,423}]},
              {gen_server,init_it,6,[{file,"gen_server.erl"},{line,390}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,226}]}]}
    offender: [{pid,undefined},
               {id,auth},
               {mfargs,{auth,start_link,[]}},
               {restart_type,permanent},
               {significant,false},
               {shutdown,2000},
               {child_type,worker}]

=CRASH REPORT==== 29-Jul-2022::14:03:18.308671 ===
  crasher:
    initial call: auth:init/1
    pid: <0.81.0>
    registered_name: []
    exception error: "Too short cookie string"
      in function  auth:init_no_setcookie/0 (auth.erl, line 293)
      in call from auth:init/1 (auth.erl, line 144)
      in call from gen_server:init_it/2 (gen_server.erl, line 423)
      in call from gen_server:init_it/6 (gen_server.erl, line 390)
    ancestors: [net_sup,kernel_sup,<0.47.0>]
    message_queue_len: 0
    messages: []
    links: [<0.80.0>]
    dictionary: []
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 28
    reductions: 2030
  neighbours:' = true ']'
+ '[' 120 -le 0 ']'
+ WAIT_TIME=119
+ sleep 1
+ true
++ relx_nodetool eval 'emqx:is_running()'
+ '[' '=ERROR REPORT==== 29-Jul-2022::14:03:19.691227 ===
Too short cookie string
=SUPERVISOR REPORT==== 29-Jul-2022::14:03:19.692728 ===
    supervisor: {local,net_sup}
    errorContext: start_error
    reason: {"Too short cookie string",
             [{auth,init_no_setcookie,0,[{file,"auth.erl"},{line,293}]},
              {auth,init,1,[{file,"auth.erl"},{line,144}]},
              {gen_server,init_it,2,[{file,"gen_server.erl"},{line,423}]},
              {gen_server,init_it,6,[{file,"gen_server.erl"},{line,390}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,226}]}]}
    offender: [{pid,undefined},
               {id,auth},
               {mfargs,{auth,start_link,[]}},
               {restart_type,permanent},
               {significant,false},
               {shutdown,2000},
               {child_type,worker}]

=CRASH REPORT==== 29-Jul-2022::14:03:19.692798 ===
  crasher:
    initial call: auth:init/1
    pid: <0.81.0>
    registered_name: []
    exception error: "Too short cookie string"
      in function  auth:init_no_setcookie/0 (auth.erl, line 293)
      in call from auth:init/1 (auth.erl, line 144)
      in call from gen_server:init_it/2 (gen_server.erl, line 423)
      in call from gen_server:init_it/6 (gen_server.erl, line 390)
    ancestors: [net_sup,kernel_sup,<0.47.0>]
    message_queue_len: 0
    messages: []
    links: [<0.80.0>]
    dictionary: []
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 28
    reductions: 2030
  neighbours:' = true ']'
+ '[' 119 -le 0 ']'
+ WAIT_TIME=118
+ sleep 1
+ true
++ relx_nodetool eval 'emqx:is_running()'
+ '[' '=ERROR REPORT==== 29-Jul-2022::14:03:20.957444 ===
Too short cookie string
=SUPERVISOR REPORT==== 29-Jul-2022::14:03:20.959011 ===
    supervisor: {local,net_sup}
    errorContext: start_error
    reason: {"Too short cookie string",
             [{auth,init_no_setcookie,0,[{file,"auth.erl"},{line,293}]},
              {auth,init,1,[{file,"auth.erl"},{line,144}]},
              {gen_server,init_it,2,[{file,"gen_server.erl"},{line,423}]},
              {gen_server,init_it,6,[{file,"gen_server.erl"},{line,390}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,226}]}]}
    offender: [{pid,undefined},
               {id,auth},
               {mfargs,{auth,start_link,[]}},
               {restart_type,permanent},
               {significant,false},
               {shutdown,2000},
               {child_type,worker}]

=CRASH REPORT==== 29-Jul-2022::14:03:20.959077 ===
  crasher:
    initial call: auth:init/1
    pid: <0.81.0>
    registered_name: []
    exception error: "Too short cookie string"
      in function  auth:init_no_setcookie/0 (auth.erl, line 293)
      in call from auth:init/1 (auth.erl, line 144)
      in call from gen_server:init_it/2 (gen_server.erl, line 423)
      in call from gen_server:init_it/6 (gen_server.erl, line 390)
    ancestors: [net_sup,kernel_sup,<0.47.0>]
    message_queue_len: 0
    messages: []
    links: [<0.80.0>]
    dictionary: []
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 28
    reductions: 2028
  neighbours:' = true ']'
+ '[' 118 -le 0 ']'
+ WAIT_TIME=117
+ sleep 1
+ true
++ relx_nodetool eval 'emqx:is_running()'
+ '[' '=ERROR REPORT==== 29-Jul-2022::14:03:22.252822 ===
Too short cookie string
=SUPERVISOR REPORT==== 29-Jul-2022::14:03:22.254627 ===
    supervisor: {local,net_sup}
    errorContext: start_error
    reason: {"Too short cookie string",
             [{auth,init_no_setcookie,0,[{file,"auth.erl"},{line,293}]},
              {auth,init,1,[{file,"auth.erl"},{line,144}]},
              {gen_server,init_it,2,[{file,"gen_server.erl"},{line,423}]},
              {gen_server,init_it,6,[{file,"gen_server.erl"},{line,390}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,226}]}]}
    offender: [{pid,undefined},
               {id,auth},
               {mfargs,{auth,start_link,[]}},
               {restart_type,permanent},
               {significant,false},
               {shutdown,2000},
               {child_type,worker}]

=CRASH REPORT==== 29-Jul-2022::14:03:22.254749 ===
  crasher:
    initial call: auth:init/1
    pid: <0.81.0>
    registered_name: []
    exception error: "Too short cookie string"
      in function  auth:init_no_setcookie/0 (auth.erl, line 293)
      in call from auth:init/1 (auth.erl, line 144)
      in call from gen_server:init_it/2 (gen_server.erl, line 423)
      in call from gen_server:init_it/6 (gen_server.erl, line 390)
    ancestors: [net_sup,kernel_sup,<0.47.0>]
    message_queue_len: 0
    messages: []
    links: [<0.80.0>]
    dictionary: []
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 28
    reductions: 2030
  neighbours:' = true ']

上次关闭emqx是不是非正常关闭,没有用

emqx stop

直接杀掉的进程或者启动过程直接取消了?
可以找下这个文件

erlang.cookie

也可能是

.erlang.cookie

在emqx目录下,把它删掉试下。

如你所说确实没有正常关闭,按照您提供的方法未能找到该文件

image

搜索关于 erlang 关键字的文件,仅如下

image

找一下emqx的文件夹,目录结构大致是这样

.
├── bin
├── data
├── erts-12.2.1
├── etc
├── lib
├── log
├── plugins
└── releases

然后把data/mnesia 删掉,目录结构大概是这样

data
├── cluster.uuid
├── configs
│   ├── app.2022.07.29.14.55.40.config
│   └── vm.2022.07.29.14.55.40.args
├── mnesia
│   └── emqx@127.0.0.1
├── node.uuid
├── patches
├── scripts
└── trace
    └── zip

如果我的目录结构和你有细微不同,可以忽略
PS:这样会清空全部的数据库,不能在生产环境这样做。

还有就是一般情况下,我们是不考虑异常关机的处理的,因为EMQX设计为不停机一直运行,突然中断进程,可能会导致数据库文件损坏,尽量避免这种情况。一般来说主机不挂EMQX就不挂

将 data/mnesia 删掉,依旧产生 ping 超时的问题,且日志中有报 Too short cookie string,是什么原因导致此问题呢。

嗯嗯谢谢 异常关机这里我会想办法处理

这个是erlang虚拟机报错,节点通讯出现了问题,有很多种可能,可以先删掉emqx重新解压一个用。

删除后重新解压使用问题依旧,使用 start 依旧会超时,但 dashboard 可以正常登录并且通信正常(这在删掉之前也是可以的),但用 stop 是无法正常关闭的,只能 kill 掉这个进程。

如果使用 ./emqx console 会提示启动成功:

lemon@lemon-NUC8i3BEH:~/robot_ros_application/scripts/emqx/bin$ ./emqx console
log.file_handlers.default.enable = EMQX_LOG__FILE_HANDLERS__DEFAULT__ENABLE = false
log.console_handler.enable = EMQX_LOG__CONSOLE_HANDLER__ENABLE = true
Erlang/OTP 24 [erts-12.2.1] [emqx] [64-bit] [smp:4:4] [ds:4:4:8] [async-threads:4]

Listener ssl:default on :8883 started.
Listener tcp:default on :1883 started.
Listener ws:default on :8083 started.
Listener wss:default on :8084 started.
Listener http:dashboard on :18083 started.
EMQX 5.0.4 is running now!
Restricted Eshell V12.2.1  (abort with ^G)
5.0.4(emqx@127.0.0.1)1> 

不好意思,我没有复现出这个问题,可能是应为我的版本是ubuntu20.04,我喊了大佬们,看看他们有没有办法

好的谢谢