集群添加节点失败

环境信息

  • EMQX 版本:5.0.0
  • 操作系统及版本:ubuntu 20.04
  • 其他:通过 deb 包安装的 EMQX,两个节点分别运行在同一主机上创建的两台虚拟机中,相互之间能 ping 通。

问题描述

配置完毕之后,执行加入集群的命令失败,报错如下:
Failed to join the cluster: {node_down,'emqxm1@192.168.31.132'}

配置文件及日志

node {
  name = "emqxc2@192.168.31.134"
  cookie = emqxsecretcookie
  data_dir = "/var/lib/emqx"
  etc_dir = "/etc/emqx"
}

log {
  file_handlers.default {
    level = warning
    file = "/var/log/emqx/emqx.log"
  }
}

cluster {
  name = emqxc2
  discovery_strategy = manual
}


listeners.tcp.default {
  bind = "0.0.0.0:1883"
  max_connections = 1024000
}

listeners.ssl.default {
  bind = "0.0.0.0:8883"
  max_connections = 512000
  ssl_options {
    keyfile = "/etc/emqx/certs/key.pem"
    certfile = "/etc/emqx/certs/cert.pem"
    cacertfile = "/etc/emqx/certs/cacert.pem"
  }
}

listeners.ws.default {
  bind = "0.0.0.0:8083"
  max_connections = 1024000
  websocket.mqtt_path = "/mqtt"
}

listeners.wss.default {
  bind = "0.0.0.0:8084"
  max_connections = 512000
  websocket.mqtt_path = "/mqtt"
  ssl_options {
    keyfile = "/etc/emqx/certs/key.pem"
    certfile = "/etc/emqx/certs/cert.pem"
    cacertfile = "/etc/emqx/certs/cacert.pem"
  }
}

# listeners.quic.default {
#  enabled = true
#  bind = "0.0.0.0:14567"
#  max_connections = 1024000
#  keyfile = "/etc/emqx/certs/key.pem"
#  certfile = "/etc/emqx/certs/cert.pem"
#}

dashboard {
    listeners.http {
        bind: 18083
    }
    default_username: "admin"
    default_password: "public"
}

authorization {
  deny_action: ignore
  no_match: allow
  sources: [
    {
      type: file
      path: "/etc/emqx/acl.conf"
    }
  ]
}

include emqx_enterprise.conf

以上是报错的节点

node {
  name = "emqxm1@192.168.31.132"
  cookie = emqxsecretcookie
  data_dir = "/var/lib/emqx"
  etc_dir = "/etc/emqx"
}

log {
  file_handlers.default {
    level = warning
    file = "/var/log/emqx/emqx.log"
  }
}

cluster {
  name = emqxm1
  discovery_strategy = manual
}


listeners.tcp.default {
  bind = "0.0.0.0:1883"
  max_connections = 1024000
}

listeners.ssl.default {
  bind = "0.0.0.0:8883"
  max_connections = 512000
  ssl_options {
    keyfile = "/etc/emqx/certs/key.pem"
    certfile = "/etc/emqx/certs/cert.pem"
    cacertfile = "/etc/emqx/certs/cacert.pem"
  }
}

listeners.ws.default {
  bind = "0.0.0.0:8083"
  max_connections = 1024000
  websocket.mqtt_path = "/mqtt"
}

listeners.wss.default {
  bind = "0.0.0.0:8084"
  max_connections = 512000
  websocket.mqtt_path = "/mqtt"
  ssl_options {
    keyfile = "/etc/emqx/certs/key.pem"
    certfile = "/etc/emqx/certs/cert.pem"
    cacertfile = "/etc/emqx/certs/cacert.pem"
  }
}

# listeners.quic.default {
#  enabled = true
#  bind = "0.0.0.0:14567"
#  max_connections = 1024000
#  keyfile = "/etc/emqx/certs/key.pem"
#  certfile = "/etc/emqx/certs/cert.pem"
#}

dashboard {
    listeners.http {
        bind: 18083
    }
    default_username: "admin"
    default_password: "public"
}

authorization {
  deny_action: ignore
  no_match: allow
  sources: [
    {
      type: file
      path: "/etc/emqx/acl.conf"
    }
  ]
}

include emqx_enterprise.conf

以上是被加入的节点

节点名 emqxm1@192.168.31.132 以 @ 为分隔符,@ 前面部分末尾的数字表示集群节点间通讯端口的偏移量。
默认端口是 4369,而你的节点名以 1 结尾,所以端口变成了 4370。建议先改用不带数字后缀(即没有偏移量)的节点名再尝试加入集群,同时排查一下网络方面的问题。

“最后一位数字”指的是什么?是 emqxm1 结尾的 1 吗?
那如果结尾没有数字呢?

它是 RPC 和集群节点之间通讯端口的偏移量。比如使用 emqx@127.0.0.1,通讯端口是 4369;使用 emqx1@127.0.0.1,偏移量为 1,使用 4370 端口;节点名末尾没有数字则表示偏移量为 0,端口是 4369。用于节点通讯和 RPC 服务的端口不止一个,其中 4369 是集群通讯使用的端口。