调用 HTTP API 时偶尔出现 503

问题描述

调用 http-api 时偶尔出现503的返回。

服务名称为 emqx-prod。在生产环境中调用 http://emqx-prod:8081/api/v4 下的 API 时,偶尔会出现如下错误:

`POST http://emqx-prod:8081/api/v4/mqtt/publish` resulted in a `503 Service Unavailable` response: upstream connect error or disconnect/reset before headers. reset reason: connection termination

环境信息

  • EMQ X 版本:4.2.5
  • 操作系统及版本:Linux version 5.4.0-58-generic (buildd@lcy01-amd64-004) (gcc version 9.3.0 (Ubuntu 9.3.0-17ubuntu1~20.04)) #64-Ubuntu SMP Wed Dec 9 08:16:25 UTC 2020
  • 其他: 使用官方的模版仓库 https://repos.emqx.io/charts 创建的应用

配置文件

emqx 部署在k8s集群中,k8s中应用配置如下:

# Pod object for the EMQ X replica "emqx-prod-1" in namespace "wowo-prod".
# NOTE(review): this looks like a dump of the live object rather than an
# authored manifest (controller-revision-hash, podIP annotations and
# nodeName are populated) — confirm before re-applying it anywhere.
kind: Pod
apiVersion: v1
metadata:
  name: emqx-prod-1
  generateName: emqx-prod-
  namespace: wowo-prod
  labels:
    app: emqx
    app.kubernetes.io/instance: emqx-prod
    app.kubernetes.io/name: emqx
    # The two labels below are presumably set by a StatefulSet controller
    # that owns this pod — verify with the owning workload.
    controller-revision-hash: emqx-prod-b7ffb5667
    statefulset.kubernetes.io/pod-name: emqx-prod-1
    # Matches the image tag used by the emqx container (emqx/emqx:4.2.5).
    version: 4.2.5
  annotations:
    # Pod IP as recorded by the Calico CNI annotations.
    cni.projectcalico.org/podIP: 10.233.79.203/32
    cni.projectcalico.org/podIPs: 10.233.79.203/32
    kubesphere.io/containerSecrets: ''
    logging.kubesphere.io/logsidecar-config: '{}'
# Pod spec: one "emqx" container plus its ACL, data and service-account
# token volumes.
spec:
  volumes:
    # ACL rules, projected as a single file from the emqx-prod-acl ConfigMap.
    - name: emqx-acl
      configMap:
        name: emqx-prod-acl
        items:
          - key: acl.conf
            path: acl.conf
        # 420 is the decimal form of octal file mode 0644.
        defaultMode: 420
    # Persistent storage backed by the "emqx-prod" PVC (mounted at the
    # mnesia data directory below).
    - name: emqx-data
      persistentVolumeClaim:
        claimName: emqx-prod
    # Service-account token secret, mounted read-only below.
    - name: emqx-prod-token-2n28j
      secret:
        secretName: emqx-prod-token-2n28j
        # 420 is the decimal form of octal file mode 0644.
        defaultMode: 420
  containers:
    # The only container in this pod; no proxy sidecar is listed here.
    # NOTE(review): the "upstream connect error ..." text in the report is
    # presumably generated by a proxy elsewhere on the request path —
    # confirm where that proxy runs.
    - name: emqx
      image: 'emqx/emqx:4.2.5'
      ports:
        - name: mqtt
          containerPort: 1883
          protocol: TCP
        - name: mqttssl
          containerPort: 8883
          protocol: TCP
        # Management HTTP API port; this is the port the failing
        # /api/v4 requests in the report are sent to.
        - name: mgmt
          containerPort: 8081
          protocol: TCP
        - name: ws
          containerPort: 8083
          protocol: TCP
        - name: wss
          containerPort: 8084
          protocol: TCP
        - name: dashboard
          containerPort: 18083
          protocol: TCP
        - name: ekka
          containerPort: 4370
          protocol: TCP
      # Additional environment is loaded from the emqx-prod-env ConfigMap,
      # whose contents are not shown in this dump.
      envFrom:
        - configMapRef:
            name: emqx-prod-env
      # Cluster discovery settings: k8s discovery via the
      # emqx-prod-headless service in namespace wowo-prod.
      env:
        - name: EMQX_NAME
          value: emqx-prod
        - name: EMQX_CLUSTER__K8S__APP_NAME
          value: emqx-prod
        - name: EMQX_CLUSTER__DISCOVERY
          value: k8s
        - name: EMQX_CLUSTER__K8S__SERVICE_NAME
          value: emqx-prod-headless
        - name: EMQX_CLUSTER__K8S__NAMESPACE
          value: wowo-prod
      # Requests are far below limits (200m vs 4 CPU, 512Mi vs 4000Mi).
      resources:
        limits:
          cpu: '4'
          memory: 4000Mi
        requests:
          cpu: 200m
          memory: 512Mi
      volumeMounts:
        - name: emqx-data
          mountPath: /opt/emqx/data/mnesia
        # subPath mounts only the acl.conf file, not the whole directory.
        - name: emqx-acl
          mountPath: /opt/emqx/etc/acl.conf
          subPath: acl.conf
        - name: emqx-prod-token-2n28j
          readOnly: true
          mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      # Readiness is checked with GET /status on port 8081 — the same
      # listener that serves the management API — every 5s with a 1s
      # timeout; 3 consecutive failures mark the pod not ready.
      # No livenessProbe is defined for this container.
      # NOTE(review): a 1s timeout on the shared API listener could make
      # readiness flap under load — compare probe failures with the 503
      # timestamps before drawing conclusions.
      readinessProbe:
        httpGet:
          path: /status
          port: 8081
          scheme: HTTP
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 5
        successThreshold: 1
        failureThreshold: 3
      terminationMessagePath: /dev/termination-log
      terminationMessagePolicy: File
      imagePullPolicy: IfNotPresent
  restartPolicy: Always
  terminationGracePeriodSeconds: 30
  dnsPolicy: ClusterFirst
  # serviceAccount is the deprecated alias of serviceAccountName; both
  # carry the same value here.
  serviceAccountName: emqx-prod
  serviceAccount: emqx-prod
  # Node this pod is bound to.
  nodeName: kw4
  securityContext:
    fsGroup: 1000
  # hostname + subdomain correspond to the emqx-prod-headless service
  # referenced by the cluster discovery env vars above.
  hostname: emqx-prod-1
  subdomain: emqx-prod-headless
  schedulerName: default-scheduler
  # Tolerate not-ready / unreachable node taints for 300s each.
  tolerations:
    - key: node.kubernetes.io/not-ready
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
  priority: 0
  enableServiceLinks: true

emqx-management.conf 中是默认配置,没有修改,内容如下:

##--------------------------------------------------------------------
## EMQ X Management Plugin
##
## Configuration of the management plugin and its HTTP(S) listener.
## Lines of the form "## key = value" are commented-out settings and
## are not applied by this file.
##--------------------------------------------------------------------

## Max Row Limit
##
## NOTE(review): presumably caps the number of rows a management query
## may return — confirm against the EMQ X documentation.
management.max_row_limit = 10000

## Application default secret
##
## Value: String
##
## Commented out; not set by this file.
## management.application.default_secret = public

## Default Application ID
##
## Value: String
management.default_application.id = admin

## Default Application Secret
##
## Value: String
##
## NOTE(review): "admin" / "public" are the values shown here and the
## report says the file is unmodified; these look like stock credentials
## and should be reviewed for a production deployment.
management.default_application.secret = public

##--------------------------------------------------------------------
## HTTP Listener
##
## Plain-HTTP listener of the management plugin. 8081 is the port the
## failing /api/v4 requests in this report are sent to.
##
## NOTE(review): the meanings below are inferred from the key names —
## confirm against the EMQ X documentation:
##   acceptors          - presumably the size of the acceptor pool
##   max_clients        - presumably the cap on concurrent connections
##   backlog            - presumably the TCP listen backlog
##   send_timeout       - presumably the socket send timeout
##   send_timeout_close - presumably closes the socket on send timeout
##   inet6/ipv6_v6only  - IPv6 options; both are false here

management.listener.http = 8081
management.listener.http.acceptors = 2
management.listener.http.max_clients = 512
management.listener.http.backlog = 512
management.listener.http.send_timeout = 15s
management.listener.http.send_timeout_close = on
management.listener.http.inet6 = false
management.listener.http.ipv6_v6only = false

##--------------------------------------------------------------------
## HTTPS Listener
##
## Every setting in this section is commented out, so no HTTPS listener
## is configured by this file. The sample uses the same port (8081) as
## the HTTP listener above.
##
## NOTE(review): the sample values include legacy protocol versions
## (tlsv1.1, tlsv1) and a 3DES suite (ECDHE-ECDSA-DES-CBC3-SHA); review
## them before enabling this listener.

## management.listener.https = 8081
## management.listener.https.acceptors = 2
## management.listener.https.max_clients = 512
## management.listener.https.backlog = 512
## management.listener.https.send_timeout = 15s
## management.listener.https.send_timeout_close = on
## management.listener.https.certfile = etc/certs/cert.pem
## management.listener.https.keyfile = etc/certs/key.pem
## management.listener.https.cacertfile = etc/certs/cacert.pem
## management.listener.https.verify = verify_peer
## management.listener.https.tls_versions = tlsv1.2,tlsv1.1,tlsv1
## management.listener.https.ciphers = ECDHE-ECDSA-AES256-GCM-SHA384,ECDHE-RSA-AES256-GCM-SHA384,ECDHE-ECDSA-AES256-SHA384,ECDHE-RSA-AES256-SHA384,ECDHE-ECDSA-DES-CBC3-SHA,ECDH-ECDSA-AES256-GCM-SHA384,ECDH-RSA-AES256-GCM-SHA384,ECDH-ECDSA-AES256-SHA384,ECDH-RSA-AES256-SHA384,DHE-DSS-AES256-GCM-SHA384,DHE-DSS-AES256-SHA256,AES256-GCM-SHA384,AES256-SHA256,ECDHE-ECDSA-AES128-GCM-SHA256,ECDHE-RSA-AES128-GCM-SHA256,ECDHE-ECDSA-AES128-SHA256,ECDHE-RSA-AES128-SHA256,ECDH-ECDSA-AES128-GCM-SHA256,ECDH-RSA-AES128-GCM-SHA256,ECDH-ECDSA-AES128-SHA256,ECDH-RSA-AES128-SHA256,DHE-DSS-AES128-GCM-SHA256,DHE-DSS-AES128-SHA256,AES128-GCM-SHA256,AES128-SHA256,ECDHE-ECDSA-AES256-SHA,ECDHE-RSA-AES256-SHA,DHE-DSS-AES256-SHA,ECDH-ECDSA-AES256-SHA,ECDH-RSA-AES256-SHA,AES256-SHA,ECDHE-ECDSA-AES128-SHA,ECDHE-RSA-AES128-SHA,DHE-DSS-AES128-SHA,ECDH-ECDSA-AES128-SHA,ECDH-RSA-AES128-SHA,AES128-SHA
## management.listener.https.fail_if_no_peer_cert = true
## management.listener.https.inet6 = false
## management.listener.https.ipv6_v6only = false

流量监控图如下: