开源版集群问题

3 台 2 核 4 GB 的阿里云服务器组成的开源版集群(版本 5.0.25),9800 个连接,早上峰值消息流入为 19000/分钟,出现了不稳定的情况(丢数据)。客户端晚上会因为没有太阳、没有电而离线,早上出太阳后再自动上线。晚上我尝试修改了 TCP 类型监听器的参数:接收器改成了 128,Buffer 改成了 8 KB,消息发布速率改成了 3000/s。结果第二天 90% 的客户端未上线,查看服务器日志如下:

2025-08-28T22:22:02.812319+08:00 [error] supervisor: ‘esockd_connection_sup - <0.20704.2771>’, errorContext: connection_crashed, reason: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, offender: [{pid,<0.24418.2771>},{name,connection},{mfargs,{emqx_connection,start_link,[#{enable_authn => true,limiter => #{client => #{bytes => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 20971520.0},messages => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 300.0}},connection => #{burst => 0,initial => 0,rate => 300.0}},listener => {tcp,default},zone => default}]}}]
2025-08-28T22:22:26.557458+08:00 [error] crasher: initial call: emqx_connection:init/4, pid: <0.2323.2771>, registered_name: , error: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, ancestors: [<0.20704.2771>,<0.15901.2771>,esockd_sup,<0.5540.0>], message_queue_len: 0, messages: , links: [<0.20704.2771>], dictionary: [{outgoing_bytes,1584},{send_pkt,410},{outgoing_pubs,1},{incoming_bytes,29287},{recv_msg,110},{{publish,<<“GW”>>},{allow,1756390819115}},{authz_keys_q,{[{publish,<<“MS”>>}],[{publish,<<“GW”>>}]}},{authz_cache_size,2},{send_msg,1},{{publish,<<“MS”>>},{allow,1756390879402}},{guid,{1756390879402425,45706287646995,111}},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => #Fun<rand.2.92093067>},[219187285867595895|207517103419760650]}},{incoming_pubs,110},{‘recv_msg.qos1’,110},{recv_pkt,410},{‘send_msg.qos0’,1},{‘$logger_metadata$’,#{clientid => <<“1235704322440025”>>,peername => “39.144.129.1:57226”}}], trap_exit: false, status: running, heap_size: 4185, stack_size: 28, reductions: 5717785; neighbours:
2025-08-28T22:22:26.558048+08:00 [error] supervisor: ‘esockd_connection_sup - <0.20704.2771>’, errorContext: connection_crashed, reason: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, offender: [{pid,<0.2323.2771>},{name,connection},{mfargs,{emqx_connection,start_link,[#{enable_authn => true,limiter => #{client => #{bytes => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 20971520.0},messages => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 300.0}},connection => #{burst => 0,initial => 0,rate => 300.0}},listener => {tcp,default},zone => default}]}}]
2025-08-28T22:25:32.731419+08:00 [error] crasher: initial call: emqx_connection:init/4, pid: <0.16977.2771>, registered_name: , error: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, ancestors: [<0.20704.2771>,<0.15901.2771>,esockd_sup,<0.5540.0>], message_queue_len: 0, messages: , links: [<0.20704.2771>], dictionary: [{outgoing_bytes,1620},{send_pkt,422},{outgoing_pubs,1},{incoming_bytes,30643},{recv_msg,116},{authz_keys_q,{[{publish,<<“MS”>>}],}},{authz_cache_size,1},{send_msg,1},{{publish,<<“MS”>>},{allow,1756390853513}},{guid,{1756390863561356,45706287661649,117}},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => #Fun<rand.2.92093067>},[73162932004907565|29978153619352299]}},{incoming_pubs,116},{‘recv_msg.qos1’,116},{recv_pkt,422},{‘send_msg.qos0’,1},{‘$logger_metadata$’,#{clientid => <<“1796104322280283”>>,peername => “39.144.129.57:9875”}}], trap_exit: false, status: running, heap_size: 2586, stack_size: 28, reductions: 5901615; neighbours:
2025-08-28T22:25:32.731974+08:00 [error] supervisor: ‘esockd_connection_sup - <0.20704.2771>’, errorContext: connection_crashed, reason: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, offender: [{pid,<0.16977.2771>},{name,connection},{mfargs,{emqx_connection,start_link,[#{enable_authn => true,limiter => #{client => #{bytes => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 20971520.0},messages => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 300.0}},connection => #{burst => 0,initial => 0,rate => 300.0}},listener => {tcp,default},zone => default}]}}]
2025-08-29T00:06:19.742951+08:00 [error] crasher: initial call: emqx_connection:init/4, pid: <0.17718.2771>, registered_name: , error: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, ancestors: [<0.20704.2771>,<0.15901.2771>,esockd_sup,<0.5540.0>], message_queue_len: 0, messages: , links: [<0.20704.2771>], dictionary: [{guid,{1756397179741377,45706287662390,135}},{incoming_pubs,133},{{publish,<<“q2a86fov7n6vv446”>>},{allow,1756397019550}},{outgoing_bytes,977},{send_pkt,348},{{publish,<<“92m710xk61m2l2l6”>>},{allow,1756396870560}},{{publish,<<“6ke3aq8a23msne66”>>},{allow,1756396950656}},{incoming_bytes,4872},{recv_msg,133},{authz_keys_q,{[{publish,<<“willTopic”>>},{publish,<<“f9044kt52m0lgg2r”>>},{publish,<<“hx3e15rd7yt5t436”>>},{publish,<<“q2a86fov7n6vv446”>>},{publish,<<“ku8z53z91n3y5fvr”>>},{publish,<<“6ke3aq8a23msne66”>>},{publish,<<“0p1e5lt7uohiot36”>>}],[{publish,<<“92m710xk61m2l2l6”>>}]}},{authz_cache_size,8},{{publish,<<“0p1e5lt7uohiot36”>>},{allow,1756396940608}},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => #Fun<rand.2.92093067>},[134486123903531962|268080830311136365]}},{{publish,<<“ku8z53z91n3y5fvr”>>},{allow,1756396960704}},{‘recv_msg.qos1’,133},{recv_pkt,349},{{publish,<<“hx3e15rd7yt5t436”>>},{allow,1756397109693}},{{publish,<<“f9044kt52m0lgg2r”>>},{allow,1756397179741}},{{publish,<<“willTopic”>>},{allow,1756397179742}},{‘$logger_metadata$’,#{clientid => <<“icloud-api-prod-2f4104f702fc-510661”>>,peername => “172.22.245.156:57970”}}], trap_exit: false, status: running, heap_size: 2586, stack_size: 28, reductions: 4178208; neighbours:
2025-08-29T00:06:19.743708+08:00 [error] supervisor: ‘esockd_connection_sup - <0.20704.2771>’, errorContext: connection_crashed, reason: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, offender: [{pid,<0.17718.2771>},{name,connection},{mfargs,{emqx_connection,start_link,[#{enable_authn => true,limiter => #{client => #{bytes => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 20971520.0},messages => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 300.0}},connection => #{burst => 0,initial => 0,rate => 300.0}},listener => {tcp,default},zone => default}]}}]
2025-08-29T00:07:12.275165+08:00 [error] crasher: initial call: emqx_connection:init/4, pid: <0.21470.2771>, registered_name: , error: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, ancestors: [<0.20704.2771>,<0.15901.2771>,esockd_sup,<0.5540.0>], message_queue_len: 0, messages: , links: [<0.20704.2771>], dictionary: [{authz_keys_q,{[{publish,<<“willTopic”>>},{publish,<<“33lbo29i595i7496”>>},{publish,<<“9y5q0z885h9r95a6”>>},{publish,<<“463a85v0qjwlr946”>>},{publish,<<“fumu7qjqflkqvlv6”>>},{publish,<<“e9bb661aty8t0706”>>},{publish,<<“q5s12w326p351am6”>>},{publish,<<“f9044kt52m0lgg2r”>>},{publish,<<“hx3e15rd7yt5t436”>>},{publish,<<“q2a86fov7n6vv446”>>},{publish,<<“ku8z53z91n3y5fvr”>>},{publish,<<“6ke3aq8a23msne66”>>},{publish,<<“0p1e5lt7uohiot36”>>}],[{publish,<<“92m710xk61m2l2l6”>>}]}},{authz_cache_size,14},{{publish,<<“q5s12w326p351am6”>>},{allow,1756396610735}},{guid,{1756397172273389,45706287666142,138}},{{publish,<<“e9bb661aty8t0706”>>},{allow,1756396620783}},{{publish,<<“q2a86fov7n6vv446”>>},{allow,1756396189916}},{incoming_pubs,136},{outgoing_bytes,996},{send_pkt,356},{{publish,<<“92m710xk61m2l2l6”>>},{allow,1756396049532}},{{publish,<<“6ke3aq8a23msne66”>>},{allow,1756396129628}},{incoming_bytes,4915},{recv_msg,136},{{publish,<<“463a85v0qjwlr946”>>},{allow,1756396811120}},{{publish,<<“0p1e5lt7uohiot36”>>},{allow,1756396119580}},{{publish,<<“9y5q0z885h9r95a6”>>},{allow,1756397041936}},{{publish,<<“ku8z53z91n3y5fvr”>>},{allow,1756396139676}},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => 
#Fun<rand.2.92093067>},[227564715351458169|50996265054166680]}},{{publish,<<“33lbo29i595i7496”>>},{allow,1756397172273}},{‘recv_msg.qos1’,136},{recv_pkt,357},{{publish,<<“hx3e15rd7yt5t436”>>},{allow,1756396280060}},{{publish,<<“f9044kt52m0lgg2r”>>},{allow,1756396330299}},{{publish,<<“willTopic”>>},{allow,1756397232274}},{‘$logger_metadata$’,#{clientid => <<“icloud-api-prod-184a4adeaf4d-378372”>>,peername => “172.22.245.155:59080”}},{{publish,<<“fumu7qjqflkqvlv6”>>},{allow,1756396740783}}], trap_exit: false, status: running, heap_size: 1598, stack_size: 28, reductions: 4259701; neighbours:
2025-08-29T00:07:12.275967+08:00 [error] supervisor: ‘esockd_connection_sup - <0.20704.2771>’, errorContext: connection_crashed, reason: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, offender: [{pid,<0.21470.2771>},{name,connection},{mfargs,{emqx_connection,start_link,[#{enable_authn => true,limiter => #{client => #{bytes => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 20971520.0},messages => #{burst => 0,divisible => false,failure_strategy => force,initial => 0,low_watermark => 0,max_retry_time => 10000,rate => 300.0}},connection => #{burst => 0,initial => 0,rate => 300.0}},listener => {tcp,default},zone => default}]}}]
2025-08-29T00:09:16.504273+08:00 [error] crasher: initial call: emqx_connection:init/4, pid: <0.21459.2771>, registered_name: , error: {{case_clause,{error,einval}},[{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,485}]},{emqx_connection,process_msg,2,[{file,“emqx_connection.erl”},{line,491}]},{emqx_connection,handle_recv,3,[{file,“emqx_connection.erl”},{line,447}]},{proc_lib,wake_up,3,[{file,“proc_lib.erl”},{line,236}]}]}, ancestors: [<0.20704.2771>,<0.15901.2771>,esockd_sup,<0.5540.0>], message_queue_len: 0, messages: , links: [<0.20704.2771>], dictionary: [{outgoing_bytes,1006},{send_pkt,359},{{publish,<<“8pow08wk757ubgj0”>>},{allow,1756397296854}},{incoming_bytes,49316},{recv_msg,215},{authz_keys_q,{[{publish,<<“willTopic”>>}],[{publish,<<“8pow08wk757ubgj0”>>}]}},{authz_cache_size,2},{guid,{1756397296854406,45706287666131,217}},{incoming_pubs,215},{rand_seed,{#{bits => 58,jump => #Fun<rand.3.92093067>,next => #Fun<rand.0.92093067>,type => exsss,uniform => #Fun<rand.1.92093067>,uniform_n => #Fun<rand.2.92093067>},[30033897166717295|176613466736680649]}},{‘recv_msg.qos0’,82},{‘recv_msg.qos1’,133},{recv_pkt,442},{{publish,<<“willTopic”>>},{allow,1756397356503}},{‘$logger_metadata$’,#{clientid => <<“icloud-api-prod-a5d41b2144b9-696050”>>,peername => “172.22.96.248:42386”}}], trap_exit: false, status: running, heap_size: 1598, stack_size: 28, reductions: 4453630; neighbours:

请帮忙分析一下

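einval 是 POSIX 错误码 EINVAL,含义为 “Invalid argument”(无效参数),通常是底层操作系统调用返回的错误。这表明 EMQX 在处理客户端数据时,向操作系统发起了一个带有无效参数的请求(比如对 socket 进行读写操作时传入了错误的参数)。从日志中的 case_clause 可以看出,emqx_connection:process_msg/2 没有匹配到 {error,einval} 这个返回值,因此对应的连接进程直接崩溃。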

没必要把接收器(acceptors)改到 128。你的服务器才 2 核,改成 4 或 8 就已经算大了,再大也没什么用。