1. 背景
线上有个后台应用下载时偶发Broken Pipe异常, 堆栈如下:
2022-03-10 18:04:36,075 WARN [Thread-2754] [c.x.b.p.a.w.c.RedeemController] fail to write excel stream
org.apache.catalina.connector.ClientAbortException: java.io.IOException: Broken pipe
at org.apache.catalina.connector.OutputBuffer.doFlush(OutputBuffer.java:330) ~[catalina.jar:8.5.59]
at org.apache.catalina.connector.OutputBuffer.flush(OutputBuffer.java:293) ~[catalina.jar:8.5.59]
at org.apache.catalina.connector.CoyoteOutputStream.flush(CoyoteOutputStream.java:118) ~[catalina.jar:8.5.59]
at com.ximalaya.business.promotion.admin.web.utils.RedeemExcelUtils.sendExcelStreamWithPassword(RedeemExcelUtils.java:97) ~[classes/:?]
at com.ximalaya.business.promotion.admin.web.controller.RedeemController.exportRedeemCodesExcel(RedeemController.java:381) ~[classes/:?]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_241]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_241]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_241]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_241]
at org.springframework.web.method.support.InvocableHandlerMethod.doInvoke(InvocableHandlerMethod.java:190) ~[spring-web-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:138) ~[spring-web-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:105) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandlerMethod(RequestMappingHandlerAdapter.java:878) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:792) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:87) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:1040) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:943) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:1006) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.servlet.FrameworkServlet.doPost(FrameworkServlet.java:909) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at javax.servlet.http.HttpServlet.service(HttpServlet.java:652) ~[servlet-api.jar:?]
at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:883) ~[spring-webmvc-5.2.12.RELEASE.jar:5.2.12.RELEASE]
a t javax.servlet.http.HttpServlet.service(HttpServlet.java:733) ~[servlet-api.jar:?]
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166) ~[catalina.jar:8.5.59]
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52) ~[tomcat-websocket.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166) ~[catalina.jar:8.5.59]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:28) ~[auth-data-filter-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.UrlFilter.doFilter(UrlFilter.java:136) ~[auth-data-filter-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at org.jasig.cas.client.util.HttpServletRequestWrapperFilter.doFilter(HttpServletRequestWrapperFilter.java:71) ~[java-cas-client-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at org.jasig.cas.client.util.AssertionThreadLocalFilter.doFilter(AssertionThreadLocalFilter.java:50) ~[java-cas-client-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at org.jasig.cas.client.validation.AbstractTicketValidationFilter.doFilter(AbstractTicketValidationFilter.java:289) ~[java-cas-client-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at org.jasig.cas.client.authentication.AuthenticationFilter.doFilter(AuthenticationFilter.java:226) ~[java-cas-client-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at org.jasig.cas.client.session.SingleSignOutFilter.doFilter(SingleSignOutFilter.java:97) ~[java-cas-client-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.VirtualFilterChain.doFilter(VirtualFilterChain.java:38) ~[auth-data-filter-1.8.6.jar:?]
at com.ximalaya.ops.auth.data.filter.FilterChainProxy.doFilter(FilterChainProxy.java:77) ~[auth-data-filter-1.8.6.jar:?]
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166) ~[catalina.jar:8.5.59]
at org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:201) ~[spring-web-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:119) ~[spring-web-5.2.12.RELEASE.jar:5.2.12.RELEASE]
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166) ~[catalina.jar:8.5.59]
at com.ximalaya.xdcs.spring.filter.XmlyXdcsTraceFilter.doFilter(XmlyXdcsTraceFilter.java:263) ~[xdcs-spring-0.0.20.jar:0.0.20]
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:199) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:97) ~[catalina.jar:8.5.59]
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:544) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:143) ~[catalina.jar:8.5.59]
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:81) ~[catalina.jar:8.5.59]
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:78) ~[catalina.jar:8.5.59]
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:343) ~[catalina.jar:8.5.59]
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:616) ~[tomcat-coyote.jar:8.5.59]
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:65) ~[tomcat-coyote.jar:8.5.59]
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:818) ~[tomcat-coyote.jar:8.5.59]
at org.apache.tomcat.util.net.Nio2Endpoint$SocketProcessor.doRun(Nio2Endpoint.java:1620) ~[tomcat-coyote.jar:8.5.59]
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49) ~[tomcat-coyote.jar:8.5.59]
at org.apache.tomcat.util.net.AbstractEndpoint.processSocket(AbstractEndpoint.java:1082) ~[tomcat-coyote.jar:8.5.59]
at org.apache.tomcat.util.net.Nio2Endpoint$Nio2SocketWrapper$2.completed(Nio2Endpoint.java:565) ~[tomcat-coyote.jar:8.5.59]
at org.apache.tomcat.util.net.Nio2Endpoint$Nio2SocketWrapper$2.completed(Nio2Endpoint.java:543) ~[tomcat-coyote.jar:8.5.59]
at sun.nio.ch.Invoker.invokeUnchecked(Invoker.java:126) ~[?:1.8.0_241]
at sun.nio.ch.Invoker$2.run(Invoker.java:218) ~[?:1.8.0_241]
at sun.nio.ch.AsynchronousChannelGroupImpl$1.run(AsynchronousChannelGroupImpl.java:112) ~[?:1.8.0_241]
at com.alibaba.ttl.TtlRunnable.run(TtlRunnable.java:59) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_241]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_241]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_241]
Caused by: java.io.IOException: Broken pipe
at sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[?:1.8.0_241]
at sun.nio.ch.
2. 分析/解决历程
- 刚开始我以为是因为数据量大了所以超时, 因为数据本身要调RPC且大于1W数据时会按分表分页查询100次, 而且又是同步下载本身就慢就没管, 写了个手工处理的脚本来处理.
- 后来因为工单有点多, 就算用脚本处理也有点烦(打断工作), 累计处理时间可观, 而且还要跟运营解释. 最重要的是老板说没有产出. 于是从这个时候开始, 我打算从根本上一次性解决这个问题. 如果是因为数据量大同步下载导致, 就优化成异步来处理.
- 我联系到导出失败最近半年才有, 难道历史数据没有数据量大的吗? 于是我从表发现有大数据量历史数据, 且有成功导出记录, 这时我才发现并不是数据量的问题, 我开始转变方向, 开始排查问题是什么时候开始出现的, 发现和应用上云的时间是耦合的.
- 根据报错在网上找到的很多案例发现, 原因可能是: 服务端向Socket写数据时发现, Socket已经关闭了. 再联系到上云时的调用链路是有变化的, 以前是直接
h5 -> nginx -> 物理机 , (由于上云后应用IP不是固定的)后面变成了h5 -> nginx -> 应用网关 -> Pod节点 , 最后联系运维把网关的配置拉出来, 果然超时时间只配置了3s, 调到10s后就好了.
3. 复盘
- 根本原因: 连接被关闭,服务端往socket写数据时发现socket已经被关闭了.
- 排查思路: 最近做了哪些变更(链路和代码)
- 可能原因: 要么操作耗时优化, 要么排查整个链路各节点配置的超时时间
收获:
- 发现问题, 优先查看最近有没有变更, 如果影响面很大第一时间回滚
- 墨菲定律: 如果有问题, 肯定会出问题
- 不要怕暴露/躲避问题, 正视问题/解决问题(不丢人)
- 第一时间解决问题, 可以节省很多时间
|