https://mp.weixin.qq.com/s?__biz=MzAwNDY1ODY2OQ==&mid=2649288031&idx=1&sn=91c94e16460a4685a9c0c8e1b9c362a6&chksm=8334c9ddb44340cb66e6ce512ca41592fb483c148419737dbe21f9bbc2bfc2f872d1e54d1641&scene=178&cur_album_id=1955379809983741955#rd
微信公众号 WeMobileDev 于2021年7月19日发布的《微信Android客户端的ANR监控方案》
该方案的所有代码已经在Matrix(https://github.com/Tencent/matrix)中开源,这篇文章将详细讲解源码实现。

?
1. SignalAnrTracer 在 onAlive 方法里调用 nativeInitSignalAnrDetective 方法,开始监听 SIGQUIT 信号
/**
 * Tracer that detects ANRs by listening for SIGQUIT in native code.
 *
 * <p>The native layer (trace-canary) calls back into the static {@code onANRDumped},
 * {@code onANRDumpTrace} and {@code onPrintTrace} methods. On {@code onANRDumped} the tracer
 * first checks whether the head message of the main looper is overdue; if not, it polls the
 * process error state for up to {@link #ANR_DUMP_MAX_TIME} ms looking for NOT_RESPONDING.
 *
 * <p>All state is static, so every instance shares the same configuration.
 */
public class SignalAnrTracer extends Tracer {
    //region constants and shared state
    private static final String TAG = "SignalAnrTracer";
    // Name of the thread that polls the process error state after a SIGQUIT was observed.
    private static final String CHECK_ANR_STATE_THREAD_NAME = "Check-ANR-State-Thread";
    // Interval between two NOT_RESPONDING checks, in milliseconds.
    private static final int CHECK_ERROR_STATE_INTERVAL = 500;
    // Total polling window in milliseconds (the notes describe 20s as the ANR dump timeout).
    private static final int ANR_DUMP_MAX_TIME = 20000;
    // Number of polling rounds that fit into the window.
    private static final int CHECK_ERROR_STATE_COUNT =
            ANR_DUMP_MAX_TIME / CHECK_ERROR_STATE_INTERVAL;
    // Foreground: the head message is considered stuck once it is more than 2s overdue.
    private static final long FOREGROUND_MSG_THRESHOLD = -2000;
    // Background: the head message is considered stuck once it is more than 10s overdue.
    private static final long BACKGROUND_MSG_THRESHOLD = -10000;
    // True once any constructor has run.
    public static boolean hasInstance = false;
    // Foreground state sampled at the moment the native layer reported SIGQUIT.
    private static boolean currentForeground = false;
    // File the native layer writes the system-triggered (ANR) trace to.
    private static String sAnrTraceFilePath = "";
    // File the native layer writes a self-requested trace to (see printTrace()).
    private static String sPrintTraceFilePath = "";
    // Optional listener; when set it replaces the default issue reporting.
    private static SignalAnrDetectedListener sSignalAnrDetectedListener;
    private static Application sApplication;
    // True while the native SIGQUIT detector is installed.
    private static boolean hasInit = false;
    // AppForegroundUtil is initialised at most once, independent of onAlive/onDead cycles.
    private static boolean sForegroundUtilInitialized = false;
    // How far the head message is from its due time (when - uptime); negative means overdue.
    private static long anrMessageWhen = 0L;
    // toString() of the head message of the main looper when SIGQUIT was reported.
    private static String anrMessageString = "";
    //endregion

    static {
        // Native side: registers the native methods and holds the SIGQUIT handler.
        System.loadLibrary("trace-canary");
    }

    //region constructors
    public SignalAnrTracer(TraceConfig traceConfig) {
        hasInstance = true;
        sAnrTraceFilePath = nullToEmpty(traceConfig.anrTraceFilePath);
        sPrintTraceFilePath = nullToEmpty(traceConfig.printTraceFilePath);
    }

    public SignalAnrTracer(Application application) {
        hasInstance = true;
        sApplication = application;
    }

    public SignalAnrTracer(Application application, String anrTraceFilePath, String printTraceFilePath) {
        hasInstance = true;
        sAnrTraceFilePath = nullToEmpty(anrTraceFilePath);
        sPrintTraceFilePath = nullToEmpty(printTraceFilePath);
        sApplication = application;
    }
    //endregion

    /** Normalises a missing path to "" so it can be compared and passed to native code safely. */
    private static String nullToEmpty(String path) {
        return path == null ? "" : path;
    }

    /**
     * Called from native code when a SIGQUIT sent by another process was received.
     * Reports immediately if the main thread looks blocked; otherwise starts a thread that
     * polls the process error state.
     */
    @RequiresApi(api = Build.VERSION_CODES.M)
    @Keep
    private static void onANRDumped() {
        currentForeground = AppForegroundUtil.isInterestingToUser();
        boolean needReport = isMainThreadBlocked();
        if (needReport) {
            report(false);
        } else {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    checkErrorStateCycle();
                }
            }, CHECK_ANR_STATE_THREAD_NAME).start();
        }
    }

    /** Called from native code after the ANR trace file was written; logs it line by line. */
    @Keep
    private static void onANRDumpTrace() {
        try {
            MatrixUtil.printFileByLine(TAG, sAnrTraceFilePath);
        } catch (Throwable t) {
            MatrixLog.e(TAG, "onANRDumpTrace error: %s", t.getMessage());
        }
    }

    /** Called from native code after a self-requested trace was written; logs it line by line. */
    @Keep
    private static void onPrintTrace() {
        try {
            MatrixUtil.printFileByLine(TAG, sPrintTraceFilePath);
        } catch (Throwable t) {
            MatrixLog.e(TAG, "onPrintTrace error: %s", t.getMessage());
        }
    }

    /**
     * Reports a detected ANR, either to the registered listener or as a TracePlugin issue.
     *
     * @param fromProcessErrorState true when detected via the NOT_RESPONDING process state,
     *                              false when detected via the overdue main-thread message
     */
    private static void report(boolean fromProcessErrorState) {
        try {
            String stackTrace = Utils.getMainThreadJavaStackTrace();
            if (sSignalAnrDetectedListener != null) {
                sSignalAnrDetectedListener.onAnrDetected(stackTrace, anrMessageString, anrMessageWhen, fromProcessErrorState);
                return;
            }
            TracePlugin plugin = Matrix.with().getPluginByClass(TracePlugin.class);
            if (null == plugin) {
                return;
            }
            String scene = AppMethodBeat.getVisibleScene();
            JSONObject jsonObject = new JSONObject();
            jsonObject = DeviceUtil.getDeviceInfo(jsonObject, Matrix.with().getApplication());
            jsonObject.put(SharePluginInfo.ISSUE_STACK_TYPE, Constants.Type.SIGNAL_ANR);
            jsonObject.put(SharePluginInfo.ISSUE_SCENE, scene);
            jsonObject.put(SharePluginInfo.ISSUE_THREAD_STACK, stackTrace);
            jsonObject.put(SharePluginInfo.ISSUE_PROCESS_FOREGROUND, currentForeground);
            Issue issue = new Issue();
            issue.setTag(SharePluginInfo.TAG_PLUGIN_EVIL_METHOD);
            issue.setContent(jsonObject);
            plugin.onDetectIssue(issue);
            MatrixLog.e(TAG, "happens real ANR : %s ", jsonObject.toString());
        } catch (JSONException e) {
            MatrixLog.e(TAG, "[JSONException error: %s", e);
        }
    }

    /**
     * Reads the head message of the main MessageQueue via reflection and decides whether it is
     * overdue by more than the foreground/background threshold. Also records the message and
     * its lateness for the report.
     *
     * @return true when the head message is overdue beyond the threshold; false otherwise,
     *         including when reflection fails
     */
    @RequiresApi(api = Build.VERSION_CODES.M)
    private static boolean isMainThreadBlocked() {
        try {
            MessageQueue mainQueue = Looper.getMainLooper().getQueue();
            Field field = mainQueue.getClass().getDeclaredField("mMessages");
            field.setAccessible(true);
            final Message mMessage = (Message) field.get(mainQueue);
            if (mMessage != null) {
                anrMessageString = mMessage.toString();
                long when = mMessage.getWhen();
                if (when == 0) {
                    return false;
                }
                // Negative value: the message should already have been dispatched.
                long time = when - SystemClock.uptimeMillis();
                anrMessageWhen = time;
                long timeThreshold = BACKGROUND_MSG_THRESHOLD;
                if (currentForeground) {
                    timeThreshold = FOREGROUND_MSG_THRESHOLD;
                }
                return time < timeThreshold;
            }
        } catch (Exception e) {
            return false;
        }
        return false;
    }

    /**
     * Polls {@link #checkErrorState()} up to CHECK_ERROR_STATE_COUNT times, sleeping
     * CHECK_ERROR_STATE_INTERVAL ms between rounds, and reports once NOT_RESPONDING is seen.
     */
    private static void checkErrorStateCycle() {
        int checkErrorStateCount = 0;
        while (checkErrorStateCount < CHECK_ERROR_STATE_COUNT) {
            try {
                checkErrorStateCount++;
                boolean myAnr = checkErrorState();
                if (myAnr) {
                    report(true);
                    break;
                }
                Thread.sleep(CHECK_ERROR_STATE_INTERVAL);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
                MatrixLog.e(TAG, "checkErrorStateCycle error, e : " + e.getMessage());
                break;
            } catch (Throwable t) {
                MatrixLog.e(TAG, "checkErrorStateCycle error, e : " + t.getMessage());
                break;
            }
        }
    }

    /**
     * Asks ActivityManager for processes in an error state.
     *
     * @return true only when an entry with this process's pid has condition NOT_RESPONDING
     */
    private static boolean checkErrorState() {
        try {
            Application application =
                    sApplication == null ? Matrix.with().getApplication() : sApplication;
            ActivityManager am = (ActivityManager) application
                    .getSystemService(Context.ACTIVITY_SERVICE);
            List<ActivityManager.ProcessErrorStateInfo> procs = am.getProcessesInErrorState();
            if (procs == null) return false;
            for (ActivityManager.ProcessErrorStateInfo proc : procs) {
                MatrixLog.i(TAG, "[checkErrorState] found Error State proccessName = %s, proc.condition = %d", proc.processName, proc.condition);
                if (proc.uid != android.os.Process.myUid()
                        && proc.condition == ActivityManager.ProcessErrorStateInfo.NOT_RESPONDING) {
                    MatrixLog.i(TAG, "maybe received other apps ANR signal");
                }
                if (proc.pid != android.os.Process.myPid()) continue;
                if (proc.condition != ActivityManager.ProcessErrorStateInfo.NOT_RESPONDING) {
                    continue;
                }
                return true;
            }
            return false;
        } catch (Throwable t) {
            MatrixLog.e(TAG, "[checkErrorState] error : %s", t.getMessage());
        }
        return false;
    }

    /**
     * Requests a trace of the current process through the native layer. Does nothing (besides
     * logging) when no tracer instance exists or no print-trace path was configured.
     */
    public static void printTrace() {
        if (!hasInstance) {
            MatrixLog.e(TAG, "SignalAnrTracer has not been initialize");
            return;
        }
        if (sPrintTraceFilePath == null || sPrintTraceFilePath.isEmpty()) {
            MatrixLog.e(TAG, "PrintTraceFilePath has not been set");
            return;
        }
        nativePrintTrace();
    }

    private static native void nativeInitSignalAnrDetective(String anrPrintTraceFilePath, String printTraceFilePath);

    private static native void nativeFreeSignalAnrDetective();

    private static native void nativePrintTrace();

    @Override
    protected void onAlive() {
        super.onAlive();
        if (!hasInit) {
            // Install the native SIGQUIT detector.
            nativeInitSignalAnrDetective(sAnrTraceFilePath, sPrintTraceFilePath);
            if (!sForegroundUtilInitialized) {
                AppForegroundUtil.INSTANCE.init();
                sForegroundUtilInitialized = true;
            }
            hasInit = true;
        }
    }

    @Override
    protected void onDead() {
        super.onDead();
        nativeFreeSignalAnrDetective();
        // The native detector is gone; allow the next onAlive() to install it again.
        hasInit = false;
    }

    public void setSignalAnrDetectedListener(SignalAnrDetectedListener listener) {
        sSignalAnrDetectedListener = listener;
    }

    /** Receives detected ANRs instead of the default TracePlugin issue. */
    public interface SignalAnrDetectedListener {
        void onAnrDetected(String stackTrace, String mMessageString, long mMessageWhen, boolean fromProcessErrorState);
    }
}
2. MatrixTracer.cc
2.1 JNI_OnLoad 初始化:保存 Java 层的类和方法引用,并注册 native 方法(双向绑定)
2.2 nativeInitSignalAnrDetective 开启检测,真正检测的地方在 AnrDumper.cc
2.3 AnrDumper.cc 的 handleSignal 会调用 MatrixTracer 的 anrDumpCallback,表示 ANR 可能发生了,通知 SignalAnrTracer 检测主线程是否阻塞或进程状态是否为 NOT_RESPONDING;同时调用 hookAnrTraceWrite 开启 hook,用来找到写 trace 的位置
2.4 my_connect、my_open 用来识别 Signal Catcher 线程开始建立 socket 连接(或打开 trace 文件),以便捕获随后的 write 调用
2.5 my_write 是我们替换后的 write 方法,在这里拿到 trace 内容
#define PROP_VALUE_MAX 92 // size of the buffer getApiLevel reads the system property into
#define PROP_SDK_NAME "ro.build.version.sdk" // system property getApiLevel parses
#define HOOK_CONNECT_PATH "/dev/socket/tombstoned_java_trace" // address my_connect watches for
#define HOOK_OPEN_PATH "/data/anr/traces.txt" // file path my_open watches for
using namespace MatrixTracer;
static std::optional<AnrDumper> sAnrDumper; // created by nativeInitSignalAnrDetective, destroyed by nativeFreeSignalAnrDetective
static bool isTraceWrite = false; // set to true by my_connect/my_open, back to false by my_write
static bool fromMyPrintTrace = false; // set by nativePrintTrace: the next trace was requested by this process
static bool isHooking = false; // true between hookAnrTraceWrite and unHookAnrTraceWrite
static std::string anrTracePathstring; // destination file for a system-triggered (ANR) trace
static std::string printTracePathstring; // destination file for a self-requested trace
static int signalCatcherTid; // tid recorded by my_connect/my_open; my_write only reacts on that thread
// Cached references to the Java classes and static methods the native code calls back into.
static struct StacktraceJNI {
jclass AnrDetective; // global ref to SignalAnrTracer
jclass ThreadPriorityDetective; // global ref to ThreadPriorityTracer
jmethodID AnrDetector_onANRDumped; // SignalAnrTracer.onANRDumped
jmethodID AnrDetector_onANRDumpTrace; // SignalAnrTracer.onANRDumpTrace
jmethodID AnrDetector_onPrintTrace; // SignalAnrTracer.onPrintTrace
jmethodID ThreadPriorityDetective_onMainThreadPriorityModified;
jmethodID ThreadPriorityDetective_onMainThreadTimerSlackModified;
} gJ;
//region main-thread priority / timer-slack hooks
// Address of the real setpriority; filled in when the hook is registered.
int (*original_setpriority)(int __which, id_t __who, int __priority);

/**
 * Replacement for setpriority. Requests with a priority <= 0 are forwarded untouched.
 * For a positive priority aimed at the main thread (who == 0 while running on the main
 * thread, or who == pid) the Java side is notified first, then the call is forwarded.
 */
int my_setpriority(int __which, id_t __who, int __priority) {
    if (__priority <= 0) {
        return original_setpriority(__which, __who, __priority);
    }
    const bool targetsMainThread =
            (__who == 0 && getpid() == gettid()) || (__who == getpid());
    if (targetsMainThread) {
        JNIEnv *env = JniInvocation::getEnv();
        // Same guard the ANR callbacks use: no env, no Java notification.
        if (env != nullptr) {
            env->CallStaticVoidMethod(gJ.ThreadPriorityDetective,
                                      gJ.ThreadPriorityDetective_onMainThreadPriorityModified,
                                      __priority);
        }
    }
    return original_setpriority(__which, __who, __priority);
}
int (*original_prctl)(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
int my_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5) {
if (option == PR_SET_TIMERSLACK) {
if (gettid() == getpid() && arg2 > 50000) {
JNIEnv *env = JniInvocation::getEnv();
env->CallStaticVoidMethod(gJ.ThreadPriorityDetective,
gJ.ThreadPriorityDetective_onMainThreadTimerSlackModified,
arg2);
}
}
return original_prctl(option, arg2, arg3, arg4, arg5);
}
//endregion
/**
 * Removes the trace hooks and stores the captured trace text in a file.
 *
 * @param content  trace text to store
 * @param filePath destination file; nothing is written if it cannot be opened
 */
void writeAnr(const std::string &content, const std::string &filePath) {
    // Unhook first: the trace has been captured, and our own file write must not be intercepted.
    unHookAnrTraceWrite();
    std::ofstream outfile(filePath);
    if (!outfile.is_open()) {
        return;
    }
    outfile << content;
}
//region my_connect original_connect
// Address of the real connect; filled in by hookAnrTraceWrite.
int (*original_connect)(int __fd, const struct sockaddr *__addr, socklen_t __addr_length);

/**
 * Replacement for connect. When the target is the unix socket HOOK_CONNECT_PATH, the calling
 * thread is remembered and isTraceWrite is raised so that my_write captures its next write.
 * The call is always forwarded to the real connect.
 */
int my_connect(int __fd, const struct sockaddr *__addr, socklen_t __addr_length) {
    // Only a unix-domain address carries a path; other families hold binary data in sa_data.
    if (__addr != nullptr && __addr->sa_family == AF_UNIX
        && strcmp(__addr->sa_data, HOOK_CONNECT_PATH) == 0) {
        signalCatcherTid = gettid();
        isTraceWrite = true;
    }
    return original_connect(__fd, __addr, __addr_length);
}
//endregion
//region my_open original_open
// Address of the real open; filled in by hookAnrTraceWrite.
int (*original_open)(const char *pathname, int flags, mode_t mode);

/**
 * Replacement for open. When the file being opened is HOOK_OPEN_PATH, the calling thread is
 * remembered and isTraceWrite is raised so that my_write captures its next write.
 * The call is always forwarded to the real open.
 */
int my_open(const char *pathname, int flags, mode_t mode) {
    const bool opensTraceFile = pathname != nullptr && strcmp(pathname, HOOK_OPEN_PATH) == 0;
    if (opensTraceFile) {
        signalCatcherTid = gettid();
        isTraceWrite = true;
    }
    return original_open(pathname, flags, mode);
}
//endregion
//region original_write my_write
// Address of the real write; filled in by hookAnrTraceWrite.
ssize_t (*original_write)(int fd, const void *const __pass_object_size0 buf, size_t count);

/**
 * Replacement for write. The first write issued by the thread recorded in signalCatcherTid
 * after my_connect/my_open raised isTraceWrite is treated as the trace dump: its buffer is
 * copied to the configured file and the matching Java callback is invoked. Every call is
 * forwarded to the real write.
 */
ssize_t my_write(int fd, const void *const buf, size_t count) {
    if (isTraceWrite && gettid() == signalCatcherTid) {
        // One-shot: only the first write after the connect/open is captured.
        isTraceWrite = false;
        signalCatcherTid = 0;
        if (buf != nullptr) {
            const std::string targetFilePath =
                    fromMyPrintTrace ? printTracePathstring : anrTracePathstring;
            if (!targetFilePath.empty()) {
                // Copy exactly count bytes; the buffer is not required to be NUL-terminated.
                const std::string content(static_cast<const char *>(buf), count);
                writeAnr(content, targetFilePath);
                if (!fromMyPrintTrace) {
                    anrDumpTraceCallback();
                } else {
                    printTraceCallback();
                }
                fromMyPrintTrace = false;
            }
        }
    }
    return original_write(fd, buf, count);
}
//endregion
//调用java的onANRDumped,AnrDumper.cc 里handleSignal里调用anrCallback然后调用这个anrDumpCallback回调
bool anrDumpCallback() {
JNIEnv *env = JniInvocation::getEnv();
if (!env) return false;
env->CallStaticVoidMethod(gJ.AnrDetective, gJ.AnrDetector_onANRDumped);
return true;
}
//调用java的onANRDumpTrace,my_write里调用
bool anrDumpTraceCallback() {
JNIEnv *env = JniInvocation::getEnv();
if (!env) return false;
env->CallStaticVoidMethod(gJ.AnrDetective, gJ.AnrDetector_onANRDumpTrace);
return true;
}
//调用java的onPrintTrace,my_write里调用
bool printTraceCallback() {
JNIEnv *env = JniInvocation::getEnv();
if (!env) return false;
env->CallStaticVoidMethod(gJ.AnrDetective, gJ.AnrDetector_onPrintTrace);
return true;
}
//ok
int getApiLevel() {
char buf[PROP_VALUE_MAX];
int len = __system_property_get(PROP_SDK_NAME, buf);
if (len <= 0)
return 0;
return atoi(buf);
}
/**
* @param isSiUser true为自己的进程
* AnrDumper.cc 里handleSignal里调用anrCallback方法,或者调用siUserCallback,然后调用这个hookAnrTraceWrite回调
*/
void hookAnrTraceWrite(bool isSiUser) {
int apiLevel = getApiLevel();
if (apiLevel < 19) {
return;
}
//isSiUser为true,表示自己进程发的时候是通过kill发的,此处不符合逻辑,返回
if (!fromMyPrintTrace && isSiUser) {
return;
}
if (isHooking) {
return;
}
isHooking = true;
if (apiLevel >= 27) {
void *libcutils_info = xhook_elf_open("/system/lib64/libcutils.so");
if (!libcutils_info) {
libcutils_info = xhook_elf_open("/system/lib/libcutils.so");
}
xhook_hook_symbol(libcutils_info, "connect", (void *) my_connect,
(void **) (&original_connect));
} else {
void *libart_info = xhook_elf_open("libart.so");
xhook_hook_symbol(libart_info, "open", (void *) my_open, (void **) (&original_open));
}
if (apiLevel >= 30 || apiLevel == 25 || apiLevel == 24) {
void *libc_info = xhook_elf_open("libc.so");
xhook_hook_symbol(libc_info, "write", (void *) my_write, (void **) (&original_write));
} else if (apiLevel == 29) {
void *libbase_info = xhook_elf_open("/system/lib64/libbase.so");
if (!libbase_info) {
libbase_info = xhook_elf_open("/system/lib/libbase.so");
}
xhook_hook_symbol(libbase_info, "write", (void *) my_write, (void **) (&original_write));
xhook_elf_close(libbase_info);
} else {
void *libart_info = xhook_elf_open("libart.so");
xhook_hook_symbol(libart_info, "write", (void *) my_write, (void **) (&original_write));
}
}
//unhook
void unHookAnrTraceWrite() {
int apiLevel = getApiLevel();
if (apiLevel >= 27) {
void *libcutils_info = xhook_elf_open("/system/lib64/libcutils.so");
xhook_hook_symbol(libcutils_info, "connect", (void *) original_connect, nullptr);
} else {
void *libart_info = xhook_elf_open("libart.so");
xhook_hook_symbol(libart_info, "open", (void *) original_connect, nullptr);
}
if (apiLevel >= 30 || apiLevel == 25 || apiLevel == 24) {
void *libc_info = xhook_elf_open("libc.so");
xhook_hook_symbol(libc_info, "write", (void *) original_write, nullptr);
} else if (apiLevel == 29) {
void *libbase_info = xhook_elf_open("/system/lib64/libbase.so");
xhook_hook_symbol(libbase_info, "write", (void *) original_write, nullptr);
} else {
void *libart_info = xhook_elf_open("libart.so");
xhook_hook_symbol(libart_info, "write", (void *) original_write, nullptr);
}
isHooking = false;
}
/**
 * JNI entry point: stores the two trace file paths and creates the AnrDumper that listens
 * for SIGQUIT.
 *
 * @param anrTracePath   file for traces triggered by another process (ANR)
 * @param printTracePath file for traces requested through nativePrintTrace
 */
static void
nativeInitSignalAnrDetective(JNIEnv *env, jclass, jstring anrTracePath, jstring printTracePath) {
    const char *anrTracePathChar =
            anrTracePath != nullptr ? env->GetStringUTFChars(anrTracePath, nullptr) : nullptr;
    const char *printTracePathChar =
            printTracePath != nullptr ? env->GetStringUTFChars(printTracePath, nullptr) : nullptr;
    anrTracePathstring = anrTracePathChar != nullptr ? anrTracePathChar : "";
    printTracePathstring = printTracePathChar != nullptr ? printTracePathChar : "";
    // The copies above own the text now; hand the JNI buffers back.
    if (anrTracePathChar != nullptr) {
        env->ReleaseStringUTFChars(anrTracePath, anrTracePathChar);
    }
    if (printTracePathChar != nullptr) {
        env->ReleaseStringUTFChars(printTracePath, printTracePathChar);
    }
    // AnrDumper keeps the raw pointers, so pass storage that outlives this call.
    sAnrDumper.emplace(anrTracePathstring.c_str(), printTracePathstring.c_str(), anrDumpCallback);
}
// JNI entry point: destroys the AnrDumper created by nativeInitSignalAnrDetective, if any.
static void nativeFreeSignalAnrDetective(JNIEnv *env, jclass) {
    sAnrDumper = std::nullopt;
}
//region main-thread priority detection
// JNI entry point: registers my_setpriority and my_prctl as replacements in every library
// whose name matches ".*\.so$", then asks xhook to apply the registrations.
static void nativeInitMainThreadPriorityDetective(JNIEnv *env, jclass) {
    static const char *const kAllSharedLibs = ".*\\.so$";
    xhook_register(kAllSharedLibs, "setpriority", (void *) my_setpriority,
                   (void **) (&original_setpriority));
    xhook_register(kAllSharedLibs, "prctl", (void *) my_prctl,
                   (void **) (&original_prctl));
    xhook_refresh(true);
}
//endregion
//自己打印trace,发送自己的进程发送SIGQUIT
static void nativePrintTrace() {
fromMyPrintTrace = true;
kill(getpid(), SIGQUIT);
}
// Returns the number of elements of a built-in array, evaluated at compile time.
template<typename Elem, std::size_t Count>
static inline constexpr std::size_t NELEM(const Elem (&)[Count]) {
    return Count;
}
// Native methods registered on SignalAnrTracer in JNI_OnLoad: Java name, JNI signature, C++ function.
static const JNINativeMethod ANR_METHODS[] = {
{"nativeInitSignalAnrDetective", "(Ljava/lang/String;Ljava/lang/String;)V", (void *) nativeInitSignalAnrDetective},
{"nativeFreeSignalAnrDetective", "()V", (void *) nativeFreeSignalAnrDetective},
{"nativePrintTrace", "()V", (void *) nativePrintTrace},
};
// Native methods registered on ThreadPriorityTracer in JNI_OnLoad.
static const JNINativeMethod THREAD_PRIORITY_METHODS[] = {
{"nativeInitMainThreadPriorityDetective", "()V", (void *) nativeInitMainThreadPriorityDetective},
};
/**
 * Library entry point. Caches global class references and static method IDs for
 * SignalAnrTracer and ThreadPriorityTracer in gJ and registers their native methods.
 *
 * @return JNI_VERSION_1_6 on success, -1 if the env, a class, a method ID or a registration
 *         is unavailable
 */
JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *) {
    JniInvocation::init(vm);
    JNIEnv *env;
    if (vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6) != JNI_OK)
        return -1;

    // --- SignalAnrTracer ---
    jclass anrDetectiveCls = env->FindClass("com/tencent/matrix/trace/tracer/SignalAnrTracer");
    if (!anrDetectiveCls)
        return -1;
    gJ.AnrDetective = static_cast<jclass>(env->NewGlobalRef(anrDetectiveCls));
    gJ.AnrDetector_onANRDumped =
            env->GetStaticMethodID(anrDetectiveCls, "onANRDumped", "()V");
    gJ.AnrDetector_onANRDumpTrace =
            env->GetStaticMethodID(anrDetectiveCls, "onANRDumpTrace", "()V");
    gJ.AnrDetector_onPrintTrace =
            env->GetStaticMethodID(anrDetectiveCls, "onPrintTrace", "()V");
    // A missing callback would otherwise surface later as a call through a null method ID.
    if (!gJ.AnrDetector_onANRDumped || !gJ.AnrDetector_onANRDumpTrace ||
        !gJ.AnrDetector_onPrintTrace)
        return -1;
    if (env->RegisterNatives(
            anrDetectiveCls, ANR_METHODS, static_cast<jint>(NELEM(ANR_METHODS))) != 0)
        return -1;
    env->DeleteLocalRef(anrDetectiveCls);

    // --- ThreadPriorityTracer ---
    jclass threadPriorityDetectiveCls = env->FindClass(
            "com/tencent/matrix/trace/tracer/ThreadPriorityTracer");
    if (!threadPriorityDetectiveCls)
        return -1;
    gJ.ThreadPriorityDetective = static_cast<jclass>(env->NewGlobalRef(threadPriorityDetectiveCls));
    gJ.ThreadPriorityDetective_onMainThreadPriorityModified =
            env->GetStaticMethodID(threadPriorityDetectiveCls, "onMainThreadPriorityModified",
                                   "(I)V");
    gJ.ThreadPriorityDetective_onMainThreadTimerSlackModified =
            env->GetStaticMethodID(threadPriorityDetectiveCls, "onMainThreadTimerSlackModified",
                                   "(J)V");
    if (!gJ.ThreadPriorityDetective_onMainThreadPriorityModified ||
        !gJ.ThreadPriorityDetective_onMainThreadTimerSlackModified)
        return -1;
    if (env->RegisterNatives(
            threadPriorityDetectiveCls, THREAD_PRIORITY_METHODS,
            static_cast<jint>(NELEM(THREAD_PRIORITY_METHODS))) != 0)
        return -1;
    env->DeleteLocalRef(threadPriorityDetectiveCls);
    return JNI_VERSION_1_6;
}
3.AnrDumper.h 定义AnrDumper,继承SignalHandler
namespace MatrixTracer {
// SignalHandler specialisation that reacts to SIGQUIT (see handleSignal in AnrDumper.cc).
class AnrDumper : public SignalHandler {
public:
// Callback type stored by the dumper: takes no arguments and returns bool.
using DumpCallbackFunction = std::function<bool()>;
AnrDumper(const char* anrTraceFile, const char* printTraceFile, DumpCallbackFunction&& callback);// callback is taken by rvalue reference (&&)
virtual ~AnrDumper();
private:
// Receives the signals dispatched by SignalHandler::signalHandler.
Result handleSignal(int sig, const siginfo_t *info, void *uc) final;
const DumpCallbackFunction mCallback;
};
} // namespace MatrixTracer
#endif // LAGDETECTOR_LAG_DETECTOR_MAIN_CPP_ANRDUMPER_H_
4. AnrDumper.cc 的 handleSignal 方法处理 SIGQUIT 信号,并根据信号来自其他进程还是自己进程,分别调用 anrCallback 或 siUserCallback。
4.1 anr是system_server进程发来的SIGQUIT,anrCallback代表可能发生了anr,之后会调用anrDumpCallback,让SignalAnrTracer检测ui线程是否block或者状态为NOT_RESPONDING
// Thread name getSignalCatcherThreadId looks for in /proc/<pid>/task/<tid>/comm.
#define SIGNAL_CATCHER_THREAD_NAME "Signal Catcher"
// SigBlk value (from /proc/<tid>/status) a candidate thread must have to be chosen.
// NOTE(review): 0x1000 is bit 12, i.e. signal 13 (SIGPIPE on Linux) — presumably the real
// signal catcher, while waiting for SIGQUIT, shows only that signal as blocked; confirm.
#define SIGNAL_CATCHER_THREAD_SIGBLK 0x1000
#define O_WRONLY 00000001
#define O_CREAT 00000100
#define O_TRUNC 00001000
namespace MatrixTracer {
// Signal mask returned by pthread_sigmask in the AnrDumper constructor; restored by the destructor.
static sigset_t old_sigSet;
// Raw path pointers handed to the AnrDumper constructor; they are stored, not copied.
const char *mAnrTraceFile;
const char *mPrintTraceFile;
// Background from the article: with both sigwait and a signal handler present, SIGQUIT kept
// going to the system's Signal Catcher thread because Android leaves SIGQUIT blocked, so only
// sigwait sees it. Unblocking SIGQUIT with pthread_sigmask lets the handler receive it.
//
// Stores the two trace file pointers (not copies) and unblocks SIGQUIT on the calling thread,
// remembering the previous mask in old_sigSet.
AnrDumper::AnrDumper(const char *anrTraceFile, const char *printTraceFile,
                     AnrDumper::DumpCallbackFunction &&callback) : mCallback(callback) {
    sigset_t quitOnly;
    sigemptyset(&quitOnly);
    sigaddset(&quitOnly, SIGQUIT);

    mAnrTraceFile = anrTraceFile;
    mPrintTraceFile = printTraceFile;

    // must unblocked SIGQUIT, otherwise the signal handler can not capture SIGQUIT
    pthread_sigmask(SIG_UNBLOCK, &quitOnly, &old_sigSet);
}
/**
 * Scans /proc/<pid>/task for threads whose comm starts with SIGNAL_CATCHER_THREAD_NAME.
 * Prefers the first such thread whose SigBlk (from its status file) equals
 * SIGNAL_CATCHER_THREAD_SIGBLK; otherwise falls back to the first thread with that name.
 *
 * @return the chosen tid, or -1 when the task directory cannot be opened or no thread matches
 */
static int getSignalCatcherThreadId() {
    char taskDirPath[128];
    snprintf(taskDirPath, sizeof(taskDirPath), "/proc/%d/task", getpid());
    DIR *taskDir = opendir(taskDirPath);
    if (taskDir == nullptr) {
        return -1;
    }

    int signalCatcherTid = -1;
    int firstSignalCatcherTid = -1;
    struct dirent *dent;
    while ((dent = readdir(taskDir)) != nullptr) {
        // Non-numeric entries such as "." and ".." parse to 0 and are skipped.
        const pid_t tid = atoi(dent->d_name);
        if (tid <= 0) {
            continue;
        }

        // Zero-filled so the comparison below never reads garbage if the read fails or the
        // helper does not terminate the string (its contract is not visible here).
        char threadName[1024] = {0};
        char commFilePath[1024];
        snprintf(commFilePath, sizeof(commFilePath), "/proc/%d/task/%d/comm", getpid(), tid);
        Support::readFileAsString(commFilePath, threadName, sizeof(threadName));
        if (strncmp(SIGNAL_CATCHER_THREAD_NAME, threadName,
                    sizeof(SIGNAL_CATCHER_THREAD_NAME) - 1) != 0) {
            continue;
        }
        if (firstSignalCatcherTid == -1) {
            firstSignalCatcherTid = tid;
        }

        // Read the thread's blocked-signal mask from its status file.
        long long sigblk = 0;
        char taskPath[128];
        snprintf(taskPath, sizeof(taskPath), "/proc/%d/status", tid);
        ScopedFileDescriptor fd(open(taskPath, O_RDONLY, 0));
        LineReader lr(fd.get());
        const char *line;
        size_t len;
        while (lr.getNextLine(&line, &len)) {
            if (1 == sscanf(line, "SigBlk: %" SCNx64, &sigblk)) {
                break;
            }
            lr.popLine(len);
        }
        if (SIGNAL_CATCHER_THREAD_SIGBLK != sigblk) {
            continue;
        }
        signalCatcherTid = tid;
        break;
    }
    closedir(taskDir);

    return signalCatcherTid != -1 ? signalCatcherTid : firstSignalCatcherTid;
}
// Once our handler has taken SIGQUIT, the Signal Catcher thread's sigwait no longer sees it
// and the normal dump would not happen. To keep the ANR flow unchanged, SIGQUIT is sent again,
// this time directly to the Signal Catcher thread.
static void sendSigToSignalCatcher() {
    const int tid = getSignalCatcherThreadId();
    if (tid <= 0) {
        // No Signal Catcher thread was found; there is no valid target for tgkill.
        return;
    }
    syscall(SYS_tgkill, getpid(), tid, SIGQUIT);
}
// Thread entry used when SIGQUIT came from another process (the ANR case according to the
// article: system_server sends it). Notifies the Java side, installs the write hooks when an
// ANR trace path is configured, then forwards SIGQUIT to the Signal Catcher thread.
static void *anrCallback(void *arg) {
    anrDumpCallback();
    const bool hasAnrTracePath = mAnrTraceFile[0] != '\0';
    if (hasAnrTracePath) {
        hookAnrTraceWrite(false);
    }
    sendSigToSignalCatcher();
    return nullptr;
}
// Thread entry used when SIGQUIT was sent by this process itself (not an ANR), so
// anrDumpCallback is not invoked. Installs the write hooks when a print-trace path is
// configured, then forwards SIGQUIT to the Signal Catcher thread.
static void *siUserCallback(void *arg) {
    const bool hasPrintTracePath = mPrintTraceFile[0] != '\0';
    if (hasPrintTracePath) {
        hookAnrTraceWrite(true);
    }
    sendSigToSignalCatcher();
    return nullptr;
}
// From the article: siginfo_t.si_code tells how the signal was sent (SI_USER for kill,
// SI_QUEUE for sigqueue), but Android versions are not consistent about which one the ANR
// flow uses. The sender pid is the reliable part: it is the fifth field (the fourth on a few
// devices), and a SIGQUIT sent by our own process is never an ANR.
//
// Handles SIGQUIT by spawning a detached worker thread: anrCallback when neither candidate
// sender-pid slot equals our pid, siUserCallback otherwise.
SignalHandler::Result AnrDumper::handleSignal(int sig, const siginfo_t *info, void *uc) {
    // Only process SIGQUIT, which indicates an ANR.
    if (sig != SIGQUIT) return NOT_HANDLED;

    // Both slots are checked because the position of the sender pid differs between devices.
    const int fromPid1 = info->_si_pad[3];
    const int fromPid2 = info->_si_pad[4];
    const int myPid = getpid();
    const bool sentByOtherProcess = fromPid1 != myPid && fromPid2 != myPid;

    pthread_t thd;
    const int rc = pthread_create(&thd, nullptr,
                                  sentByOtherProcess ? anrCallback : siUserCallback, nullptr);
    if (rc == 0) {
        // Only a successfully created thread has a valid handle to detach.
        pthread_detach(thd);
    }
    return HANDLED_NO_RETRIGGER;
}
// Thread-entry wrapper around anrDumpTraceCallback; marked as unused in the notes.
static void *anr_trace_callback(void *args) {
    (void) anrDumpTraceCallback();
    return nullptr;
}
// Thread-entry wrapper around printTraceCallback; marked as unused in the notes.
static void *print_trace_callback(void *args) {
    (void) printTraceCallback();
    return nullptr;
}
// Restores the signal mask that was saved in old_sigSet by the constructor.
// NOTE(review): pthread_sigmask acts on the calling thread — confirm construction and
// destruction happen on the same thread.
AnrDumper::~AnrDumper() {
pthread_sigmask(SIG_SETMASK, &old_sigSet, nullptr);
}
} // namespace MatrixTracer
5.我们的SignalHandler类
5.1 signalHandler方法主要是收到了信号
5.2 handleSignal处理信号
namespace MatrixTracer {
// Base class for objects that want to receive the process-wide signal handled in
// SignalHandler.cc. Subclasses implement handleSignal.
class SignalHandler {
public:
SignalHandler();
virtual ~SignalHandler();// destructor:
// it runs automatically when an object of the class goes out of scope. It is named like the
// class with a leading ~, takes no parameters, has no return value, and a class has only one.
// Its job is cleanup, e.g. releasing heap memory.
protected:
// Value returned by handleSignal.
enum Result {
NOT_HANDLED = 0, HANDLED, HANDLED_NO_RETRIGGER
};//retrigger
// Called for each registered handler when the signal arrives.
virtual Result handleSignal(int sig, const siginfo_t *info, void *uc) = 0;
private:
// Function installed via sigaction; dispatches to the registered handlers.
static void signalHandler(int sig, siginfo_t *info, void *uc);
static bool installHandlersLocked();
//https://blog.csdn.net/lmb1612977696/article/details/80035487
SignalHandler(const SignalHandler &) = delete;// copy construction is disabled
SignalHandler &operator=(const SignalHandler &) = delete;// copy assignment is disabled
};
} // namespace MatrixTracer
#endif // LAGDETECTOR_LAG_DETECTOR_MAIN_CPP_SIGNALHANDLER_H_
6.SignalHandler.cc
6.1 installHandlersLocked 通过 sigaction 方法建立一个 Signal Handler,并把 sa_sigaction 设置为我们的 signalHandler 方法
6.2 signalHandler 信号处理的地方,转发给各SignalHandler的handleSignal
// Name of the thread that normally receives SIGQUIT.
// NOTE(review): neither macro is referenced in the SignalHandler.cc code shown here; the same
// definitions are used by getSignalCatcherThreadId in AnrDumper.cc.
#define SIGNAL_CATCHER_THREAD_NAME "Signal Catcher"
// SigBlk mask value compared against a candidate thread's /proc status entry in AnrDumper.cc.
#define SIGNAL_CATCHER_THREAD_SIGBLK 0x1000
namespace MatrixTracer {
// The one signal this module installs a handler for.
const int TARGET_SIG = SIGQUIT;//3
// Disposition that was active before installHandlersLocked ran; restoreHandlersLocked puts it back.
struct sigaction sOldHandlers;
// True while our sigaction handler is installed.
bool sHandlerInstalled = false;
// The global signal handler stack. This is needed because there may exist
// multiple SignalHandler instances in a process. Each will have itself
// registered in this stack.
static std::vector<SignalHandler *> *sHandlerStack = nullptr;// allocated by the first constructor, deleted when the last handler is destroyed
// Guards sHandlerStack. std::lock_guard locks the mutex on construction and unlocks it when
// the guard is destroyed.
static std::mutex sHandlerStackMutex;
// True once installAlternateStackLocked has completed.
static bool sStackInstalled = false;
// InstallAlternateStackLocked will store the newly installed stack in new_stack
// and (if it exists) the previously installed stack in old_stack.
static stack_t sOldStack;
static stack_t sNewStack;
// Ensures an alternate signal stack of at least kSigStackSize bytes exists. If the current
// one is missing or too small, a new zeroed block is allocated and installed; the previous
// stack is remembered in sOldStack. Leaves sStackInstalled false when allocation or
// installation fails.
static void installAlternateStackLocked() {
    if (sStackInstalled)
        return;

    memset(&sOldStack, 0, sizeof(sOldStack));
    memset(&sNewStack, 0, sizeof(sNewStack));
    static constexpr unsigned kSigStackSize = std::max(16384, SIGSTKSZ);

    const bool needNewStack = sigaltstack(nullptr, &sOldStack) == -1 || !sOldStack.ss_sp ||
                              sOldStack.ss_size < kSigStackSize;
    if (needNewStack) {
        sNewStack.ss_sp = calloc(1, kSigStackSize);
        if (sNewStack.ss_sp == nullptr) {
            // Out of memory: do not hand a null stack to the kernel.
            return;
        }
        sNewStack.ss_size = kSigStackSize;
        if (sigaltstack(&sNewStack, nullptr) == -1) {
            free(sNewStack.ss_sp);
            // Do not keep a pointer to freed memory around.
            sNewStack.ss_sp = nullptr;
            return;
        }
    }

    sStackInstalled = true;
    ALOGV("Alternative stack installed.");
}
// Runs before crashing: normal context.
// Installs signalHandler for TARGET_SIG via sigaction, saving the previous disposition in
// sOldHandlers. Returns false if a handler is already installed or the previous disposition
// cannot be read; returns true otherwise, even when installing the new handler failed.
bool SignalHandler::installHandlersLocked() {
    if (sHandlerInstalled) {
        return false;
    }

    // Fail if unable to store all the old handlers.
    const int saveResult = sigaction(TARGET_SIG, nullptr, &sOldHandlers);
    if (saveResult == -1) {
        return false;
    }

    struct sigaction action{};
    action.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART;
    action.sa_sigaction = signalHandler;

    const int installResult = sigaction(TARGET_SIG, &action, nullptr);
    if (installResult == -1) {
        ALOGV("Signal handler cannot be installed");
        // At this point it is impractical to back out changes, and so failure to
        // install a signal is intentionally ignored.
    }

    sHandlerInstalled = true;
    ALOGV("Signal handler installed.");
    return true;
}
// Resets the disposition of `sig` to SIG_DFL (with SA_RESTART and an empty mask).
//
// Android L+ expose signal and sigaction symbols that override the system
// ones. There is a bug in these functions where a request to set the handler
// to SIG_DFL is ignored. In that case, an infinite loop is entered as the
// signal is repeatedly sent to breakpad's signal handler.
// To work around this, directly call the system's sigaction.
// NOTE(review): the code below calls plain sigaction(), not a raw syscall — confirm this
// matches the workaround described above.
static void installDefaultHandler(int sig) {
    struct sigaction defaultAction;
    memset(&defaultAction, 0, sizeof(defaultAction));
    defaultAction.sa_flags = SA_RESTART;
    defaultAction.sa_handler = SIG_DFL;
    sigemptyset(&defaultAction.sa_mask);
    sigaction(sig, &defaultAction, nullptr);
}
// This function runs in a compromised context: see the top of the file.
// Runs on the crashing thread.
// Puts the disposition saved in sOldHandlers back for TARGET_SIG; if that fails, falls back
// to the default disposition. No-op when our handler is not installed.
static void restoreHandlersLocked() {
    if (!sHandlerInstalled) {
        return;
    }

    const bool restored = sigaction(TARGET_SIG, &sOldHandlers, nullptr) != -1;
    if (!restored) {
        installDefaultHandler(TARGET_SIG);
    }

    sHandlerInstalled = false;
    ALOGV("Signal handler restored.");
}
// Undoes installAlternateStackLocked: if the active alternate stack is still the one we
// installed, the previous stack is reinstated (or alternate stacks are disabled when there
// was none), then our stack memory is released.
static void restoreAlternateStackLocked() {
    if (!sStackInstalled)
        return;

    stack_t current_stack;
    if (sigaltstack(nullptr, &current_stack) == -1)
        return;

    // Only restore the old_stack if the current alternative stack is the one
    // installed by the call to InstallAlternateStackLocked.
    if (current_stack.ss_sp == sNewStack.ss_sp) {
        if (sOldStack.ss_sp) {
            if (sigaltstack(&sOldStack, nullptr) == -1)
                return;
        } else {
            // Zero-initialise so ss_sp and ss_size hold defined values.
            stack_t disable_stack = {};
            disable_stack.ss_flags = SS_DISABLE;
            if (sigaltstack(&disable_stack, nullptr) == -1)
                return;
        }
    }

    free(sNewStack.ss_sp);
    // Drop the stale pointer so later comparisons never match freed memory.
    sNewStack.ss_sp = nullptr;
    sStackInstalled = false;
}
// This function runs in a compromised context: see the top of the file.
// Runs on the crashing thread.
// sigaction entry point: forwards the signal to every registered SignalHandler, most recently
// registered first. The individual results are not inspected.
void SignalHandler::signalHandler(int sig, siginfo_t *info, void *uc) {
    ALOGV("Entered signal handler.");

    // All the exception signals are blocked at this point.
    std::lock_guard<std::mutex> lock(sHandlerStackMutex);
    if (sHandlerStack == nullptr) {
        // The last handler was destroyed while this signal was waiting for the lock.
        return;
    }
    for (auto it = sHandlerStack->rbegin(); it != sHandlerStack->rend(); ++it) {
        (*it)->handleSignal(sig, info, uc);
    }
}
// Registers this object in the global handler stack, creating the stack on first use and
// making sure the alternate signal stack and the sigaction handler are installed.
SignalHandler::SignalHandler() {
    std::lock_guard<std::mutex> guard(sHandlerStackMutex);

    if (sHandlerStack == nullptr) {
        sHandlerStack = new std::vector<SignalHandler *>;
    }

    // Both calls return early when their work has already been done.
    installAlternateStackLocked();
    installHandlersLocked();

    sHandlerStack->push_back(this);
}
// Unregisters this object from the global handler stack. When the stack becomes empty it is
// deleted and the previous alternate stack and signal disposition are restored.
SignalHandler::~SignalHandler() {
    std::lock_guard<std::mutex> lock(sHandlerStackMutex);
    if (sHandlerStack == nullptr) {
        return;
    }

    auto it = std::find(sHandlerStack->begin(), sHandlerStack->end(), this);
    // Erasing end() is undefined, so only erase an entry that was actually found.
    if (it != sHandlerStack->end()) {
        sHandlerStack->erase(it);
    }

    if (sHandlerStack->empty()) {
        delete sHandlerStack;
        sHandlerStack = nullptr;
        restoreAlternateStackLocked();
        restoreHandlersLocked();
    }
}
} // namespace MatrixTracer
|