Logd框架梳理
1. 整体框架
2. 流程梳理
1. Logd进程启动
-
logd进程是在Android系统启动时,init进程通过解析 init.rc 文件启动的,大致分析一下启动流程 # system/core/rootdir/init.rc
on post-fs
# Load properties from
# /system/build.prop,
# /odm/build.prop,
# /vendor/build.prop and
# /factory/factory.prop
load_system_props
# start essential services
# 开始启动 logd.rc
start logd
start servicemanager
start hwservicemanager
start vndservicemanager
# system/core/logd/logd.rc
# 启动logd 进程
service logd /system/bin/logd
# 启动三个socket
socket logd stream 0666 logd logd
socket logdr seqpacket 0666 logd logd
socket logdw dgram+passcred 0222 logd logd
file /proc/kmsg r
file /dev/kmsg w
user logd
group logd system package_info readproc
writepid /dev/cpuset/system-background/tasks
# 启动 logd-reinit
service logd-reinit /system/bin/logd --reinit
# 开机只启动一次
oneshot
disabled
user logd
group logd
writepid /dev/cpuset/system-background/tasks
on fs
write /dev/event-log-tags "# content owned by logd"
chown logd logd /dev/event-log-tags
chmod 0644 /dev/event-log-tags
- 在 logd.rc 中,会启动 logd 进程,并创建三个socket,启动完成后,还会启动 logd-reinit;首先先看一下 logd 的启动
# system/core/logd/main.cpp
int main(int argc, char* argv[]) {
setenv("TZ", "UTC", 1);
if ((argc > 1) && argv[1] && !strcmp(argv[1], "--reinit")) {
return issueReinit();
}
static const char dev_kmsg[] = "/dev/kmsg";
fdDmesg = android_get_control_file(dev_kmsg);
if (fdDmesg < 0) {
fdDmesg = TEMP_FAILURE_RETRY(open(dev_kmsg, O_WRONLY | O_CLOEXEC));
}
int fdPmesg = -1;
bool klogd = __android_logger_property_get_bool(
"ro.logd.kernel",
BOOL_DEFAULT_TRUE | BOOL_DEFAULT_FLAG_ENG | BOOL_DEFAULT_FLAG_SVELTE);
if (klogd) {
static const char proc_kmsg[] = "/proc/kmsg";
fdPmesg = android_get_control_file(proc_kmsg);
if (fdPmesg < 0) {
fdPmesg = TEMP_FAILURE_RETRY(
open(proc_kmsg, O_RDONLY | O_NDELAY | O_CLOEXEC));
}
if (fdPmesg < 0) android::prdebug("Failed to open %s\n", proc_kmsg);
}
sem_init(&reinit, 0, 0);
sem_init(&uidName, 0, 0);
sem_init(&sem_name, 0, 1);
pthread_attr_t attr;
if (!pthread_attr_init(&attr)) {
struct sched_param param;
memset(¶m, 0, sizeof(param));
pthread_attr_setschedparam(&attr, ¶m);
pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
pthread_t thread;
reinit_running = true;
if (pthread_create(&thread, &attr, reinit_thread_start, nullptr)) {
reinit_running = false;
}
}
pthread_attr_destroy(&attr);
}
bool auditd =
__android_logger_property_get_bool("ro.logd.auditd", BOOL_DEFAULT_TRUE);
if (drop_privs(klogd, auditd) != 0) {
return -1;
}
LastLogTimes* times = new LastLogTimes();
logBuf = new LogBuffer(times);
signal(SIGHUP, reinit_signal_handler);
if (__android_logger_property_get_bool(
"logd.statistics", BOOL_DEFAULT_TRUE | BOOL_DEFAULT_FLAG_PERSIST |
BOOL_DEFAULT_FLAG_ENG |
BOOL_DEFAULT_FLAG_SVELTE)) {
logBuf->enableStatistics();
}
LogReader* reader = new LogReader(logBuf);
if (reader->startListener()) {
exit(1);
}
LogListener* swl = new LogListener(logBuf, reader);
if (swl->startListener(600)) {
exit(1);
}
CommandListener* cl = new CommandListener(logBuf, reader, swl);
if (cl->startListener()) {
exit(1);
}
LogAudit* al = nullptr;
if (auditd) {
al = new LogAudit(logBuf, reader,
__android_logger_property_get_bool(
"ro.logd.auditd.dmesg", BOOL_DEFAULT_TRUE)
? fdDmesg
: -1);
}
LogKlog* kl = nullptr;
if (klogd) {
kl = new LogKlog(logBuf, reader, fdDmesg, fdPmesg, al != nullptr);
}
readDmesg(al, kl);
if (kl && kl->startListener()) {
delete kl;
}
if (al && al->startListener()) {
delete al;
}
TEMP_FAILURE_RETRY(pause());
exit(0);
}
- 可以看到,在 logd 进程的启动过程中,主要完成了这样几件事:
- 打开 /dev/kmsg 节点(根据配置决定是否打开 /proc/kmsg 节点)
- 创建reinit线程,用来监测是否存在 reinit 请求,初始化 logBuffer
- 设置相关运行时优先级、权限
- 启动各个 log 监听器
- /dev/socket/logdr:监听的客户端的连接,用作读取
- /dev/socket/logdw:监听的是日志的写入
- /dev/socket/logd:监听的是log指令
- 根据属性判断创建
LogKlog 和 LogAudit 至此就完成了 logd 进程的启动,时序图如下:
-
这里再拓展一下,看listener的启动,以 LogListener 为例,如上启动的方式是:swl->startListener(600) ,LogListener 的父类是 SocketListener # system/core/libsysutils/src/SocketListener.cpp
int SocketListener::startListener(int backlog) {
...
if (pthread_create(&mThread, NULL, SocketListener::threadStart, this)) {
SLOGE("pthread_create (%s)", strerror(errno));
return -1;
}
return 0;
}
void *SocketListener::threadStart(void *obj) {
SocketListener *me = reinterpret_cast<SocketListener *>(obj);
me->runListener();
pthread_exit(NULL);
return NULL;
}
void SocketListener::runListener() {
SocketClientCollection pendingList;
while(1) {
...
while (!pendingList.empty()) {
it = pendingList.begin();
SocketClient* c = *it;
pendingList.erase(it);
if (!onDataAvailable(c)) {
release(c, false);
}
c->decRef();
}
}
}
- 可以看到,当监听到事件,会使用
onDataAvailable() 回调函数进行处理
2. Logd日志写入
-
在Java层,写入日志使用的通常是 android/util/Log.java 类中的 log.d() 方法(先不区分等级),那么就由此开始梳理写入的流程 # framework/base/core/java/android/util/Log.java
public static int e(String tag, String msg) {
return println_native(LOG_ID_MAIN, ERROR, tag, msg);
}
# framework/base/core/jni/android_util_Log.cpp
static jint android_util_Log_println_native(JNIEnv* env, jobject clazz,
jint bufID, jint priority, jstring tagObj, jstring msgObj)
{
const char* tag = NULL;
const char* msg = NULL;
if (msgObj == NULL) {
jniThrowNullPointerException(env, "println needs a message");
return -1;
}
...
int res = __android_log_buf_write(bufID, (android_LogPriority)priority, tag, msg);
if (tag != NULL)
env->ReleaseStringUTFChars(tagObj, tag);
env->ReleaseStringUTFChars(msgObj, msg);
return res;
}
__android_log_buf_write() 就是 liblog.so 库中的函数,也就是调用到了 liblog 目录下了 # system/core/liblog/logger_write.c
static int (*write_to_log)(log_id_t, struct iovec* vec,
size_t nr) = __write_to_log_init;
LIBLOG_ABI_PUBLIC int __android_log_buf_write(int bufID, int prio,
const char* tag, const char* msg) {
struct iovec vec[3];
char tmp_tag[32];
if (!tag) tag = "";
if (bufID != LOG_ID_RADIO) {
switch (tag[0]) {
...
inform:
bufID = LOG_ID_RADIO;
snprintf(tmp_tag, sizeof(tmp_tag), "use-Rlog/RLOG-%s", tag);
tag = tmp_tag;
default:
break;
}
}
#if __BIONIC__
if (prio == ANDROID_LOG_FATAL) {
android_set_abort_message(msg);
}
#endif
vec[0].iov_base = (unsigned char*)&prio;
vec[0].iov_len = 1;
vec[1].iov_base = (void*)tag;
vec[1].iov_len = strlen(tag) + 1;
vec[2].iov_base = (void*)msg;
vec[2].iov_len = strlen(msg) + 1;
return write_to_log(bufID, vec, 3);
}
- 可以看到,从上层传入的数据都被转存到了 iovec 结构体中,并且最后是通过函数指针进行继续下发,而该函数指针在一开始是定义为
__write_to_log_init() 方法的,继续追查流程 # system/core/liblog/logger_write.c
static int __write_to_log_init(log_id_t log_id, struct iovec* vec, size_t nr) {
int ret, save_errno = errno;
__android_log_lock();
if (write_to_log == __write_to_log_init) {
ret = __write_to_log_initialize();
if (ret < 0) {
...
return ret;
}
write_to_log = __write_to_log_daemon;
}
__android_log_unlock();
ret = write_to_log(log_id, vec, nr);
errno = save_errno;
return ret;
}
- 在该函数中,首先是调用
__write_to_log_initialize() 函数完成了初始化操作,接着将 函数指针重指向到 __write_to_log_daemon() ,最后再次调用函数指针完成数据下发;先来梳理一下初始化中都做了哪些事情 # system/core/liblog/logger_write.c
static int __write_to_log_initialize() {
struct android_log_transport_write* transport;
struct listnode* n;
int i = 0, ret = 0;
__android_log_config_write();
...
return ret;
}
- 省去相对不重要的代码,可以看到在该函数中完成了相关函数的配置
# system/core/liblog/config_write.c
LIBLOG_HIDDEN void __android_log_config_write() {
if (__android_log_transport & LOGGER_LOCAL) {
extern struct android_log_transport_write localLoggerWrite;
__android_log_add_transport(&__android_log_transport_write,
&localLoggerWrite);
}
...
}
- 在该函数中,主要就是根据
__android_log_transport 的值进行 __android_log_transport_write 列表中逐一元素的赋值,此处给的就是 android_log_transport_write 结构体中的元素,可以看一下该结构体的元素 # system/core/liblog/logd_writer.c
LIBLOG_HIDDEN struct android_log_transport_write logdLoggerWrite = {
.node = { &logdLoggerWrite.node, &logdLoggerWrite.node },
.context.sock = -EBADF,
.name = "logd",
.available = logdAvailable,
.open = logdOpen,
.close = logdClose,
.write = logdWrite,
};
- 可以看到元素逐一进行对应,那么返回去看
__write_to_log_init() 函数中,继续调用的 __write_to_log_daemon() 函数 # system/core/liblog/logger_write.c
static int __write_to_log_daemon(log_id_t log_id, struct iovec* vec, size_t nr) {
struct android_log_transport_write* node;
int ret, save_errno;
struct timespec ts;
size_t len, i;
...
write_transport_for_each(node, &__android_log_persist_write) {
if (node->logMask & i) {
(void)(*node->write)(log_id, &ts, vec, nr);
}
}
errno = save_errno;
return ret;
}
- 最后调用到的是
*node->write() 函数,而根据上面流程的分析,可以获知,此处的 write() 函数本质上是 logdWrite() 函数
static int logdWrite(log_id_t logId, struct timespec* ts, struct iovec* vec,
size_t nr) {
ssize_t ret;
int sock;
static const unsigned headerLength = 1;
struct iovec newVec[nr + headerLength];
android_log_header_t header;
size_t i, payloadSize;
static atomic_int_fast32_t dropped;
static atomic_int_fast32_t droppedSecurity;
sock = atomic_load(&logdLoggerWrite.context.sock);
...
header.id = logId;
for (payloadSize = 0, i = headerLength; i < nr + headerLength; i++) {
newVec[i].iov_base = vec[i - headerLength].iov_base;
payloadSize += newVec[i].iov_len = vec[i - headerLength].iov_len;
if (payloadSize > LOGGER_ENTRY_MAX_PAYLOAD) {
newVec[i].iov_len -= payloadSize - LOGGER_ENTRY_MAX_PAYLOAD;
if (newVec[i].iov_len) {
++i;
}
break;
}
}
...
if (sock < 0) {
ret = sock;
} else {
ret = TEMP_FAILURE_RETRY(writev(sock, newVec, i));
if (ret < 0) {
ret = -errno;
}
}
...
}
- 可以看到,在该函数中,就开始向 dev/socket/logdw 进行数据的写入,而一旦该 socket 写入数据,那么
LogListener 就会被唤醒,那么接下来就会开始将数据存放到 logbuf 中,根据篇章一的分析,可以得到,当socket被唤醒,会执行 onDataAvailable() 回调函数进行处理,那么接下来顺着该函数进行分析 # system/core/logd/LogListener.cpp
bool LogListener::onDataAvailable(SocketClient* cli) {
...
int socket = cli->getSocket();
ssize_t n = recvmsg(socket, &hdr, 0);
...
if (logbuf != nullptr) {
int res = logbuf->log(
logId, header->realtime, cred->uid, cred->pid, header->tid, msg,
((size_t)n <= USHRT_MAX) ? (unsigned short)n : USHRT_MAX);
if (res > 0 && reader != nullptr) {
reader->notifyNewLog(static_cast<log_mask_t>(1 << logId));
}
}
return true;
}
- 可以看到,最终的处理就是将log信息往 logbuf 中存入,并且在存入完成后,还会通知
reader 监听线程,先来看一下log的存入 # system/core/logd/LogBuffer.cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, unsigned short len) {
...
LogBufferElement* elem =
new LogBufferElement(log_id, realtime, uid, pid, tid, msg, len);
if (log_id != LOG_ID_SECURITY) {
int prio = ANDROID_LOG_INFO;
const char* tag = nullptr;
size_t tag_len = 0;
if (log_id == LOG_ID_EVENTS || log_id == LOG_ID_STATS) {
tag = tagToName(elem->getTag());
if (tag) {
tag_len = strlen(tag);
}
} else {
prio = *msg;
tag = msg + 1;
tag_len = strnlen(tag, len - 1);
}
if (!__android_log_is_loggable_len(prio, tag, tag_len,
ANDROID_LOG_VERBOSE)) {
wrlock();
stats.addTotal(elem);
unlock();
delete elem;
return -EACCES;
}
}
wrlock();
LogBufferElement* currentLast = lastLoggedElements[log_id];
if (currentLast) {
LogBufferElement* dropped = droppedElements[log_id];
unsigned short count = dropped ? dropped->getDropped() : 0;
...
enum match_type match = identical(elem, currentLast);
...
lastLoggedElements[log_id] = new LogBufferElement(*elem);
log(elem);
unlock();
return len;
}
- 该函数大致就是将 log 数据封装到了 elem 结构中,然后再将该 elem 元素插入
mLogElements 中 # system/core/logd/LogBuffer.cpp
void LogBuffer::log(LogBufferElement* elem) {
...
if (end_always || (end_set && (end > (*it)->getRealTime()))) {
mLogElements.push_back(elem);
} else {
do {
last = it;
if (__predict_false(it == mLogElements.begin())) {
break;
}
--it;
} while (((*it)->getRealTime() > elem->getRealTime()) &&
(!end_set || (end <= (*it)->getRealTime())));
mLogElements.insert(last, elem);
}
LogTimeEntry::unlock();
}
stats.add(elem);
maybePrune(elem->getLogId());
}
- 经过一系列的判断,最终会将
elem 插入到 mLogElements 中,同时还会添加到 stats 中,该 stats 是记录当前log存放状态的,最后判断log是否已经存满,是否需要删除 # system/core/logd/LogBuffer.cpp
void LogBuffer::maybePrune(log_id_t id) {
size_t sizes = stats.sizes(id);
unsigned long maxSize = log_buffer_size(id);
if (sizes > maxSize) {
size_t sizeOver = sizes - ((maxSize * 9) / 10);
size_t elements = stats.realElements(id);
size_t minElements = elements / 100;
if (minElements < minPrune) {
minElements = minPrune;
}
unsigned long pruneRows = elements * sizeOver / sizes;
if (pruneRows < minElements) {
pruneRows = minElements;
}
if (pruneRows > maxPrune) {
pruneRows = maxPrune;
}
prune(id, pruneRows);
}
}
- 最后删除的log 的逻辑是在
prune() 函数中完成的,该函数太长有点复杂,暂时没有进入分析,网络上找到一篇分析的文章:Android log 机制 - 删除过多的 log | Jekton,总结一下该函数中主要完成这几件事:
- 计算一个
watermark ,表示所有客户正在读取的最早的log。时间小于 watermark 的 log 都不能删除 - 如果是客户请求删除 log,删除对应 uid 的 log
- 删除黑名单里的 log
- 如果已删除的条数还不够,删除不在白名单里的 log
- 如果已删除的条数还不够,删除白名单里的 log
-
至此,写入log的大致流程梳理完成,时序图如下:
3. Logd日志读取
-
log内容的读取,在串口或者借助ADB使用通过指令 logcat 可进行读取,而该命令的解析就是借助 logcat command 指令解析监听完成的,接下来解析一下解析和输出日志的过程 -
首先是 logcat command 解析器的创建,是通过 logcatd.rc 文件解析进行创建的 # system/core/logcatd.rc
# 声明了 logcatd service
# logcatd service
service logcatd /system/bin/logcatd -L -b ${logd.logpersistd.buffer:-all} -v threadtime -v usec -v printable -D -f /data/misc/logd/logcat -r 20480 -n ${logd.logpersistd.size:-256} --id=${ro.build.id}
class late_start
disabled
# logd for write to /data/misc/logd, log group for read from log daemon
user logd
group log
writepid /dev/cpuset/system-background/tasks
oom_score_adjust -600
- 当该
service 启动时,会去执行 logcatd_main.cpp 中的 main() 函数 # system/core/logcat/logcatd_main.cpp
int main(int argc, char** argv, char** envp) {
android_logcat_context ctx = create_android_logcat();
if (!ctx) return -1;
signal(SIGPIPE, exit);
std::vector<std::string> args;
bool last = false;
for (int i = 0; i < argc; ++i) {
if (!argv[i]) continue;
args.push_back(std::string(argv[i]));
if (!strcmp(argv[i], "-L") || !strcmp(argv[i], "--last")) last = true;
}
std::vector<const char*> argv_hold;
for (auto& str : args) argv_hold.push_back(str.c_str());
argv_hold.push_back(nullptr);
int ret = 0;
if (last) {
ret = android_logcat_run_command(ctx, -1, -1, argv_hold.size() - 1,
(char* const*)&argv_hold[0], envp);
for (std::vector<const char*>::iterator it = argv_hold.begin();
it != argv_hold.end();) {
if (!*it || (strcmp(*it, "-L") && strcmp(*it, "--last"))) {
++it;
} else {
it = argv_hold.erase(it);
}
}
...
}
- 在该
main() 函数中,会调用 android_logcat_run_command() 函数进行 command 解析器的启动 # system/core/logcat/logcat.cpp
int android_logcat_run_command(android_logcat_context ctx,
int output, int error,
int argc, char* const* argv,
char* const* envp) {
android_logcat_context_internal* context = ctx;
context->output_fd = output;
context->error_fd = error;
context->argc = argc;
context->argv = argv;
context->envp = envp;
context->stop = false;
context->thread_stopped = false;
return __logcat(context);
}
static int __logcat(android_logcat_context_internal* context) {
...
for (int i = 0; i < argc; ++i) {
...
}
...
if (argc == 2 && !strcmp(argv[1], "--help")) {
show_help(context);
context->retval = EXIT_SUCCESS;
goto exit;
}
...
for (;;) {
...
ret = getopt_long_r(argc, argv, ":cdDhLt:T:gG:sQf:r:n:v:b:BSpP:m:e:",
long_options, &option_index, &optctx);
if (ret < 0) break;
switch (ret) {
...
}
}
...
if (!context->devices) {
dev = context->devices = new log_device_t("main", false);
context->devCount = 1;
if (android_name_to_log_id("system") == LOG_ID_SYSTEM) {
dev = dev->next = new log_device_t("system", false);
context->devCount++;
}
if (android_name_to_log_id("crash") == LOG_ID_CRASH) {
dev = dev->next = new log_device_t("crash", false);
context->devCount++;
}
}
...
dev = nullptr;
while (!context->stop &&
(!context->maxCount || (context->printCount < context->maxCount))) {
struct log_msg log_msg;
int ret = android_logger_list_read(logger_list, &log_msg);
if (!ret) {
logcat_panic(context, HELP_FALSE, "read: unexpected EOF!\n");
break;
}
...
if (context->printBinary) {
printBinary(context, &log_msg);
} else {
processBuffer(context, dev, &log_msg);
}
}
...
}
1. 从*/dev/socket/logdr* 读取日志
-
如上分析,读取日志是借助 android_logger_list_read() 函数完成的,经过转换,又会调用到 android_transport_read() 函数, # system/core/liblog/logger_read.c
static int android_transport_read(struct android_log_logger_list* logger_list,
struct android_log_transport_context* transp,
struct log_msg* log_msg) {
int ret = (*transp->transport->read)(logger_list, transp, log_msg);
...
}
- 和之前的
write() 方法类似,通过一系列的转化,最后是调用的函数: logdRead() # system/core/liblog/logd_reader.c
static int logdRead(struct android_log_logger_list* logger_list,
struct android_log_transport_context* transp,
struct log_msg* log_msg) {
int ret, e;
struct sigaction ignore;
struct sigaction old_sigaction;
unsigned int old_alarm = 0;
ret = logdOpen(logger_list, transp);
if (ret < 0) {
return ret;
}
memset(log_msg, 0, sizeof(*log_msg));
...
ret = recv(ret, log_msg, LOGGER_ENTRY_MAX_LEN, 0);
e = errno;
...
return ret;
}
-
可以看到,在该函数中,会首先 创建一个client客户去连接 logdr socket,而这会唤醒 LogReader 线程,就会调用到 LogReader.onDataAvailable() 函数;其次会调用 recv() 接收 socket 数据,将其存入 log_msg ,这样就将日志信息存到了 log_msg 变量中; -
在这里我们再深究一下当有客户端连接 logdr socket时,在LogReader.onDataAvailable() 都会做些什么? # system/core/logd/LogReader.cpp
bool LogReader::onDataAvailable(SocketClient* cli) {
...
logbuf().flushTo(cli, sequence, nullptr, FlushCommand::hasReadLogs(cli),
FlushCommand::hasSecurityLogs(cli),
logFindStart.callback, &logFindStart);
...
}
- 在该函数中,会调用到 LogBuffer 中去,而
LogBuffer.flushTo() 是调用到 LogBufferElement.flushTo() 函数中 # system/core/logd/LogBufferElement.cpp
log_time LogBufferElement::flushTo(SocketClient* reader, LogBuffer* parent,
bool privileged, bool lastSame) {
...
log_time retval = reader->sendDatav(iovec, 1 + (entry.len != 0))
? FLUSH_ERROR
: mRealTime;
...
}
- 最后又调用到了
LogReader.sendDatav() 函数,LogReader 是继承自 SocketClient 类的,而在这其中,又是调用到了 sendDataLockedv() 函数中 # system/core/libsysutils/src/SocketClient.cpp
int SocketClient::sendDataLockedv(struct iovec *iov, int iovcnt) {
...
for (;;) {
ssize_t rc = TEMP_FAILURE_RETRY(
writev(mSocket, iov + current, iovcnt - current));
...
}
...
}
-
至此读取的过程就分析完毕了,总结时序图如下:
2. 输出日志
-
如上面分析,输出日志是通过 printBinary() 或者 processBuffer() 完成的,大致看一下 # system/core/logcat/logcat.cpp
void printBinary(android_logcat_context_internal* context, struct log_msg* buf) {
size_t size = buf->len();
TEMP_FAILURE_RETRY(write(context->output_fd, buf, size));
}
printBinary() 其实就是通过 write() 方法直接写入到了特定的位置;而分析 processBuffer() 方法其实本质也是一样的,也是通过 write() 方法写入特定位置,只是多做了一层数据格式的读取和转换
|