From 6f11aff418eca582916b41f1fd6baa1564c46510 Mon Sep 17 00:00:00 2001 From: Vova Sharaienko Date: Sat, 24 Apr 2021 00:30:06 +0000 Subject: [PATCH] lmkd: reroute atoms logging to AMS - Added new lmkd message for clients to subscribe LMK_ASYNC_EVENT_STAT - Added support to write kill & mem stats information via data socket to be read & parsed on the AMS Java side for future logging to statsd Bug: 184698933 Test: lmkd_unit_test - test check_for_oom tests lmkd message send to AMS Test: statsd_testdrive 51 54 to inspect statsd logged atoms data Change-Id: Id682a438c87b3e4503261d26461f6cee641d86c4 --- Android.bp | 34 +-------- include/lmkd.h | 21 +++--- lmkd.cpp | 49 ++++++++++++- statslog.cpp | 195 ++++++++++++++++++++++++++++++------------------- statslog.h | 70 ++++++++++++------ 5 files changed, 223 insertions(+), 146 deletions(-) diff --git a/Android.bp b/Android.bp index 267cd48..fcf26ca 100644 --- a/Android.bp +++ b/Android.bp @@ -18,11 +18,9 @@ cc_binary { "liblog", "libprocessgroup", "libpsi", - "libstatssocket", ], static_libs: [ "libstatslogc", - "libstatslog_lmkd", "liblmkd_utils", ], local_include_dirs: ["include"], @@ -40,6 +38,7 @@ cc_binary { cc_library_static { name: "libstatslogc", srcs: ["statslog.cpp"], + local_include_dirs: ["include"], cflags: [ "-Wall", "-Werror", @@ -49,37 +48,6 @@ cc_library_static { shared_libs: [ "liblog", ], - static_libs: [ - "libstatslog_lmkd", - ], -} - -genrule { - name: "statslog_lmkd.h", - tools: ["stats-log-api-gen"], - cmd: "$(location stats-log-api-gen) --header $(genDir)/statslog_lmkd.h --module lmkd --namespace android,lmkd,stats", - out: [ - "statslog_lmkd.h", - ], -} - -genrule { - name: "statslog_lmkd.cpp", - tools: ["stats-log-api-gen"], - cmd: "$(location stats-log-api-gen) --cpp $(genDir)/statslog_lmkd.cpp --module lmkd --namespace android,lmkd,stats --importHeader statslog_lmkd.h", - out: [ - "statslog_lmkd.cpp", - ], -} - -cc_library_static { - name: "libstatslog_lmkd", - generated_sources: ["statslog_lmkd.cpp"], - generated_headers: ["statslog_lmkd.h"], - export_generated_headers: ["statslog_lmkd.h"], - shared_libs: [ - "libstatssocket", - ], } cc_library_static { diff --git a/include/lmkd.h b/include/lmkd.h index ad5dc75..c814d17 100644 --- a/include/lmkd.h +++ b/include/lmkd.h @@ -27,14 +27,16 @@ __BEGIN_DECLS * Supported LMKD commands */ enum lmk_cmd { - LMK_TARGET = 0, /* Associate minfree with oom_adj_score */ - LMK_PROCPRIO, /* Register a process and set its oom_adj_score */ - LMK_PROCREMOVE, /* Unregister a process */ - LMK_PROCPURGE, /* Purge all registered processes */ - LMK_GETKILLCNT, /* Get number of kills */ - LMK_SUBSCRIBE, /* Subscribe for asynchronous events */ - LMK_PROCKILL, /* Unsolicited msg to subscribed clients on proc kills */ - LMK_UPDATE_PROPS, /* Reinit properties */ + LMK_TARGET = 0, /* Associate minfree with oom_adj_score */ + LMK_PROCPRIO, /* Register a process and set its oom_adj_score */ + LMK_PROCREMOVE, /* Unregister a process */ + LMK_PROCPURGE, /* Purge all registered processes */ + LMK_GETKILLCNT, /* Get number of kills */ + LMK_SUBSCRIBE, /* Subscribe for asynchronous events */ + LMK_PROCKILL, /* Unsolicited msg to subscribed clients on proc kills */ + LMK_UPDATE_PROPS, /* Reinit properties */ + LMK_STAT_KILL_OCCURRED, /* Unsolicited msg to subscribed clients on proc kills for statsd log */ + LMK_STAT_STATE_CHANGED, /* Unsolicited msg to subscribed clients on state changed */ }; /* @@ -204,10 +206,11 @@ static inline size_t lmkd_pack_set_getkillcnt_repl(LMKD_CTRL_PACKET packet, int return 2 * sizeof(int); } -/* Types of asyncronous events sent from lmkd to its clients */ +/* Types of asynchronous events sent from lmkd to its clients */ enum async_event_type { LMK_ASYNC_EVENT_FIRST, LMK_ASYNC_EVENT_KILL = LMK_ASYNC_EVENT_FIRST, + LMK_ASYNC_EVENT_STAT, LMK_ASYNC_EVENT_COUNT, }; diff --git a/lmkd.cpp b/lmkd.cpp index e1c5276..dc93ac4 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -758,6 +757,49 @@ static void ctrl_data_write_lmk_kill_occurred(pid_t pid, uid_t uid) { } } +/* + * Write the kill_stat/memory_stat over the data socket to be propagated via AMS to statsd + */ +static void stats_write_lmk_kill_occurred(struct kill_stat *kill_st, + struct memory_stat *mem_st) { + LMK_KILL_OCCURRED_PACKET packet; + const size_t len = lmkd_pack_set_kill_occurred(packet, kill_st, mem_st); + if (len == 0) { + return; + } + + for (int i = 0; i < MAX_DATA_CONN; i++) { + if (data_sock[i].sock >= 0 && data_sock[i].async_event_mask & 1 << LMK_ASYNC_EVENT_STAT) { + ctrl_data_write(i, packet, len); + } + } + +} + +static void stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st, + struct memory_stat *mem_st) { + kill_st->taskname = stats_get_task_name(pid); + if (kill_st->taskname != NULL) { + stats_write_lmk_kill_occurred(kill_st, mem_st); + } +} + +/* + * Write the state_changed over the data socket to be propagated via AMS to statsd + */ +static void stats_write_lmk_state_changed(enum lmk_state state) { + LMKD_CTRL_PACKET packet_state_changed; + const size_t len = lmkd_pack_set_state_changed(packet_state_changed, state); + if (len == 0) { + return; + } + for (int i = 0; i < MAX_DATA_CONN; i++) { + if (data_sock[i].sock >= 0 && data_sock[i].async_event_mask & 1 << LMK_ASYNC_EVENT_STAT) { + ctrl_data_write(i, (char*)packet_state_changed, len); + } + } +} + static void poll_kernel(int poll_fd) { if (poll_fd == -1) { // not waiting @@ -2183,8 +2225,7 @@ static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reaso if (killed_size >= 0) { if (!lmk_state_change_start) { lmk_state_change_start = true; - stats_write_lmk_state_changed( - android::lmkd::stats::LMK_STATE_CHANGED__STATE__START); + stats_write_lmk_state_changed(STATE_START); } break; } @@ -2195,7 +2236,7 @@ static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reaso } if (lmk_state_change_start) { - stats_write_lmk_state_changed(android::lmkd::stats::LMK_STATE_CHANGED__STATE__STOP); + stats_write_lmk_state_changed(STATE_STOP); } return killed_size; diff --git a/statslog.cpp b/statslog.cpp index 5560b6b..ba39f54 100644 --- a/statslog.cpp +++ b/statslog.cpp @@ -24,21 +24,27 @@ #include #include #include +#include +#include #include #include #include #ifdef LMKD_LOG_STATS -#define LINE_MAX 128 #define STRINGIFY(x) STRINGIFY_INTERNAL(x) #define STRINGIFY_INTERNAL(x) #x +/** + * Used to make sure that the payload is always smaller than LMKD_REPLY_MAX_SIZE + */ +#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) + static bool enable_stats_log = property_get_bool("ro.lmk.log_stats", true); struct proc { int pid; - char taskname[LINE_MAX]; + char taskname[MAX_TASKNAME_LEN]; struct proc* pidhash_next; }; @@ -46,20 +52,6 @@ struct proc { static struct proc** pidhash = NULL; #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) -/** - * Logs the change in LMKD state which is used as start/stop boundaries for logging - * LMK_KILL_OCCURRED event. - * Code: LMK_STATE_CHANGED = 54 - */ -int -stats_write_lmk_state_changed(int32_t state) { - if (enable_stats_log) { - return android::lmkd::stats::stats_write(android::lmkd::stats::LMK_STATE_CHANGED, state); - } else { - return -EINVAL; - } -} - static struct proc* pid_lookup(int pid) { struct proc* procp; @@ -71,68 +63,11 @@ static struct proc* pid_lookup(int pid) { return procp; } -inline int32_t map_kill_reason(enum kill_reasons reason) { - switch (reason) { - case PRESSURE_AFTER_KILL: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__PRESSURE_AFTER_KILL; - case NOT_RESPONDING: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__NOT_RESPONDING; - case LOW_SWAP_AND_THRASHING: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_SWAP_AND_THRASHING; - case LOW_MEM_AND_SWAP: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP; - case LOW_MEM_AND_THRASHING: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_THRASHING; - case DIRECT_RECL_AND_THRASHING: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__DIRECT_RECL_AND_THRASHING; - case LOW_MEM_AND_SWAP_UTIL: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP_UTIL; - default: - return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__UNKNOWN; - } -} - -/** - * Logs the event when LMKD kills a process to reduce memory pressure. - * Code: LMK_KILL_OCCURRED = 51 - */ -int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st) { - if (enable_stats_log) { - return android::lmkd::stats::stats_write( - android::lmkd::stats::LMK_KILL_OCCURRED, - kill_st->uid, - kill_st->taskname, - kill_st->oom_score, - mem_st ? mem_st->pgfault : -1, - mem_st ? mem_st->pgmajfault : -1, - mem_st ? mem_st->rss_in_bytes : -1, - mem_st ? mem_st->cache_in_bytes : -1, - mem_st ? mem_st->swap_in_bytes : -1, - mem_st ? mem_st->process_start_time_ns : -1, - kill_st->min_oom_score, - kill_st->free_mem_kb, - kill_st->free_swap_kb, - map_kill_reason(kill_st->kill_reason) - ); - } else { - return -EINVAL; - } -} - -int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st, - struct memory_stat* mem_st) { - struct proc* proc = pid_lookup(pid); - if (!proc) return -EINVAL; - - kill_st->taskname = proc->taskname; - return stats_write_lmk_kill_occurred(kill_st, mem_st); -} - static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) { - char key[LINE_MAX + 1]; + char key[MAX_TASKNAME_LEN + 1]; int64_t value; - sscanf(line, "%" STRINGIFY(LINE_MAX) "s %" SCNd64 "", key, &value); + sscanf(line, "%" STRINGIFY(MAX_TASKNAME_LEN) "s %" SCNd64 "", key, &value); if (strcmp(key, "total_") < 0) { return; @@ -275,8 +210,8 @@ void stats_store_taskname(int pid, const char* taskname) { } procp = static_cast(malloc(sizeof(struct proc))); procp->pid = pid; - strncpy(procp->taskname, taskname, LINE_MAX - 1); - procp->taskname[LINE_MAX - 1] = '\0'; + strncpy(procp->taskname, taskname, MAX_TASKNAME_LEN - 1); + procp->taskname[MAX_TASKNAME_LEN - 1] = '\0'; proc_insert(procp); } @@ -299,4 +234,110 @@ void stats_purge_tasknames() { memset(pidhash, 0, PIDHASH_SZ * sizeof(*pidhash)); } +const char* stats_get_task_name(int pid) { + struct proc* proc = pid_lookup(pid); + return proc ? proc->taskname : NULL; +} + +/** + * Writes int32 in a machine independent way + * https://docs.oracle.com/javase/7/docs/api/java/io/DataOutput.html#writeInt(int) + */ +static inline size_t pack_int32(LMK_KILL_OCCURRED_PACKET packet, + size_t index, + int32_t value) { + int32_t* int_buffer = (int32_t*)(packet + index); + + *int_buffer = htonl(value); + + return index + sizeof(int32_t); +} + +/** + * Writes int64 in a machine independent way + * https://docs.oracle.com/javase/7/docs/api/java/io/DataOutput.html#writeLong(long) + */ +static inline size_t pack_int64(LMK_KILL_OCCURRED_PACKET packet, + size_t index, + int64_t value) { + int64_t* int64_buffer = (int64_t*)(packet + index); + + *int64_buffer = htonq(value); + + return index + sizeof(int64_t); +} + +/** + * Writes ANSI string in a machine independent way + * https://docs.oracle.com/javase/7/docs/api/java/io/DataOutput.html#writeShort(int) + * 2 bytes str len following n chars + * to be read on the Java side with + * https://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#readUTF() + * Truncates the value string & packs up to MAX_TASKNAME_LEN - 1 chars + */ +static inline size_t pack_string(LMK_KILL_OCCURRED_PACKET packet, + size_t index, + const char* value) { + const size_t len_proc_name = MIN(strlen(value), MAX_TASKNAME_LEN - 1); + int16_t* short_buffer = (int16_t*)(packet + index); + *short_buffer = htons((int16_t)len_proc_name); + + char* byte_buffer = (char*)(short_buffer + 1); + strncpy(byte_buffer, value, MAX_TASKNAME_LEN - 1); + byte_buffer[MAX_TASKNAME_LEN - 1] = '\0'; + + return index + sizeof(int16_t) + len_proc_name + 1; +} + +size_t lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet, + struct kill_stat *kill_stat, + struct memory_stat *mem_stat) { + BUILD_BUG_ON(sizeof(LMK_KILL_OCCURRED_PACKET) > LMKD_REPLY_MAX_SIZE); + + if (!enable_stats_log) { + return 0; + } + + int32_t index = 0; + index = pack_int32(packet, index, LMK_STAT_KILL_OCCURRED); + + if (mem_stat) { + index = pack_int64(packet, index, mem_stat->pgfault); + index = pack_int64(packet, index, mem_stat->pgmajfault); + index = pack_int64(packet, index, mem_stat->rss_in_bytes); + index = pack_int64(packet, index, mem_stat->cache_in_bytes); + index = pack_int64(packet, index, mem_stat->swap_in_bytes); + index = pack_int64(packet, index, mem_stat->process_start_time_ns); + } else { + index = pack_int64(packet, index, -1); + index = pack_int64(packet, index, -1); + index = pack_int64(packet, index, -1); + index = pack_int64(packet, index, -1); + index = pack_int64(packet, index, -1); + index = pack_int64(packet, index, -1); + } + + index = pack_int32(packet, index, kill_stat->uid); + index = pack_int32(packet, index, kill_stat->oom_score); + index = pack_int32(packet, index, kill_stat->min_oom_score); + index = pack_int32(packet, index, (int)kill_stat->free_mem_kb); + index = pack_int32(packet, index, (int)kill_stat->free_swap_kb); + index = pack_int32(packet, index, (int)kill_stat->kill_reason); + + index = pack_string(packet, index, kill_stat->taskname); + return index; +} + +size_t lmkd_pack_set_state_changed(LMKD_CTRL_PACKET packet, + enum lmk_state state) { + if (!enable_stats_log) { + return 0; + } + + packet[0] = htonl(LMK_STAT_STATE_CHANGED); + packet[1] = htonl(state); + + return 2 * sizeof(int); +} + #endif /* LMKD_LOG_STATS */ diff --git a/statslog.h b/statslog.h index 628775f..e0859fa 100644 --- a/statslog.h +++ b/statslog.h @@ -17,9 +17,10 @@ #ifndef _STATSLOG_H_ #define _STATSLOG_H_ +#include + #include #include -#include #include #include #include @@ -28,6 +29,21 @@ __BEGIN_DECLS +#define MAX_TASKNAME_LEN 128 + +/* + * Max LMKD reply packet length in bytes + * Notes about size calculation: + * 4 bytes for packet type + * 80 bytes for the LmkKillOccurred fields: memory_stat + kill_stat + * 2 bytes for process name string size + * MAX_TASKNAME_LEN bytes for the process name string + * + * Must be in sync with LmkdConnection.java + */ +#define LMKD_REPLY_MAX_SIZE 214 + +/* LMK_MEMORY_STATS packet payload */ struct memory_stat { int64_t pgfault; int64_t pgmajfault; @@ -37,7 +53,7 @@ struct memory_stat { int64_t process_start_time_ns; }; -// If you update this, also update the corresponding stats enum mapping. +// If you update this, also update the corresponding stats enum mapping and LmkdStatsReporter.java enum kill_reasons { NONE = -1, /* To denote no kill condition */ PRESSURE_AFTER_KILL = 0, @@ -50,9 +66,10 @@ enum kill_reasons { KILL_REASON_COUNT }; +/* LMK_KILL_STAT packet payload */ struct kill_stat { int32_t uid; - char *taskname; + const char *taskname; enum kill_reasons kill_reason; int32_t oom_score; int32_t min_oom_score; @@ -60,6 +77,16 @@ struct kill_stat { int64_t free_swap_kb; }; +/* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */ +typedef char LMK_KILL_OCCURRED_PACKET[LMKD_REPLY_MAX_SIZE]; + +// If you update this, also update the corresponding stats enum mapping. +enum lmk_state { + STATE_UNKNOWN = 0, + STATE_START, + STATE_STOP, +}; + #ifdef LMKD_LOG_STATS #define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%d/memory.stat" @@ -68,25 +95,20 @@ struct kill_stat { #define BYTES_IN_KILOBYTE 1024 /** - * Logs the change in LMKD state which is used as start/stop boundaries for logging + * Produces packet with the change in LMKD state which is used as start/stop boundaries for logging * LMK_KILL_OCCURRED event. * Code: LMK_STATE_CHANGED = 54 */ -int -stats_write_lmk_state_changed(int32_t state); +size_t lmkd_pack_set_state_changed(LMKD_CTRL_PACKET packet, + enum lmk_state state); /** - * Logs the event when LMKD kills a process to reduce memory pressure. + * Produces packet with the event when LMKD kills a process to reduce memory pressure. * Code: LMK_KILL_OCCURRED = 51 */ -int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st); - -/** - * Logs the event when LMKD kills a process to reduce memory pressure. - * Code: LMK_KILL_OCCURRED = 51 - */ -int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st, - struct memory_stat* mem_st); +size_t lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet, + struct kill_stat *kill_st, + struct memory_stat *mem_st); /** * Reads memory stats used to log the statsd atom. Returns non-null ptr on success. @@ -109,19 +131,19 @@ void stats_purge_tasknames(); */ void stats_remove_taskname(int pid); +const char* stats_get_task_name(int pid); + #else /* LMKD_LOG_STATS */ -static inline int -stats_write_lmk_state_changed(int32_t state __unused) { return -EINVAL; } - -static inline int -stats_write_lmk_kill_occurred(struct kill_stat *kill_st __unused, - struct memory_stat *mem_st __unused) { +static inline size_t +lmkd_pack_set_state_changed(LMKD_CTRL_PACKET packet __unused, enum lmk_state state __unused) { return -EINVAL; } -int stats_write_lmk_kill_occurred_pid(int pid __unused, struct kill_stat *kill_st __unused, - struct memory_stat* mem_st __unused) { +static inline size_t +lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet __unused, + struct kill_stat *kill_st __unused, + struct memory_stat *mem_st __unused) { return -EINVAL; } @@ -137,6 +159,8 @@ static inline void stats_purge_tasknames() {} static inline void stats_remove_taskname(int pid __unused) {} +static inline const char* stats_get_task_name(int pid __unused) { return NULL; } + #endif /* LMKD_LOG_STATS */ __END_DECLS