lmkd: report kill reason and meminfo details to statsd for each kill

Information like free memory and swap as well as kill reason would be
useful for understanding regressions in the number of lmk kills in the
field.

Bug: 168117803
Test: statsd_testdrive 51, load with lmk_unit_test
Merged-In: Ic46aed3c85b880b32ac5ad61b55f90e0d33517c7
Change-Id: Ic46aed3c85b880b32ac5ad61b55f90e0d33517c7
This commit is contained in:
Suren Baghdasaryan 2020-09-09 20:19:02 -07:00
parent d816ab7c54
commit 140385d3a0
3 changed files with 93 additions and 46 deletions

View File

@ -797,8 +797,18 @@ static void poll_kernel(int poll_fd) {
ctrl_data_write_lmk_kill_occurred((pid_t)pid, (uid_t)uid);
mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj,
min_score_adj, 0, &mem_st);
struct kill_stat kill_st = {
.uid = static_cast<int32_t>(uid),
.kill_reason = NONE,
.oom_score = oom_score_adj,
.min_oom_score = min_score_adj,
.free_mem_kb = 0,
.free_swap_kb = 0,
.tasksize = 0,
};
stats_write_lmk_kill_occurred_pid(pid, &kill_st, &mem_st);
}
free(taskname);
@ -2041,7 +2051,7 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
}
/* Kill one process specified by procp. Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) {
int pid = procp->pid;
@ -2054,6 +2064,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
int result = -1;
struct memory_stat *mem_st;
char buf[LINE_MAX];
struct kill_stat kill_st;
tgid = proc_get_tgid(pid);
if (tgid >= 0 && tgid != pid) {
@ -2109,7 +2120,15 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
uid, procp->oomadj, tasksize * page_k);
}
stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, tasksize, mem_st);
kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname;
kill_st.kill_reason = kill_reason;
kill_st.oom_score = procp->oomadj;
kill_st.min_oom_score = min_oom_score;
kill_st.free_mem_kb = mi->field.nr_free_pages * page_k;
kill_st.free_swap_kb = mi->field.free_swap * page_k;
kill_st.tasksize = tasksize;
stats_write_lmk_kill_occurred(&kill_st, mem_st);
ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid);
@ -2128,8 +2147,9 @@ out:
* Find one process to kill at or above the given oom_adj level.
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc,
union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
int i;
int killed_size = 0;
bool lmk_state_change_start = false;
@ -2276,16 +2296,6 @@ void calc_zone_watermarks(struct zoneinfo *zi, struct zone_watermarks *watermark
}
static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) {
enum kill_reasons {
NONE = -1, /* To denote no kill condition */
PRESSURE_AFTER_KILL = 0,
NOT_RESPONDING,
LOW_SWAP_AND_THRASHING,
LOW_MEM_AND_SWAP,
LOW_MEM_AND_THRASHING,
DIRECT_RECL_AND_THRASHING,
KILL_REASON_COUNT
};
enum reclaim_state {
NO_RECLAIM = 0,
KSWAPD_RECLAIM,
@ -2723,7 +2733,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &wi, &curr_tm) == 0) {
if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@ -2746,7 +2756,7 @@ do_kill:
min_score_adj = level_oomadj[level];
}
pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &wi, &curr_tm);
pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */

View File

@ -69,41 +69,61 @@ static struct proc* pid_lookup(int pid) {
return procp;
}
inline int32_t map_kill_reason(enum kill_reasons reason) {
switch (reason) {
case PRESSURE_AFTER_KILL:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__PRESSURE_AFTER_KILL;
case NOT_RESPONDING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__NOT_RESPONDING;
case LOW_SWAP_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_SWAP_AND_THRASHING;
case LOW_MEM_AND_SWAP:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP;
case LOW_MEM_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_THRASHING;
case DIRECT_RECL_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__DIRECT_RECL_AND_THRASHING;
case LOW_MEM_AND_SWAP_UTIL:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP_UTIL;
default:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__UNKNOWN;
}
}
/**
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score, int tasksize,
struct memory_stat *mem_st) {
int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st) {
if (enable_stats_log) {
return android::lmkd::stats::stats_write(
android::lmkd::stats::LMK_KILL_OCCURRED,
uid,
process_name,
oom_score,
kill_st->uid,
kill_st->taskname,
kill_st->oom_score,
mem_st ? mem_st->pgfault : -1,
mem_st ? mem_st->pgmajfault : -1,
mem_st ? mem_st->rss_in_bytes : tasksize * BYTES_IN_KILOBYTE,
mem_st ? mem_st->rss_in_bytes : kill_st->tasksize * BYTES_IN_KILOBYTE,
mem_st ? mem_st->cache_in_bytes : -1,
mem_st ? mem_st->swap_in_bytes : -1,
mem_st ? mem_st->process_start_time_ns : -1,
min_oom_score
kill_st->min_oom_score,
kill_st->free_mem_kb,
kill_st->free_swap_kb,
map_kill_reason(kill_st->kill_reason)
);
} else {
return -EINVAL;
}
}
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score, int tasksize,
int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st,
struct memory_stat* mem_st) {
struct proc* proc = pid_lookup(pid);
if (!proc) return -EINVAL;
return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score,
tasksize, mem_st);
kill_st->taskname = proc->taskname;
return stats_write_lmk_kill_occurred(kill_st, mem_st);
}
static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {

View File

@ -37,6 +37,30 @@ struct memory_stat {
int64_t process_start_time_ns;
};
/*
 * Reasons attached to a kill performed by lmkd.
 * When this list changes, the statsd reason mapping (map_kill_reason) must be
 * updated to match.
 */
enum kill_reasons {
    NONE = -1, /* To denote no kill condition */
    PRESSURE_AFTER_KILL = 0,
    NOT_RESPONDING = 1,
    LOW_SWAP_AND_THRASHING = 2,
    LOW_MEM_AND_SWAP = 3,
    LOW_MEM_AND_THRASHING = 4,
    DIRECT_RECL_AND_THRASHING = 5,
    LOW_MEM_AND_SWAP_UTIL = 6,
    KILL_REASON_COUNT = 7 /* One past the last reason; must stay last */
};
/*
 * Per-kill details handed to stats_write_lmk_kill_occurred() and
 * stats_write_lmk_kill_occurred_pid() for the LMK_KILL_OCCURRED report.
 * Field order matters: callers use designated initializers in declaration order.
 */
struct kill_stat {
/* UID of the killed process. */
int32_t uid;
/* Name of the killed task; stats_write_lmk_kill_occurred_pid() fills this in from the pid lookup. */
char *taskname;
/* Why the kill happened; translated by map_kill_reason() before being reported. */
enum kill_reasons kill_reason;
/* oom_score_adj of the killed process. */
int32_t oom_score;
/* Minimum oom_score_adj level that was in effect for this kill. */
int32_t min_oom_score;
/* Free memory at kill time in kB (callers that lack this data pass 0). */
int64_t free_mem_kb;
/* Free swap at kill time in kB (callers that lack this data pass 0). */
int64_t free_swap_kb;
/*
 * Size of the killed task; used as the RSS fallback when no memory_stat is available.
 * NOTE(review): kill_one_process logs it as tasksize * page_k, so it is presumably in pages — confirm.
 */
int tasksize;
};
#ifdef LMKD_LOG_STATS
#define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%u/memory.stat"
@ -56,17 +80,13 @@ stats_write_lmk_state_changed(int32_t state);
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score,
int tasksize, struct memory_stat *mem_st);
int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st);
/**
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score, int tasksize,
int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st,
struct memory_stat* mem_st);
struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid);
@ -92,16 +112,13 @@ static inline int
stats_write_lmk_state_changed(int32_t state __unused) { return -EINVAL; }
static inline int
stats_write_lmk_kill_occurred(int32_t uid __unused,
char const* process_name __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused, int tasksize __unused,
struct memory_stat *mem_st __unused) { return -EINVAL; }
stats_write_lmk_kill_occurred(struct kill_stat *kill_st __unused,
struct memory_stat *mem_st __unused) {
return -EINVAL;
}
static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused,
int pid __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused,
int tasksize __unused,
struct memory_stat* mem_st __unused) {
int stats_write_lmk_kill_occurred_pid(int pid __unused, struct kill_stat *kill_st __unused,
struct memory_stat* mem_st __unused) {
return -EINVAL;
}