lmkd: report kill reason, and meminfo details to statsd for each kill

Information like free memory and swap as well as kill reason would be
useful for understanding regressions in the number of lmk kills in the
field.

Bug: 168117803
Change-Id: Ic46aed3c85b880b32ac5ad61b55f90e0d33517c7
Test: statsd_testdrive 51, load with lmk_unit_test
This commit is contained in:
Suren Baghdasaryan 2020-09-09 20:19:02 -07:00 committed by Ioannis Ilkos
parent d65f63c7d4
commit 3cc1f13044
3 changed files with 88 additions and 44 deletions

View File

@ -800,8 +800,16 @@ static void poll_kernel(int poll_fd) {
ctrl_data_write_lmk_kill_occurred((pid_t)pid, (uid_t)uid);
mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj,
min_score_adj, &mem_st);
struct kill_stat kill_st = {
.uid = static_cast<int32_t>(uid),
.kill_reason = NONE,
.oom_score = oom_score_adj,
.min_oom_score = min_score_adj,
.free_mem_kb = 0,
.free_swap_kb = 0,
};
stats_write_lmk_kill_occurred_pid(pid, &kill_st, &mem_st);
}
free(taskname);
@ -2045,7 +2053,7 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
}
/* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) {
int pid = procp->pid;
@ -2055,6 +2063,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
int r;
int result = -1;
struct memory_stat *mem_st;
struct kill_stat kill_st;
int64_t tgid;
int64_t rss_kb;
int64_t swap_kb;
@ -2123,7 +2132,14 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
uid, procp->oomadj, rss_kb);
}
stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, mem_st);
kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname;
kill_st.kill_reason = kill_reason;
kill_st.oom_score = procp->oomadj;
kill_st.min_oom_score = min_oom_score;
kill_st.free_mem_kb = mi->field.nr_free_pages * page_k;
kill_st.free_swap_kb = mi->field.free_swap * page_k;
stats_write_lmk_kill_occurred(&kill_st, mem_st);
ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid);
@ -2142,8 +2158,9 @@ out:
* Find one process to kill at or above the given oom_adj level.
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc,
union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
int i;
int killed_size = 0;
bool lmk_state_change_start = false;
@ -2297,17 +2314,6 @@ static int calc_swap_utilization(union meminfo *mi) {
}
static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) {
enum kill_reasons {
NONE = -1, /* To denote no kill condition */
PRESSURE_AFTER_KILL = 0,
NOT_RESPONDING,
LOW_SWAP_AND_THRASHING,
LOW_MEM_AND_SWAP,
LOW_MEM_AND_THRASHING,
DIRECT_RECL_AND_THRASHING,
LOW_MEM_AND_SWAP_UTIL,
KILL_REASON_COUNT
};
enum reclaim_state {
NO_RECLAIM = 0,
KSWAPD_RECLAIM,
@ -2756,7 +2762,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &wi, &curr_tm) == 0) {
if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@ -2779,7 +2785,7 @@ do_kill:
min_score_adj = level_oomadj[level];
}
pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &wi, &curr_tm);
pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */

View File

@ -71,40 +71,61 @@ static struct proc* pid_lookup(int pid) {
return procp;
}
inline int32_t map_kill_reason(enum kill_reasons reason) {
switch (reason) {
case PRESSURE_AFTER_KILL:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__PRESSURE_AFTER_KILL;
case NOT_RESPONDING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__NOT_RESPONDING;
case LOW_SWAP_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_SWAP_AND_THRASHING;
case LOW_MEM_AND_SWAP:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP;
case LOW_MEM_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_THRASHING;
case DIRECT_RECL_AND_THRASHING:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__DIRECT_RECL_AND_THRASHING;
case LOW_MEM_AND_SWAP_UTIL:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP_UTIL;
default:
return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__UNKNOWN;
}
}
/**
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score,
struct memory_stat *mem_st) {
int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st) {
if (enable_stats_log) {
return android::lmkd::stats::stats_write(
android::lmkd::stats::LMK_KILL_OCCURRED,
uid,
process_name,
oom_score,
kill_st->uid,
kill_st->taskname,
kill_st->oom_score,
mem_st ? mem_st->pgfault : -1,
mem_st ? mem_st->pgmajfault : -1,
mem_st ? mem_st->rss_in_bytes : -1,
mem_st ? mem_st->cache_in_bytes : -1,
mem_st ? mem_st->swap_in_bytes : -1,
mem_st ? mem_st->process_start_time_ns : -1,
min_oom_score
kill_st->min_oom_score,
kill_st->free_mem_kb,
kill_st->free_swap_kb,
map_kill_reason(kill_st->kill_reason)
);
} else {
return -EINVAL;
}
}
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score,
int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st,
struct memory_stat* mem_st) {
struct proc* proc = pid_lookup(pid);
if (!proc) return -EINVAL;
return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, mem_st);
kill_st->taskname = proc->taskname;
return stats_write_lmk_kill_occurred(kill_st, mem_st);
}
static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {

View File

@ -37,6 +37,29 @@ struct memory_stat {
int64_t process_start_time_ns;
};
// If you update this, also update the corresponding stats enum mapping.
enum kill_reasons {
NONE = -1, /* To denote no kill condition */
PRESSURE_AFTER_KILL = 0,
NOT_RESPONDING,
LOW_SWAP_AND_THRASHING,
LOW_MEM_AND_SWAP,
LOW_MEM_AND_THRASHING,
DIRECT_RECL_AND_THRASHING,
LOW_MEM_AND_SWAP_UTIL,
KILL_REASON_COUNT
};
struct kill_stat {
int32_t uid;
char *taskname;
enum kill_reasons kill_reason;
int32_t oom_score;
int32_t min_oom_score;
int64_t free_mem_kb;
int64_t free_swap_kb;
};
#ifdef LMKD_LOG_STATS
#define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%d/memory.stat"
@ -56,17 +79,13 @@ stats_write_lmk_state_changed(int32_t state);
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score,
struct memory_stat *mem_st);
int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st);
/**
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score,
int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st,
struct memory_stat* mem_st);
/**
@ -96,15 +115,13 @@ static inline int
stats_write_lmk_state_changed(int32_t state __unused) { return -EINVAL; }
static inline int
stats_write_lmk_kill_occurred(int32_t uid __unused,
char const* process_name __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused,
struct memory_stat *mem_st __unused) { return -EINVAL; }
stats_write_lmk_kill_occurred(struct kill_stat *kill_st __unused,
struct memory_stat *mem_st __unused) {
return -EINVAL;
}
static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused,
int pid __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused,
struct memory_stat* mem_st __unused) {
int stats_write_lmk_kill_occurred_pid(int pid __unused, struct kill_stat *kill_st __unused,
struct memory_stat* mem_st __unused) {
return -EINVAL;
}