From e16047516d05d8fa125a99e0cbb5326c72496efb Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 22 Jul 2021 16:21:21 -0700 Subject: [PATCH] lmkd: Add current and max thrashing levels in LMK_MEMORY_STATS reports Thrashing threshold tuning requires collecting thrashing level data from the field and correlating these levels with other indications of device being non-responsive. Include current and max thrashing levels in the lmkd kill reports. Max thrashing level captures the highest level seen since the last kill report. Bug: 194433891 Signed-off-by: Suren Baghdasaryan Change-Id: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752 Merged-In: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752 --- lmkd.cpp | 52 ++++++++++++++++++++++++++++++++++++---------------- statslog.cpp | 2 ++ statslog.h | 6 ++++-- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index f3c301e..2f0df91 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -2110,10 +2110,16 @@ static void start_wait_for_proc_kill(int pid_or_fd) { maxevents++; } +struct kill_info { + enum kill_reasons kill_reason; + const char *kill_desc; + int thrashing; + int max_thrashing; +}; + /* Kill one process specified by procp. 
Returns the size (in pages) of the process killed */ -static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason, - const char *kill_desc, union meminfo *mi, struct wakeup_info *wi, - struct timespec *tm) { +static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki, + union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) { int pid = procp->pid; int pidfd = procp->pidfd; uid_t uid = procp->uid; @@ -2180,19 +2186,25 @@ static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_rea inc_killcnt(procp->oomadj); - killinfo_log(procp, min_oom_score, rss_kb, swap_kb, kill_reason, mi, wi, tm); - - if (kill_desc) { + if (ki) { + kill_st.kill_reason = ki->kill_reason; + kill_st.thrashing = ki->thrashing; + kill_st.max_thrashing = ki->max_thrashing; + killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm); ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64 - "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb, kill_desc); + "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb, + ki->kill_desc); } else { + kill_st.kill_reason = NONE; + kill_st.thrashing = 0; + kill_st.max_thrashing = 0; + killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm); ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64 "kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb); } kill_st.uid = static_cast<int32_t>(uid); kill_st.taskname = taskname; - kill_st.kill_reason = kill_reason; kill_st.oom_score = procp->oomadj; kill_st.min_oom_score = min_oom_score; kill_st.free_mem_kb = mi->field.nr_free_pages * page_k; @@ -2216,8 +2228,7 @@ out: * Find one process to kill at or above the given oom_score_adj level. * Returns size of the killed process. 
*/ -static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason, - const char *kill_desc, union meminfo *mi, +static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) { int i; int killed_size = 0; @@ -2242,8 +2253,7 @@ static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reaso if (!procp) break; - killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, - mi, wi, tm); + killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm); if (killed_size >= 0) { if (!lmk_state_change_start) { lmk_state_change_start = true; @@ -2399,6 +2409,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ static struct timespec thrashing_reset_tm; static int64_t prev_thrash_growth = 0; static bool check_filecache = false; + static int max_thrashing = 0; union meminfo mi; union vmstat vs; @@ -2524,6 +2535,9 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ } /* Add previous cycle's decayed thrashing amount */ thrashing += prev_thrash_growth; + if (max_thrashing < thrashing) { + max_thrashing = thrashing; + } /* * Refresh watermarks once per min in case user updated one of the margins. 
@@ -2636,10 +2650,16 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ /* Kill a process if necessary */ if (kill_reason != NONE) { - int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi, - &wi, &curr_tm); + struct kill_info ki = { + .kill_reason = kill_reason, + .kill_desc = kill_desc, + .thrashing = (int)thrashing, + .max_thrashing = max_thrashing, + }; + int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm); if (pages_freed > 0) { killing = true; + max_thrashing = 0; if (cut_thrashing_limit) { /* * Cut thrasing limit by thrashing_limit_decay_pct percentage of the current @@ -2856,7 +2876,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po do_kill: if (low_ram_device) { /* For Go devices kill only one task */ - if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) { + if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) { if (debug_process_killing) { ALOGI("Nothing to kill"); } @@ -2879,7 +2899,7 @@ do_kill: min_score_adj = level_oomadj[level]; } - pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm); + pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm); if (pages_freed == 0) { /* Rate limit kill reports when nothing was reclaimed */ diff --git a/statslog.cpp b/statslog.cpp index ba39f54..6568f73 100644 --- a/statslog.cpp +++ b/statslog.cpp @@ -323,6 +323,8 @@ size_t lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet, index = pack_int32(packet, index, (int)kill_stat->free_mem_kb); index = pack_int32(packet, index, (int)kill_stat->free_swap_kb); index = pack_int32(packet, index, (int)kill_stat->kill_reason); + index = pack_int32(packet, index, kill_stat->thrashing); + index = pack_int32(packet, index, kill_stat->max_thrashing); index = pack_string(packet, index, kill_stat->taskname); return index; diff --git a/statslog.h 
b/statslog.h index 44af35f..89e4d2e 100644 --- a/statslog.h +++ b/statslog.h @@ -35,13 +35,13 @@ __BEGIN_DECLS * Max LMKD reply packet length in bytes * Notes about size calculation: * 4 bytes for packet type - * 80 bytes for the LmkKillOccurred fields: memory_stat + kill_stat + * 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat * 2 bytes for process name string size * MAX_TASKNAME_LEN bytes for the process name string * * Must be in sync with LmkdConnection.java */ -#define LMKD_REPLY_MAX_SIZE 214 +#define LMKD_REPLY_MAX_SIZE 222 /* LMK_MEMORY_STATS packet payload */ struct memory_stat { @@ -76,6 +76,8 @@ struct kill_stat { int32_t min_oom_score; int64_t free_mem_kb; int64_t free_swap_kb; + int32_t thrashing; + int32_t max_thrashing; }; /* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */