lmkd: Add current and max thrashing levels in LMK_MEMORY_STATS reports

Thrashing threshold tuning requires collecting thrashing level data from
the field and correlating these levels with other indications of device
being non-responsive.
Include current and max thrashing levels in the lmkd kill reports. Max
thrashing level captures the highest level seen since the last kill report.

Bug: 194433891
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752
This commit is contained in:
Suren Baghdasaryan 2021-07-22 16:21:21 -07:00
parent dded82877f
commit 81bb801dd2
3 changed files with 42 additions and 18 deletions

View File

@ -2110,10 +2110,16 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
maxevents++;
}
struct kill_info {
enum kill_reasons kill_reason;
const char *kill_desc;
int thrashing;
int max_thrashing;
};
/* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) {
static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki,
union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
int pid = procp->pid;
int pidfd = procp->pidfd;
uid_t uid = procp->uid;
@ -2180,19 +2186,25 @@ static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_rea
inc_killcnt(procp->oomadj);
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, kill_reason, mi, wi, tm);
if (kill_desc) {
if (ki) {
kill_st.kill_reason = ki->kill_reason;
kill_st.thrashing = ki->thrashing;
kill_st.max_thrashing = ki->max_thrashing;
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb, kill_desc);
"kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb,
ki->kill_desc);
} else {
kill_st.kill_reason = NONE;
kill_st.thrashing = 0;
kill_st.max_thrashing = 0;
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb);
}
kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname;
kill_st.kill_reason = kill_reason;
kill_st.oom_score = procp->oomadj;
kill_st.min_oom_score = min_oom_score;
kill_st.free_mem_kb = mi->field.nr_free_pages * page_k;
@ -2216,8 +2228,7 @@ out:
* Find one process to kill at or above the given oom_score_adj level.
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason,
const char *kill_desc, union meminfo *mi,
static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
int i;
int killed_size = 0;
@ -2242,8 +2253,7 @@ static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reaso
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc,
mi, wi, tm);
killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
@ -2399,6 +2409,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
static struct timespec thrashing_reset_tm;
static int64_t prev_thrash_growth = 0;
static bool check_filecache = false;
static int max_thrashing = 0;
union meminfo mi;
union vmstat vs;
@ -2524,6 +2535,9 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
}
/* Add previous cycle's decayed thrashing amount */
thrashing += prev_thrash_growth;
if (max_thrashing < thrashing) {
max_thrashing = thrashing;
}
/*
* Refresh watermarks once per min in case user updated one of the margins.
@ -2636,10 +2650,16 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
/* Kill a process if necessary */
if (kill_reason != NONE) {
int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi,
&wi, &curr_tm);
struct kill_info ki = {
.kill_reason = kill_reason,
.kill_desc = kill_desc,
.thrashing = (int)thrashing,
.max_thrashing = max_thrashing,
};
int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
if (pages_freed > 0) {
killing = true;
max_thrashing = 0;
if (cut_thrashing_limit) {
/*
* Cut thrasing limit by thrashing_limit_decay_pct percentage of the current
@ -2856,7 +2876,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) {
if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@ -2879,7 +2899,7 @@ do_kill:
min_score_adj = level_oomadj[level];
}
pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm);
pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */

View File

@ -323,6 +323,8 @@ size_t lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet,
index = pack_int32(packet, index, (int)kill_stat->free_mem_kb);
index = pack_int32(packet, index, (int)kill_stat->free_swap_kb);
index = pack_int32(packet, index, (int)kill_stat->kill_reason);
index = pack_int32(packet, index, kill_stat->thrashing);
index = pack_int32(packet, index, kill_stat->max_thrashing);
index = pack_string(packet, index, kill_stat->taskname);
return index;

View File

@ -35,13 +35,13 @@ __BEGIN_DECLS
* Max LMKD reply packet length in bytes
* Notes about size calculation:
* 4 bytes for packet type
* 80 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
* 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
* 2 bytes for process name string size
* MAX_TASKNAME_LEN bytes for the process name string
*
* Must be in sync with LmkdConnection.java
*/
#define LMKD_REPLY_MAX_SIZE 214
#define LMKD_REPLY_MAX_SIZE 222
/* LMK_MEMORY_STATS packet payload */
struct memory_stat {
@ -76,6 +76,8 @@ struct kill_stat {
int32_t min_oom_score;
int64_t free_mem_kb;
int64_t free_swap_kb;
int32_t thrashing;
int32_t max_thrashing;
};
/* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */