lmkd: Add current and max thrashing levels in LMK_MEMORY_STATS reports

Thrashing threshold tuning requires collecting thrashing level data from
the field and correlating these levels with other indications of the
device being non-responsive.
Include the current and max thrashing levels in the lmkd kill reports. The
max thrashing level captures the highest level seen since the last kill report.

Bug: 194433891
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752
Merged-In: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752
This commit is contained in:
Suren Baghdasaryan 2021-07-22 16:21:21 -07:00
parent 1ef4718aed
commit e16047516d
3 changed files with 42 additions and 18 deletions

View File

@ -2110,10 +2110,16 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
maxevents++; maxevents++;
} }
/*
 * Kill parameters bundled together so they can be passed to
 * find_and_kill_process() and kill_one_process() as a single pointer.
 * Callers may pass NULL instead; kill_one_process() then reports the kill
 * with reason NONE and both thrashing values set to 0.
 */
struct kill_info {
/* Reason for the kill; copied into kill_st.kill_reason and passed to killinfo_log() */
enum kill_reasons kill_reason;
/* Text logged after "reason: " in the kill message */
const char *kill_desc;
/* Thrashing level at the time of the kill; copied into kill_st.thrashing */
int thrashing;
/*
 * Highest thrashing level recorded by mp_event_psi() since it last reset the
 * value, which it does after a kill that freed pages; copied into
 * kill_st.max_thrashing
 */
int max_thrashing;
};
/* Kill one process specified by procp. Returns the size (in pages) of the process killed */ /* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason, static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi, union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
struct timespec *tm) {
int pid = procp->pid; int pid = procp->pid;
int pidfd = procp->pidfd; int pidfd = procp->pidfd;
uid_t uid = procp->uid; uid_t uid = procp->uid;
@ -2180,19 +2186,25 @@ static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_rea
inc_killcnt(procp->oomadj); inc_killcnt(procp->oomadj);
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, kill_reason, mi, wi, tm); if (ki) {
kill_st.kill_reason = ki->kill_reason;
if (kill_desc) { kill_st.thrashing = ki->thrashing;
kill_st.max_thrashing = ki->max_thrashing;
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64 ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb, kill_desc); "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb,
ki->kill_desc);
} else { } else {
kill_st.kill_reason = NONE;
kill_st.thrashing = 0;
kill_st.max_thrashing = 0;
killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64 ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb); "kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb);
} }
kill_st.uid = static_cast<int32_t>(uid); kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname; kill_st.taskname = taskname;
kill_st.kill_reason = kill_reason;
kill_st.oom_score = procp->oomadj; kill_st.oom_score = procp->oomadj;
kill_st.min_oom_score = min_oom_score; kill_st.min_oom_score = min_oom_score;
kill_st.free_mem_kb = mi->field.nr_free_pages * page_k; kill_st.free_mem_kb = mi->field.nr_free_pages * page_k;
@ -2216,8 +2228,7 @@ out:
* Find one process to kill at or above the given oom_score_adj level. * Find one process to kill at or above the given oom_score_adj level.
* Returns size of the killed process. * Returns size of the killed process.
*/ */
static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason, static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
const char *kill_desc, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) { struct wakeup_info *wi, struct timespec *tm) {
int i; int i;
int killed_size = 0; int killed_size = 0;
@ -2242,8 +2253,7 @@ static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reaso
if (!procp) if (!procp)
break; break;
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm);
mi, wi, tm);
if (killed_size >= 0) { if (killed_size >= 0) {
if (!lmk_state_change_start) { if (!lmk_state_change_start) {
lmk_state_change_start = true; lmk_state_change_start = true;
@ -2399,6 +2409,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
static struct timespec thrashing_reset_tm; static struct timespec thrashing_reset_tm;
static int64_t prev_thrash_growth = 0; static int64_t prev_thrash_growth = 0;
static bool check_filecache = false; static bool check_filecache = false;
static int max_thrashing = 0;
union meminfo mi; union meminfo mi;
union vmstat vs; union vmstat vs;
@ -2524,6 +2535,9 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
} }
/* Add previous cycle's decayed thrashing amount */ /* Add previous cycle's decayed thrashing amount */
thrashing += prev_thrash_growth; thrashing += prev_thrash_growth;
if (max_thrashing < thrashing) {
max_thrashing = thrashing;
}
/* /*
* Refresh watermarks once per min in case user updated one of the margins. * Refresh watermarks once per min in case user updated one of the margins.
@ -2636,10 +2650,16 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
/* Kill a process if necessary */ /* Kill a process if necessary */
if (kill_reason != NONE) { if (kill_reason != NONE) {
int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi, struct kill_info ki = {
&wi, &curr_tm); .kill_reason = kill_reason,
.kill_desc = kill_desc,
.thrashing = (int)thrashing,
.max_thrashing = max_thrashing,
};
int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
if (pages_freed > 0) { if (pages_freed > 0) {
killing = true; killing = true;
max_thrashing = 0;
if (cut_thrashing_limit) { if (cut_thrashing_limit) {
/* /*
* Cut thrasing limit by thrashing_limit_decay_pct percentage of the current * Cut thrasing limit by thrashing_limit_decay_pct percentage of the current
@ -2856,7 +2876,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill: do_kill:
if (low_ram_device) { if (low_ram_device) {
/* For Go devices kill only one task */ /* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) { if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) { if (debug_process_killing) {
ALOGI("Nothing to kill"); ALOGI("Nothing to kill");
} }
@ -2879,7 +2899,7 @@ do_kill:
min_score_adj = level_oomadj[level]; min_score_adj = level_oomadj[level];
} }
pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm); pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) { if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */ /* Rate limit kill reports when nothing was reclaimed */

View File

@ -323,6 +323,8 @@ size_t lmkd_pack_set_kill_occurred(LMK_KILL_OCCURRED_PACKET packet,
index = pack_int32(packet, index, (int)kill_stat->free_mem_kb); index = pack_int32(packet, index, (int)kill_stat->free_mem_kb);
index = pack_int32(packet, index, (int)kill_stat->free_swap_kb); index = pack_int32(packet, index, (int)kill_stat->free_swap_kb);
index = pack_int32(packet, index, (int)kill_stat->kill_reason); index = pack_int32(packet, index, (int)kill_stat->kill_reason);
index = pack_int32(packet, index, kill_stat->thrashing);
index = pack_int32(packet, index, kill_stat->max_thrashing);
index = pack_string(packet, index, kill_stat->taskname); index = pack_string(packet, index, kill_stat->taskname);
return index; return index;

View File

@ -35,13 +35,13 @@ __BEGIN_DECLS
* Max LMKD reply packet length in bytes * Max LMKD reply packet length in bytes
* Notes about size calculation: * Notes about size calculation:
* 4 bytes for packet type * 4 bytes for packet type
* 80 bytes for the LmkKillOccurred fields: memory_stat + kill_stat * 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
* 2 bytes for process name string size * 2 bytes for process name string size
* MAX_TASKNAME_LEN bytes for the process name string * MAX_TASKNAME_LEN bytes for the process name string
* *
* Must be in sync with LmkdConnection.java * Must be in sync with LmkdConnection.java
*/ */
#define LMKD_REPLY_MAX_SIZE 214 #define LMKD_REPLY_MAX_SIZE 222
/* LMK_MEMORY_STATS packet payload */ /* LMK_MEMORY_STATS packet payload */
struct memory_stat { struct memory_stat {
@ -76,6 +76,8 @@ struct kill_stat {
int32_t min_oom_score; int32_t min_oom_score;
int64_t free_mem_kb; int64_t free_mem_kb;
int64_t free_swap_kb; int64_t free_swap_kb;
int32_t thrashing;
int32_t max_thrashing;
}; };
/* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */ /* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */