lmkd: Add lmkd wakeup information into killinfo logs

Oftentimes while investigating bugreports it's unclear whether lmkd
was active between kills. To provide visibility into lmkd activity
adding the following fields into killinfo reports:
MsSinceEvent - number of msecs since the last PSI/vmpressure event
MsSincePrevWakeup - number of msecs since the previous wakeup
WakeupsSinceEvent - number of wakeups since the last PSI/vmpressure
event
SkippedWakeups - number of wakeups that were skipped due to an
incomplete kill

Bug: 162034541
Bug: 161955028
Bug: 162297751
Test: lmkd_unit_test
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I0356c27515132ff0dd309b59a8bf907acbd67cd8
(cherry picked from commit d7b4fcb8a5)
Signed-off-by: Martin Liu <liumartin@google.com>
Merged-In: I0356c27515132ff0dd309b59a8bf907acbd67cd8
This commit is contained in:
Suren Baghdasaryan 2020-07-23 18:00:43 -07:00 committed by Martin Liu
parent dd8c2c181b
commit fa667edb56
2 changed files with 61 additions and 9 deletions

View File

@ -35,4 +35,4 @@
# TODO: generate ".java" and ".h" files with integer constants from this file. # TODO: generate ".java" and ".h" files with integer constants from this file.
# for killinfo logs # for killinfo logs
10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1) 10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1)

View File

@ -1818,8 +1818,44 @@ static int vmstat_parse(union vmstat *vs) {
return 0; return 0;
} }
enum wakeup_reason {
Event,
Polling
};
struct wakeup_info {
struct timespec wakeup_tm;
struct timespec prev_wakeup_tm;
struct timespec last_event_tm;
int wakeups_since_event;
int skipped_wakeups;
};
/*
* After the initial memory pressure event is received lmkd schedules periodic wakeups to check
* the memory conditions and kill if needed (polling). This is done because pressure events are
* rate-limited and memory conditions can change in between events. Therefore after the initial
* event there might be multiple wakeups. This function records the wakeup information such as the
* timestamps of the last event and the last wakeup, the number of wakeups since the last event
* and how many of those wakeups were skipped (some wakeups are skipped if previously killed
* process is still freeing its memory).
*/
static void record_wakeup_time(struct timespec *tm, enum wakeup_reason reason,
struct wakeup_info *wi) {
wi->prev_wakeup_tm = wi->wakeup_tm;
wi->wakeup_tm = *tm;
if (reason == Event) {
wi->last_event_tm = *tm;
wi->wakeups_since_event = 0;
wi->skipped_wakeups = 0;
} else {
wi->wakeups_since_event++;
}
}
static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize, static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize,
int kill_reason, union meminfo *mi) { int kill_reason, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
/* log process information */ /* log process information */
android_log_write_int32(ctx, procp->pid); android_log_write_int32(ctx, procp->pid);
android_log_write_int32(ctx, procp->uid); android_log_write_int32(ctx, procp->uid);
@ -1833,6 +1869,12 @@ static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize,
android_log_write_int32(ctx, (int32_t)min(mi->arr[field_idx] * page_k, INT32_MAX)); android_log_write_int32(ctx, (int32_t)min(mi->arr[field_idx] * page_k, INT32_MAX));
} }
/* log lmkd wakeup information */
android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->last_event_tm, tm));
android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->prev_wakeup_tm, tm));
android_log_write_int32(ctx, wi->wakeups_since_event);
android_log_write_int32(ctx, wi->skipped_wakeups);
android_log_write_list(ctx, LOG_ID_EVENTS); android_log_write_list(ctx, LOG_ID_EVENTS);
android_log_reset(ctx); android_log_reset(ctx);
} }
@ -1999,7 +2041,8 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
/* Kill one process specified by procp. Returns the size of the process killed */ /* Kill one process specified by procp. Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason, static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
const char *kill_desc, union meminfo *mi, struct timespec *tm) { const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) {
int pid = procp->pid; int pid = procp->pid;
int pidfd = procp->pidfd; int pidfd = procp->pidfd;
uid_t uid = procp->uid; uid_t uid = procp->uid;
@ -2055,7 +2098,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
inc_killcnt(procp->oomadj); inc_killcnt(procp->oomadj);
killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi); killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi, wi, tm);
if (kill_desc) { if (kill_desc) {
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid, ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid,
@ -2085,7 +2128,7 @@ out:
* Returns size of the killed process. * Returns size of the killed process.
*/ */
static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc, static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc,
union meminfo *mi, struct timespec *tm) { union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
int i; int i;
int killed_size = 0; int killed_size = 0;
bool lmk_state_change_start = false; bool lmk_state_change_start = false;
@ -2100,7 +2143,8 @@ static int find_and_kill_process(int min_score_adj, int kill_reason, const char
if (!procp) if (!procp)
break; break;
killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc, mi, tm); killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc,
mi, wi, tm);
if (killed_size >= 0) { if (killed_size >= 0) {
if (!lmk_state_change_start) { if (!lmk_state_change_start) {
lmk_state_change_start = true; lmk_state_change_start = true;
@ -2256,6 +2300,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
static bool in_reclaim; static bool in_reclaim;
static struct zone_watermarks watermarks; static struct zone_watermarks watermarks;
static struct timespec wmark_update_tm; static struct timespec wmark_update_tm;
static struct wakeup_info wi;
union meminfo mi; union meminfo mi;
union vmstat vs; union vmstat vs;
@ -2276,10 +2321,13 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
return; return;
} }
record_wakeup_time(&curr_tm, events ? Event : Polling, &wi);
bool kill_pending = is_kill_pending(); bool kill_pending = is_kill_pending();
if (kill_pending && (kill_timeout_ms == 0 || if (kill_pending && (kill_timeout_ms == 0 ||
get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms))) { get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms))) {
/* Skip while still killing a process */ /* Skip while still killing a process */
wi.skipped_wakeups++;
goto no_kill; goto no_kill;
} }
/* /*
@ -2421,7 +2469,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
/* Kill a process if necessary */ /* Kill a process if necessary */
if (kill_reason != NONE) { if (kill_reason != NONE) {
int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi, int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi,
&curr_tm); &wi, &curr_tm);
if (pages_freed > 0) { if (pages_freed > 0) {
killing = true; killing = true;
if (cut_thrashing_limit) { if (cut_thrashing_limit) {
@ -2482,6 +2530,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
.filename = MEMCG_MEMORYSW_USAGE, .filename = MEMCG_MEMORYSW_USAGE,
.fd = -1, .fd = -1,
}; };
static struct wakeup_info wi;
if (debug_process_killing) { if (debug_process_killing) {
ALOGI("%s memory pressure event is triggered", level_name[level]); ALOGI("%s memory pressure event is triggered", level_name[level]);
@ -2517,6 +2566,8 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
return; return;
} }
record_wakeup_time(&curr_tm, events ? Event : Polling, &wi);
if (kill_timeout_ms && if (kill_timeout_ms &&
get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms)) { get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms)) {
/* /*
@ -2525,6 +2576,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
*/ */
if (is_kill_pending()) { if (is_kill_pending()) {
kill_skip_count++; kill_skip_count++;
wi.skipped_wakeups++;
return; return;
} }
/* /*
@ -2636,7 +2688,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill: do_kill:
if (low_ram_device) { if (low_ram_device) {
/* For Go devices kill only one task */ /* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &curr_tm) == 0) { if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) { if (debug_process_killing) {
ALOGI("Nothing to kill"); ALOGI("Nothing to kill");
} }
@ -2659,7 +2711,7 @@ do_kill:
min_score_adj = level_oomadj[level]; min_score_adj = level_oomadj[level];
} }
pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &curr_tm); pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) { if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */ /* Rate limit kill reports when nothing was reclaimed */