From 95551f816ae87fac700f97c7e6d1bf3302f2b85b Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Fri, 14 Aug 2020 03:11:22 +0800 Subject: [PATCH 1/6] lmkd: Add a margin for watermark when swap free is low When swap is depleted file cache thrashing might result in allocations quickly pushing memory below low watermark and kswapd quickly pushing it back above high watermark. In this situation free memory stays above high watermark most of the time and lmkd during its periodic wake-ups has low chance of detecting low memory conditions. Add a 15% margin for high watermark which would allow lmkd to kill if swap is low, some memory was reclaimed since the last wakeup and free memory is just above the high watermark limit. Bug: 163134367 Test: heavy loading launch Signed-off-by: Martin Liu Change-Id: I5694736b04bafcd13c01f4b51e242e2ac4ff55a8 --- lmkd.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index a2820b8..b7eb18f 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -2239,9 +2239,11 @@ struct zone_watermarks { * Returns lowest breached watermark or WMARK_NONE.
*/ static enum zone_watermark get_lowest_watermark(union meminfo *mi, - struct zone_watermarks *watermarks) + struct zone_watermarks *watermarks, + long margin) { int64_t nr_free_pages = mi->field.nr_free_pages - mi->field.cma_free; + int64_t high_wmark = (watermarks->high_wmark * margin) / 100; if (nr_free_pages < watermarks->min_wmark) { return WMARK_MIN; @@ -2249,7 +2251,7 @@ static enum zone_watermark get_lowest_watermark(union meminfo *mi, if (nr_free_pages < watermarks->low_wmark) { return WMARK_LOW; } - if (nr_free_pages < watermarks->high_wmark) { + if (nr_free_pages < high_wmark) { return WMARK_HIGH; } return WMARK_NONE; @@ -2406,7 +2408,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ } /* Find out which watermark is breached if any */ - wmark = get_lowest_watermark(&mi, &watermarks); + wmark = get_lowest_watermark(&mi, &watermarks, swap_is_low ? 115 : 100); /* * TODO: move this logic into a separate function From cd5f08d8ee58834da49a5e7db25cf2fa6e604832 Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Fri, 21 Aug 2020 12:38:58 +0800 Subject: [PATCH 2/6] Revert "lmkd: Add a margin for watermark when swap free is low" This reverts commit 95551f816ae87fac700f97c7e6d1bf3302f2b85b. Reason to revert: don't need this change. Bug: 163134367 Signed-off-by: Martin Liu Change-Id: I8b209b054b6caec553bce13cd51f931401c1e42a --- lmkd.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index b7eb18f..a2820b8 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -2239,11 +2239,9 @@ struct zone_watermarks { * Returns lowest breached watermark or WMARK_NONE. 
*/ static enum zone_watermark get_lowest_watermark(union meminfo *mi, - struct zone_watermarks *watermarks, - long margin) + struct zone_watermarks *watermarks) { int64_t nr_free_pages = mi->field.nr_free_pages - mi->field.cma_free; - int64_t high_wmark = (watermarks->high_wmark * margin) / 100; if (nr_free_pages < watermarks->min_wmark) { return WMARK_MIN; @@ -2251,7 +2249,7 @@ static enum zone_watermark get_lowest_watermark(union meminfo *mi, if (nr_free_pages < watermarks->low_wmark) { return WMARK_LOW; } - if (nr_free_pages < high_wmark) { + if (nr_free_pages < watermarks->high_wmark) { return WMARK_HIGH; } return WMARK_NONE; @@ -2408,7 +2406,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ } /* Find out which watermark is breached if any */ - wmark = get_lowest_watermark(&mi, &watermarks, swap_is_low ? 115 : 100); + wmark = get_lowest_watermark(&mi, &watermarks); /* * TODO: move this logic into a separate function From 94d99a7bc82700eb91ebd10f44ad99495bfa04b4 Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Fri, 21 Aug 2020 13:18:50 +0800 Subject: [PATCH 3/6] lmkd: adjust thrashing detection strategy When a device is thrashing the file cache, workingset refaults can grow slowly because of various reasons. Current thrashing detection mechanism could reset the thrashing counter frequently as it relies on presence of reclaim activity, however refaults can keep increasing even when the device is not actively reclaiming. In addition, the thrashing counter gets reset when conditions require a kill but lmkd could not find an eligible process to be killed. This is problematic because when this happens thrashing is being ignored. Use a fixed 1 sec period to aggregate the thrashing counter. Also we need to keep monitoring thrashing counter while retrying as someone could release the memory to mitigate the thrashing.
If thrashing counter is greater than the limit at the end of the 1 sec period this means lmkd failed to find an eligible process to kill. In this case we store accumulated thrashing in case a new eligible process appears until accumulated thrashing is less than the limit or we miss an entire 1 sec window. Bug: 163134367 Test: heavy loading launch Signed-off-by: Martin Liu Merged-In: Ie9f4121ea604179c0ad510cc8430e7a6aec6e6b2 Change-Id: Ie9f4121ea604179c0ad510cc8430e7a6aec6e6b2 --- lmkd.cpp | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index a2820b8..fc9f610 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -98,6 +98,7 @@ #define EIGHT_MEGA (1 << 23) #define TARGET_UPDATE_MIN_INTERVAL_MS 1000 +#define THRASHING_RESET_INTERVAL_MS 1000 #define NS_PER_MS (NS_PER_SEC / MS_PER_SEC) #define US_PER_MS (US_PER_SEC / MS_PER_SEC) @@ -2291,16 +2292,18 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ DIRECT_RECLAIM, }; static int64_t init_ws_refault; + static int64_t prev_workingset_refault; static int64_t base_file_lru; static int64_t init_pgscan_kswapd; static int64_t init_pgscan_direct; static int64_t swap_low_threshold; static bool killing; - static int thrashing_limit; - static bool in_reclaim; + static int thrashing_limit = thrashing_limit_pct; static struct zone_watermarks watermarks; static struct timespec wmark_update_tm; static struct wakeup_info wi; + static struct timespec thrashing_reset_tm; + static int64_t prev_thrash_growth = 0; union meminfo mi; union vmstat vs; @@ -2315,6 +2318,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ char kill_desc[LINE_MAX]; bool cut_thrashing_limit = false; int min_score_adj = 0; + long since_thrashing_reset_ms; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); @@ -2353,6 +2357,8 @@ static void mp_event_psi(int data, uint32_t events,
struct polling_params *poll_ /* Reset file-backed pagecache size and refault amounts after a kill */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; + prev_thrash_growth = 0; } /* Check free swap levels */ @@ -2371,22 +2377,50 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = KSWAPD_RECLAIM; - } else { - in_reclaim = false; - /* Skip if system is not reclaiming */ + } else if (vs.field.workingset_refault == prev_workingset_refault) { + /* Device is not thrashing and not reclaiming, bail out early until we see these stats changing*/ goto no_kill; } - if (!in_reclaim) { - /* Record file-backed pagecache size when entering reclaim cycle */ + prev_workingset_refault = vs.field.workingset_refault; + + /* + * It's possible we fail to find an eligible process to kill (ex. no process is + * above oom_adj_min). When this happens, we should retry to find a new process + * for a kill whenever a new eligible process is available. This is especially + * important for a slow growing refault case. While retrying, we should keep + * monitoring new thrashing counter as someone could release the memory to mitigate + * the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing + * counter by window counts. if the counter is still greater than thrashing limit, + * we preserve the current prev_thrash counter so we will retry kill again. Otherwise, + * we reset the prev_thrash counter so we will stop retrying. 
+ */ + since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm); + if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) { + long windows_passed; + /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */ + prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; + windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS); + /* + * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we + * just crossed, which means there were no eligible processes to kill. We preserve the + * counter in that case to ensure a kill if a new eligible process appears. + */ + if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) { + prev_thrash_growth >>= windows_passed; + } + + /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; thrashing_limit = thrashing_limit_pct; } else { /* Calculate what % of the file-backed pagecache refaulted so far */ thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; } - in_reclaim = true; + /* Add previous cycle's decayed thrashing amount */ + thrashing += prev_thrash_growth; /* * Refresh watermarks once per min in case user updated one of the margins. @@ -2403,7 +2437,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ calc_zone_watermarks(&zi, &watermarks); wmark_update_tm = curr_tm; - } + } /* Find out which watermark is breached if any */ wmark = get_lowest_watermark(&mi, &watermarks); From 91bf5982820aa8b14e3a5f21c0ad5697fcd6decd Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Thu, 3 Sep 2020 22:12:14 +0800 Subject: [PATCH 4/6] lmkd: avoid division by zero because of base_file_lru It seems we have a chance that base_file_lru is zero. Avoid it by adding 1.
Bug: 167660459 Bug: 163134367 Test: boot Signed-off-by: Martin Liu Merged-In: If19dbbaafe6cd28a9d5b7f8a002f3cd33daab5e7 Change-Id: If19dbbaafe6cd28a9d5b7f8a002f3cd33daab5e7 --- lmkd.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index fc9f610..0f46f0a 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -2399,7 +2399,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) { long windows_passed; /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */ - prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; + prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 + / (base_file_lru + 1); windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS); /* * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we @@ -2417,7 +2418,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ thrashing_limit = thrashing_limit_pct; } else { /* Calculate what % of the file-backed pagecache refaulted so far */ - thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; + thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / (base_file_lru + 1); } /* Add previous cycle's decayed thrashing amount */ thrashing += prev_thrash_growth; From d816ab7c549438f23be5b17e888e182b2f78f03b Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Wed, 2 Sep 2020 23:15:18 +0800 Subject: [PATCH 5/6] lmkd: fix possible long stall state If the first PSI event triggers a kill, lmkd won't resume polling immediately after the process has died. Instead, it will wait until the next PSI event to resume the polling which is too late when the device is under memory pressure. 
This happens if data communication with AMS happens after previous polling window expired, in which case paused handler gets reset and polling does not resume after the kill. Fix this by changing pause handler reset logic. Bug: 167562248 Test: memory pressure test Signed-off-by: Martin Liu Signed-off-by: Suren Baghdasaryan Merged-In: I10c65c85b718a656e3d8991bf09948b96da895cb Change-Id: I10c65c85b718a656e3d8991bf09948b96da895cb --- lmkd.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index 0f46f0a..68e884e 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -3087,6 +3087,8 @@ static bool polling_paused(struct polling_params *poll_params) { static void resume_polling(struct polling_params *poll_params, struct timespec curr_tm) { poll_params->poll_start_tm = curr_tm; poll_params->poll_handler = poll_params->paused_handler; + poll_params->polling_interval_ms = PSI_POLL_PERIOD_SHORT_MS; + poll_params->paused_handler = NULL; } static void call_handler(struct event_handler_info* handler_info, @@ -3121,7 +3123,6 @@ static void call_handler(struct event_handler_info* handler_info, if (get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) { /* Polled for the duration of PSI window, time to stop */ poll_params->poll_handler = NULL; - poll_params->paused_handler = NULL; } break; } @@ -3146,12 +3147,8 @@ static void mainloop(void) { bool poll_now; clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); - if (poll_params.poll_handler == poll_params.paused_handler) { - /* - * Just transitioned into POLLING_RESUME. Reset paused_handler - * and poll immediately - */ - poll_params.paused_handler = NULL; + if (poll_params.update == POLLING_RESUME) { + /* Just transitioned into POLLING_RESUME, poll immediately. 
*/ poll_now = true; nevents = 0; } else { @@ -3182,6 +3179,7 @@ static void mainloop(void) { stop_wait_for_proc_kill(false); if (polling_paused(&poll_params)) { clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); + poll_params.update = POLLING_RESUME; resume_polling(&poll_params, curr_tm); } } From 140385d3a0263d145ef46edd78dbd860c2eba912 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 9 Sep 2020 20:19:02 -0700 Subject: [PATCH 6/6] lmkd: report kill reason, and meminfo details to statsd for each kill Information like free memory and swap as well as kill reason would be useful for understanding regressions in the number of lmk kills in the field. Bug: 168117803 Test: statsd_testdrive 51, load with lmk_unit_test Merged-In: Ic46aed3c85b880b32ac5ad61b55f90e0d33517c7 Change-Id: Ic46aed3c85b880b32ac5ad61b55f90e0d33517c7 --- lmkd.cpp | 46 ++++++++++++++++++++++++++++------------------ statslog.cpp | 46 +++++++++++++++++++++++++++++++++------------- statslog.h | 47 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 93 insertions(+), 46 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index 68e884e..b065bbf 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -797,8 +797,18 @@ static void poll_kernel(int poll_fd) { ctrl_data_write_lmk_kill_occurred((pid_t)pid, (uid_t)uid); mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK)); mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE; - stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj, - min_score_adj, 0, &mem_st); + + struct kill_stat kill_st = { + .uid = static_cast<int32_t>(uid), + .kill_reason = NONE, + .oom_score = oom_score_adj, + .min_oom_score = min_score_adj, + .free_mem_kb = 0, + .free_swap_kb = 0, + .tasksize = 0, + + }; + stats_write_lmk_kill_occurred_pid(pid, &kill_st, &mem_st); } free(taskname); @@ -2041,7 +2051,7 @@ static void start_wait_for_proc_kill(int pid_or_fd) { } /* Kill one process specified by procp.
Returns the size of the process killed */ -static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason, +static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason, const char *kill_desc, union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) { int pid = procp->pid; @@ -2054,6 +2064,7 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas int result = -1; struct memory_stat *mem_st; char buf[LINE_MAX]; + struct kill_stat kill_st; tgid = proc_get_tgid(pid); if (tgid >= 0 && tgid != pid) { @@ -2109,7 +2120,15 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas uid, procp->oomadj, tasksize * page_k); } - stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, tasksize, mem_st); + kill_st.uid = static_cast<int32_t>(uid); + kill_st.taskname = taskname; + kill_st.kill_reason = kill_reason; + kill_st.oom_score = procp->oomadj; + kill_st.min_oom_score = min_oom_score; + kill_st.free_mem_kb = mi->field.nr_free_pages * page_k; + kill_st.free_swap_kb = mi->field.free_swap * page_k; + kill_st.tasksize = tasksize; + stats_write_lmk_kill_occurred(&kill_st, mem_st); ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid); @@ -2128,8 +2147,9 @@ out: * Find one process to kill at or above the given oom_adj level. * Returns size of the killed process.
*/ -static int find_and_kill_process(int min_score_adj, int kill_reason, const char *kill_desc, - union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) { +static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason, + const char *kill_desc, union meminfo *mi, + struct wakeup_info *wi, struct timespec *tm) { int i; int killed_size = 0; bool lmk_state_change_start = false; @@ -2276,16 +2296,6 @@ void calc_zone_watermarks(struct zoneinfo *zi, struct zone_watermarks *watermark } static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) { - enum kill_reasons { - NONE = -1, /* To denote no kill condition */ - PRESSURE_AFTER_KILL = 0, - NOT_RESPONDING, - LOW_SWAP_AND_THRASHING, - LOW_MEM_AND_SWAP, - LOW_MEM_AND_THRASHING, - DIRECT_RECL_AND_THRASHING, - KILL_REASON_COUNT - }; enum reclaim_state { NO_RECLAIM = 0, KSWAPD_RECLAIM, @@ -2723,7 +2733,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po do_kill: if (low_ram_device) { /* For Go devices kill only one task */ - if (find_and_kill_process(level_oomadj[level], -1, NULL, &mi, &wi, &curr_tm) == 0) { + if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) { if (debug_process_killing) { ALOGI("Nothing to kill"); } @@ -2746,7 +2756,7 @@ do_kill: min_score_adj = level_oomadj[level]; } - pages_freed = find_and_kill_process(min_score_adj, -1, NULL, &mi, &wi, &curr_tm); + pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm); if (pages_freed == 0) { /* Rate limit kill reports when nothing was reclaimed */ diff --git a/statslog.cpp b/statslog.cpp index 8fb441c..a9606f9 100644 --- a/statslog.cpp +++ b/statslog.cpp @@ -69,41 +69,61 @@ static struct proc* pid_lookup(int pid) { return procp; } +inline int32_t map_kill_reason(enum kill_reasons reason) { + switch (reason) { + case PRESSURE_AFTER_KILL: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__PRESSURE_AFTER_KILL; + 
case NOT_RESPONDING: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__NOT_RESPONDING; + case LOW_SWAP_AND_THRASHING: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_SWAP_AND_THRASHING; + case LOW_MEM_AND_SWAP: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP; + case LOW_MEM_AND_THRASHING: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_THRASHING; + case DIRECT_RECL_AND_THRASHING: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__DIRECT_RECL_AND_THRASHING; + case LOW_MEM_AND_SWAP_UTIL: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__LOW_MEM_AND_SWAP_UTIL; + default: + return android::lmkd::stats::LMK_KILL_OCCURRED__REASON__UNKNOWN; + } +} + /** * Logs the event when LMKD kills a process to reduce memory pressure. * Code: LMK_KILL_OCCURRED = 51 */ -int -stats_write_lmk_kill_occurred(int32_t uid, char const* process_name, - int32_t oom_score, int32_t min_oom_score, int tasksize, - struct memory_stat *mem_st) { +int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st) { if (enable_stats_log) { return android::lmkd::stats::stats_write( android::lmkd::stats::LMK_KILL_OCCURRED, - uid, - process_name, - oom_score, + kill_st->uid, + kill_st->taskname, + kill_st->oom_score, mem_st ? mem_st->pgfault : -1, mem_st ? mem_st->pgmajfault : -1, - mem_st ? mem_st->rss_in_bytes : tasksize * BYTES_IN_KILOBYTE, + mem_st ? mem_st->rss_in_bytes : kill_st->tasksize * BYTES_IN_KILOBYTE, mem_st ? mem_st->cache_in_bytes : -1, mem_st ? mem_st->swap_in_bytes : -1, mem_st ? 
mem_st->process_start_time_ns : -1, - min_oom_score + kill_st->min_oom_score, + kill_st->free_mem_kb, + kill_st->free_swap_kb, + map_kill_reason(kill_st->kill_reason) ); } else { return -EINVAL; } } -int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score, - int32_t min_oom_score, int tasksize, +int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st, struct memory_stat* mem_st) { struct proc* proc = pid_lookup(pid); if (!proc) return -EINVAL; - return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, - tasksize, mem_st); + kill_st->taskname = proc->taskname; + return stats_write_lmk_kill_occurred(kill_st, mem_st); } static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) { diff --git a/statslog.h b/statslog.h index 9cba6b2..6e59ef2 100644 --- a/statslog.h +++ b/statslog.h @@ -37,6 +37,30 @@ struct memory_stat { int64_t process_start_time_ns; }; +// If you update this, also update the corresponding stats enum mapping. +enum kill_reasons { + NONE = -1, /* To denote no kill condition */ + PRESSURE_AFTER_KILL = 0, + NOT_RESPONDING, + LOW_SWAP_AND_THRASHING, + LOW_MEM_AND_SWAP, + LOW_MEM_AND_THRASHING, + DIRECT_RECL_AND_THRASHING, + LOW_MEM_AND_SWAP_UTIL, + KILL_REASON_COUNT +}; + +struct kill_stat { + int32_t uid; + char *taskname; + enum kill_reasons kill_reason; + int32_t oom_score; + int32_t min_oom_score; + int64_t free_mem_kb; + int64_t free_swap_kb; + int tasksize; +}; + #ifdef LMKD_LOG_STATS #define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%u/memory.stat" @@ -56,17 +80,13 @@ stats_write_lmk_state_changed(int32_t state); * Logs the event when LMKD kills a process to reduce memory pressure. 
* Code: LMK_KILL_OCCURRED = 51 */ -int -stats_write_lmk_kill_occurred(int32_t uid, char const* process_name, - int32_t oom_score, int32_t min_oom_score, - int tasksize, struct memory_stat *mem_st); +int stats_write_lmk_kill_occurred(struct kill_stat *kill_st, struct memory_stat *mem_st); /** * Logs the event when LMKD kills a process to reduce memory pressure. * Code: LMK_KILL_OCCURRED = 51 */ -int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score, - int32_t min_oom_score, int tasksize, +int stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st, struct memory_stat* mem_st); struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid); @@ -92,16 +112,13 @@ static inline int stats_write_lmk_state_changed(int32_t state __unused) { return -EINVAL; } static inline int -stats_write_lmk_kill_occurred(int32_t uid __unused, - char const* process_name __unused, int32_t oom_score __unused, - int32_t min_oom_score __unused, int tasksize __unused, - struct memory_stat *mem_st __unused) { return -EINVAL; } +stats_write_lmk_kill_occurred(struct kill_stat *kill_st __unused, + struct memory_stat *mem_st __unused) { + return -EINVAL; +} -static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused, - int pid __unused, int32_t oom_score __unused, - int32_t min_oom_score __unused, - int tasksize __unused, - struct memory_stat* mem_st __unused) { +static inline int stats_write_lmk_kill_occurred_pid(int pid __unused, struct kill_stat *kill_st __unused, + struct memory_stat* mem_st __unused) { return -EINVAL; }