From 1f72f5fa4bee58c27fe413898e17d98e7ee61eb1 Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Fri, 21 Aug 2020 13:18:50 +0800 Subject: [PATCH] lmkd: adjust thrashing detection strategy When a device is thrashing the file cache, workingset refaults can grow slowly because of various reasons. Current thrashing detection mechanism could reset the thrashing counter frequently as it relies on presence of reclaim activity; however, refaults can keep increasing even when the device is not actively reclaiming. In addition, the thrashing counter gets reset when conditions require a kill but lmkd could not find an eligible process to be killed. This is problematic because when this happens thrashing is being ignored. Use fixed 1 sec periods to aggregate the thrashing counter. Also we need to keep monitoring thrashing counter while retrying as someone could release the memory to mitigate the thrashing. If thrashing counter is greater than the limit at the end of the 1 sec period this means lmkd failed to find an eligible process to kill. In this case we store accumulated thrashing in case a new eligible process appears until accumulated thrashing is less than the limit or we miss an entire 1 sec window. 
Bug: 163134367 Test: heavy loading launch Signed-off-by: Martin Liu Change-Id: Ie9f4121ea604179c0ad510cc8430e7a6aec6e6b2 --- lmkd.cpp | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/lmkd.cpp b/lmkd.cpp index 1daf198..a4367c3 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -100,6 +100,7 @@ #define EIGHT_MEGA (1 << 23) #define TARGET_UPDATE_MIN_INTERVAL_MS 1000 +#define THRASHING_RESET_INTERVAL_MS 1000 #define NS_PER_MS (NS_PER_SEC / MS_PER_SEC) #define US_PER_MS (US_PER_SEC / MS_PER_SEC) @@ -2313,16 +2314,18 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ DIRECT_RECLAIM, }; static int64_t init_ws_refault; + static int64_t prev_workingset_refault; static int64_t base_file_lru; static int64_t init_pgscan_kswapd; static int64_t init_pgscan_direct; static int64_t swap_low_threshold; static bool killing; - static int thrashing_limit; - static bool in_reclaim; + static int thrashing_limit = thrashing_limit_pct; static struct zone_watermarks watermarks; static struct timespec wmark_update_tm; static struct wakeup_info wi; + static struct timespec thrashing_reset_tm; + static int64_t prev_thrash_growth = 0; union meminfo mi; union vmstat vs; @@ -2338,6 +2341,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ bool cut_thrashing_limit = false; int min_score_adj = 0; int swap_util = 0; + long since_thrashing_reset_ms; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); @@ -2376,6 +2380,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ /* Reset file-backed pagecache size and refault amounts after a kill */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; + prev_thrash_growth = 0; } /* Check free swap levels */ @@ -2394,22 +2400,50 @@ static void mp_event_psi(int data, 
uint32_t events, struct polling_params *poll_ } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = KSWAPD_RECLAIM; - } else { - in_reclaim = false; - /* Skip if system is not reclaiming */ + } else if (vs.field.workingset_refault == prev_workingset_refault) { + /* Device is not thrashing and not reclaiming, bail out early until we see these stats changing*/ goto no_kill; } - if (!in_reclaim) { - /* Record file-backed pagecache size when entering reclaim cycle */ + prev_workingset_refault = vs.field.workingset_refault; + + /* + * It's possible we fail to find an eligible process to kill (ex. no process is + * above oom_adj_min). When this happens, we should retry to find a new process + * for a kill whenever a new eligible process is available. This is especially + * important for a slow growing refault case. While retrying, we should keep + * monitoring new thrashing counter as someone could release the memory to mitigate + * the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing + * counter by window counts. if the counter is still greater than thrashing limit, + * we preserve the current prev_thrash counter so we will retry kill again. Otherwise, + * we reset the prev_thrash counter so we will stop retrying. + */ + since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm); + if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) { + long windows_passed; + /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */ + prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; + windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS); + /* + * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we + * just crossed, which means there were no eligible processes to kill. We preserve the + * counter in that case to ensure a kill if a new eligible process appears. 
+ */ + if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) { + prev_thrash_growth >>= windows_passed; + } + + /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; thrashing_limit = thrashing_limit_pct; } else { /* Calculate what % of the file-backed pagecache refaulted so far */ thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; } - in_reclaim = true; + /* Add previous cycle's decayed thrashing amount */ + thrashing += prev_thrash_growth; /* * Refresh watermarks once per min in case user updated one of the margins. @@ -2426,7 +2460,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ calc_zone_watermarks(&zi, &watermarks); wmark_update_tm = curr_tm; - } + } /* Find out which watermark is breached if any */ wmark = get_lowest_watermark(&mi, &watermarks);