diff --git a/lmkd.cpp b/lmkd.cpp index 1daf198..a4367c3 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -100,6 +100,7 @@ #define EIGHT_MEGA (1 << 23) #define TARGET_UPDATE_MIN_INTERVAL_MS 1000 +#define THRASHING_RESET_INTERVAL_MS 1000 #define NS_PER_MS (NS_PER_SEC / MS_PER_SEC) #define US_PER_MS (US_PER_SEC / MS_PER_SEC) @@ -2313,16 +2314,18 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ DIRECT_RECLAIM, }; static int64_t init_ws_refault; + static int64_t prev_workingset_refault; static int64_t base_file_lru; static int64_t init_pgscan_kswapd; static int64_t init_pgscan_direct; static int64_t swap_low_threshold; static bool killing; - static int thrashing_limit; - static bool in_reclaim; + static int thrashing_limit = thrashing_limit_pct; static struct zone_watermarks watermarks; static struct timespec wmark_update_tm; static struct wakeup_info wi; + static struct timespec thrashing_reset_tm; + static int64_t prev_thrash_growth = 0; union meminfo mi; union vmstat vs; @@ -2338,6 +2341,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ bool cut_thrashing_limit = false; int min_score_adj = 0; int swap_util = 0; + long since_thrashing_reset_ms; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); @@ -2376,6 +2380,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ /* Reset file-backed pagecache size and refault amounts after a kill */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; + prev_thrash_growth = 0; } /* Check free swap levels */ @@ -2394,22 +2400,50 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = KSWAPD_RECLAIM; - } else { - in_reclaim = false; - /* Skip if system is not reclaiming */ + } else if (vs.field.workingset_refault == prev_workingset_refault) { + /* Device is not thrashing and not reclaiming, bail out early until we see these stats changing*/ goto no_kill; } - if (!in_reclaim) { - /* Record file-backed pagecache size when entering reclaim cycle */ + prev_workingset_refault = vs.field.workingset_refault; + + /* + * It's possible we fail to find an eligible process to kill (ex. no process is + * above oom_adj_min). When this happens, we should retry to find a new process + * for a kill whenever a new eligible process is available. This is especially + * important for a slow growing refault case. While retrying, we should keep + * monitoring new thrashing counter as someone could release the memory to mitigate + * the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing + * counter by window counts. if the counter is still greater than thrashing limit, + * we preserve the current prev_thrash counter so we will retry kill again. Otherwise, + * we reset the prev_thrash counter so we will stop retrying. + */ + since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm); + if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) { + long windows_passed; + /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */ + prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; + windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS); + /* + * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we + * just crossed, which means there were no eligible processes to kill. We preserve the + * counter in that case to ensure a kill if a new eligible process appears. + */ + if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) { + prev_thrash_growth >>= windows_passed; + } + + /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = vs.field.workingset_refault; + thrashing_reset_tm = curr_tm; thrashing_limit = thrashing_limit_pct; } else { /* Calculate what % of the file-backed pagecache refaulted so far */ thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru; } - in_reclaim = true; + /* Add previous cycle's decayed thrashing amount */ + thrashing += prev_thrash_growth; /* * Refresh watermarks once per min in case user updated one of the margins. @@ -2426,7 +2460,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ calc_zone_watermarks(&zi, &watermarks); wmark_update_tm = curr_tm; - } + } /* Find out which watermark is breached if any */ wmark = get_lowest_watermark(&mi, &watermarks);