lmkd: Allow lmkd to kill perceptible apps during heavy thrashing

Occasionally a system can get into a heavy file cache thrashing situation
and become unresponsive. In these situations we observe lmkd wakeups;
however, lmkd does not kill anything because all non-perceptible apps have
already been killed and the system manages to reclaim enough memory to
stay above the min watermark.
Add the ro.lmk.thrashing_limit_critical property which, when breached,
allows lmkd to kill perceptible apps. The property represents the
percentage of refaulted workingset pages as a fraction of the overall file
cache size. By default it is disabled.

Bug: 181778155
Test: thrashing.py 500 10 200
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: Icb38ef6c90adaa4f5c956593b6ea0c4febc91dc0
This commit is contained in:
Suren Baghdasaryan 2021-03-03 11:11:09 -08:00
parent 84623bef7b
commit 0142b3c166
1 changed files with 15 additions and 8 deletions

View File

@ -197,6 +197,7 @@ static int psi_partial_stall_ms;
static int psi_complete_stall_ms; static int psi_complete_stall_ms;
static int thrashing_limit_pct; static int thrashing_limit_pct;
static int thrashing_limit_decay_pct; static int thrashing_limit_decay_pct;
static int thrashing_critical_pct;
static int swap_util_max; static int swap_util_max;
static bool use_psi_monitors = false; static bool use_psi_monitors = false;
static int kpoll_fd; static int kpoll_fd;
@ -2501,8 +2502,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64 snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
"kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)", "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing); mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
/* Do not kill perceptible apps unless below min watermark */ /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
if (wmark > WMARK_MIN) { if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1; min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
} }
} else if (swap_is_low && wmark < WMARK_HIGH) { } else if (swap_is_low && wmark < WMARK_HIGH) {
@ -2511,8 +2512,8 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%" snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low", PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low",
mi.field.free_swap * page_k, swap_low_threshold * page_k); mi.field.free_swap * page_k, swap_low_threshold * page_k);
/* Do not kill perceptible apps unless below min watermark */ /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
if (wmark > WMARK_MIN) { if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1; min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
} }
} else if (wmark < WMARK_HIGH && swap_util_max < 100 && } else if (wmark < WMARK_HIGH && swap_util_max < 100 &&
@ -2531,17 +2532,21 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%" snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%"
PRId64 "%%)", wmark < WMARK_LOW ? "min" : "low", thrashing); PRId64 "%%)", wmark < WMARK_LOW ? "min" : "low", thrashing);
cut_thrashing_limit = true; cut_thrashing_limit = true;
/* Do not kill perceptible apps because of thrashing */ /* Do not kill perceptible apps unless thrashing at critical levels */
if (thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1; min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
}
} else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) { } else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
/* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */ /* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
kill_reason = DIRECT_RECL_AND_THRASHING; kill_reason = DIRECT_RECL_AND_THRASHING;
snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%" snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
PRId64 "%%)", thrashing); PRId64 "%%)", thrashing);
cut_thrashing_limit = true; cut_thrashing_limit = true;
/* Do not kill perceptible apps because of thrashing */ /* Do not kill perceptible apps unless thrashing at critical levels */
if (thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1; min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
} }
}
/* Kill a process if necessary */ /* Kill a process if necessary */
if (kill_reason != NONE) { if (kill_reason != NONE) {
@ -3337,6 +3342,8 @@ static void update_props() {
low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING)); low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
thrashing_limit_decay_pct = clamp(0, 100, property_get_int32("ro.lmk.thrashing_limit_decay", thrashing_limit_decay_pct = clamp(0, 100, property_get_int32("ro.lmk.thrashing_limit_decay",
low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY)); low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY));
thrashing_critical_pct = max(0, property_get_int32("ro.lmk.thrashing_limit_critical",
thrashing_limit_pct * 2));
swap_util_max = clamp(0, 100, property_get_int32("ro.lmk.swap_util_max", 100)); swap_util_max = clamp(0, 100, property_get_int32("ro.lmk.swap_util_max", 100));
} }