lmkd: Introduce kill strategy based on direct reclaim length am: 81a7c21087

Original change: https://android-review.googlesource.com/c/platform/system/memory/lmkd/+/2959942

Change-Id: Ic643aaf44e45f21b941deeebdcf674c2591e88c2
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Carlos Galo 2024-03-26 20:33:12 +00:00 committed by Automerger Merge Worker
commit ed5f6f9c6b
4 changed files with 24 additions and 1 deletion

View File

@ -92,6 +92,9 @@ properties:
cycle after kill used to allow blocking of killing
critical processes when not enough memory was freed
in a kill cycle. Default score = 0.
- `ro.lmk.direct_reclaim_threshold_ms`: direct reclaim duration threshold in
milliseconds after which the system is considered
stuck in direct reclaim. Default = 0 (disabled).
lmkd will set the following Android properties according to current system
configurations:

View File

@ -40,7 +40,6 @@
#include <shared_mutex>
#include <vector>
#include <bpf/KernelUtils.h>
#include <bpf/WaitForProgsLoaded.h>
#include <cutils/properties.h>
#include <cutils/sockets.h>
@ -161,6 +160,8 @@ static inline void trace_kill_end() {}
#define DEF_PARTIAL_STALL 70
/* ro.lmk.psi_complete_stall_ms property defaults */
#define DEF_COMPLETE_STALL 700
/* ro.lmk.direct_reclaim_threshold_ms property defaults */
#define DEF_DIRECT_RECL_THRESH_MS 0
#define LMKD_REINIT_PROP "lmkd.reinit"
@ -228,6 +229,7 @@ static int64_t stall_limit_critical;
static bool use_psi_monitors = false;
static int kpoll_fd;
static bool delay_monitors_until_boot;
static int direct_reclaim_threshold_ms;
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
{ PSI_SOME, 70 }, /* 70ms out of 1sec for partial stall */
{ PSI_SOME, 100 }, /* 100ms out of 1sec for partial stall */
@ -2631,6 +2633,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
int64_t workingset_refault_file;
bool critical_stall = false;
bool in_direct_reclaim;
long direct_reclaim_duration_ms;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
@ -2692,6 +2695,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
init_pgscan_direct = vs.field.pgscan_direct;
init_pgscan_kswapd = vs.field.pgscan_kswapd;
init_pgrefill = vs.field.pgrefill;
direct_reclaim_duration_ms = get_time_diff_ms(&direct_reclaim_start_tm, &curr_tm);
reclaim = DIRECT_RECLAIM;
} else if (vs.field.pgscan_kswapd != init_pgscan_kswapd) {
init_pgscan_kswapd = vs.field.pgscan_kswapd;
@ -2849,6 +2853,12 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
}
check_filecache = true;
} else if (reclaim == DIRECT_RECLAIM && direct_reclaim_threshold_ms > 0 &&
direct_reclaim_duration_ms > direct_reclaim_threshold_ms) {
kill_reason = DIRECT_RECL_STUCK;
snprintf(kill_desc, sizeof(kill_desc),
"device is stuck in direct reclaim (%" PRId64 "ms > %dms)",
direct_reclaim_duration_ms, direct_reclaim_threshold_ms);
} else if (check_filecache) {
int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k;
@ -3499,6 +3509,10 @@ static bool init_monitors() {
ALOGI("Using memevents for direct reclaim detection");
} else {
ALOGI("Using vmstats for direct reclaim detection");
if (direct_reclaim_threshold_ms > 0) {
ALOGW("Kernel support for direct_reclaim_threshold_ms is not found");
direct_reclaim_threshold_ms = 0;
}
}
monitors_initialized = true;
@ -3916,6 +3930,8 @@ static bool update_props() {
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100);
delay_monitors_until_boot = GET_LMK_PROPERTY(bool, "delay_monitors_until_boot", false);
direct_reclaim_threshold_ms =
GET_LMK_PROPERTY(int64, "direct_reclaim_threshold_ms", DEF_DIRECT_RECL_THRESH_MS);
reaper.enable_debug(debug_process_killing);

View File

@ -49,3 +49,6 @@ on property:persist.device_config.lmkd_native.swap_util_max=*
on property:persist.device_config.lmkd_native.filecache_min_kb=*
setprop lmkd.reinit ${sys.boot_completed:-0}
on property:persist.device_config.lmkd_native.direct_reclaim_threshold_ms=*
setprop lmkd.reinit ${sys.boot_completed:-0}

View File

@ -65,6 +65,7 @@ enum kill_reasons {
LOW_MEM_AND_SWAP_UTIL,
LOW_FILECACHE_AFTER_THRASHING,
LOW_MEM,
DIRECT_RECL_STUCK,
KILL_REASON_COUNT
};