lmkd: Introduce kill strategy based on direct reclaim length am: 81a7c21087

Original change: https://android-review.googlesource.com/c/platform/system/memory/lmkd/+/2959942

Change-Id: Ic643aaf44e45f21b941deeebdcf674c2591e88c2
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Carlos Galo 2024-03-26 20:33:12 +00:00 committed by Automerger Merge Worker
commit ed5f6f9c6b
4 changed files with 24 additions and 1 deletions

View File

@ -92,6 +92,9 @@ properties:
cycle after kill used to allow blocking of killing
critical processes when not enough memory was freed
in a kill cycle. Default score = 0.
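- `ro.lmk.direct_reclaim_threshold_ms`: direct reclaim duration threshold, in
milliseconds, above which lmkd considers the system stuck in
direct reclaim and triggers a kill. The value is reset to 0 when lmkd
falls back to vmstats for direct reclaim detection. Default = 0 (disabled).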
lmkd will set the following Android properties according to current system
configurations:

View File

@ -40,7 +40,6 @@
#include <shared_mutex> #include <shared_mutex>
#include <vector> #include <vector>
#include <bpf/KernelUtils.h>
#include <bpf/WaitForProgsLoaded.h> #include <bpf/WaitForProgsLoaded.h>
#include <cutils/properties.h> #include <cutils/properties.h>
#include <cutils/sockets.h> #include <cutils/sockets.h>
@ -161,6 +160,8 @@ static inline void trace_kill_end() {}
#define DEF_PARTIAL_STALL 70 #define DEF_PARTIAL_STALL 70
/* ro.lmk.psi_complete_stall_ms property defaults */ /* ro.lmk.psi_complete_stall_ms property defaults */
#define DEF_COMPLETE_STALL 700 #define DEF_COMPLETE_STALL 700
/* ro.lmk.direct_reclaim_threshold_ms property defaults */
#define DEF_DIRECT_RECL_THRESH_MS 0
#define LMKD_REINIT_PROP "lmkd.reinit" #define LMKD_REINIT_PROP "lmkd.reinit"
@ -228,6 +229,7 @@ static int64_t stall_limit_critical;
static bool use_psi_monitors = false; static bool use_psi_monitors = false;
static int kpoll_fd; static int kpoll_fd;
static bool delay_monitors_until_boot; static bool delay_monitors_until_boot;
static int direct_reclaim_threshold_ms;
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = { static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
{ PSI_SOME, 70 }, /* 70ms out of 1sec for partial stall */ { PSI_SOME, 70 }, /* 70ms out of 1sec for partial stall */
{ PSI_SOME, 100 }, /* 100ms out of 1sec for partial stall */ { PSI_SOME, 100 }, /* 100ms out of 1sec for partial stall */
@ -2631,6 +2633,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
int64_t workingset_refault_file; int64_t workingset_refault_file;
bool critical_stall = false; bool critical_stall = false;
bool in_direct_reclaim; bool in_direct_reclaim;
long direct_reclaim_duration_ms;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time"); ALOGE("Failed to get current time");
@ -2692,6 +2695,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
init_pgscan_direct = vs.field.pgscan_direct; init_pgscan_direct = vs.field.pgscan_direct;
init_pgscan_kswapd = vs.field.pgscan_kswapd; init_pgscan_kswapd = vs.field.pgscan_kswapd;
init_pgrefill = vs.field.pgrefill; init_pgrefill = vs.field.pgrefill;
direct_reclaim_duration_ms = get_time_diff_ms(&direct_reclaim_start_tm, &curr_tm);
reclaim = DIRECT_RECLAIM; reclaim = DIRECT_RECLAIM;
} else if (vs.field.pgscan_kswapd != init_pgscan_kswapd) { } else if (vs.field.pgscan_kswapd != init_pgscan_kswapd) {
init_pgscan_kswapd = vs.field.pgscan_kswapd; init_pgscan_kswapd = vs.field.pgscan_kswapd;
@ -2849,6 +2853,12 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
min_score_adj = PERCEPTIBLE_APP_ADJ + 1; min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
} }
check_filecache = true; check_filecache = true;
} else if (reclaim == DIRECT_RECLAIM && direct_reclaim_threshold_ms > 0 &&
direct_reclaim_duration_ms > direct_reclaim_threshold_ms) {
kill_reason = DIRECT_RECL_STUCK;
snprintf(kill_desc, sizeof(kill_desc),
"device is stuck in direct reclaim (%" PRId64 "ms > %dms)",
direct_reclaim_duration_ms, direct_reclaim_threshold_ms);
} else if (check_filecache) { } else if (check_filecache) {
int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k; int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k;
@ -3499,6 +3509,10 @@ static bool init_monitors() {
ALOGI("Using memevents for direct reclaim detection"); ALOGI("Using memevents for direct reclaim detection");
} else { } else {
ALOGI("Using vmstats for direct reclaim detection"); ALOGI("Using vmstats for direct reclaim detection");
if (direct_reclaim_threshold_ms > 0) {
ALOGW("Kernel support for direct_reclaim_threshold_ms is not found");
direct_reclaim_threshold_ms = 0;
}
} }
monitors_initialized = true; monitors_initialized = true;
@ -3916,6 +3930,8 @@ static bool update_props() {
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0); filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100); stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100);
delay_monitors_until_boot = GET_LMK_PROPERTY(bool, "delay_monitors_until_boot", false); delay_monitors_until_boot = GET_LMK_PROPERTY(bool, "delay_monitors_until_boot", false);
direct_reclaim_threshold_ms =
GET_LMK_PROPERTY(int64, "direct_reclaim_threshold_ms", DEF_DIRECT_RECL_THRESH_MS);
reaper.enable_debug(debug_process_killing); reaper.enable_debug(debug_process_killing);

View File

@ -49,3 +49,6 @@ on property:persist.device_config.lmkd_native.swap_util_max=*
on property:persist.device_config.lmkd_native.filecache_min_kb=* on property:persist.device_config.lmkd_native.filecache_min_kb=*
setprop lmkd.reinit ${sys.boot_completed:-0} setprop lmkd.reinit ${sys.boot_completed:-0}
on property:persist.device_config.lmkd_native.direct_reclaim_threshold_ms=*
setprop lmkd.reinit ${sys.boot_completed:-0}

View File

@ -65,6 +65,7 @@ enum kill_reasons {
LOW_MEM_AND_SWAP_UTIL, LOW_MEM_AND_SWAP_UTIL,
LOW_FILECACHE_AFTER_THRASHING, LOW_FILECACHE_AFTER_THRASHING,
LOW_MEM, LOW_MEM,
DIRECT_RECL_STUCK,
KILL_REASON_COUNT KILL_REASON_COUNT
}; };