diff --git a/README.md b/README.md index ae406e3..c378bda 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,9 @@ properties: cycle after kill used to allow blocking of killing critical processes when not enough memory was freed in a kill cycle. Default score = 0. + - `ro.lmk.direct_reclaim_threshold_ms`: direct reclaim duration threshold in + milliseconds to consider the system as stuck in + direct reclaim. Default = 0 (disabled) lmkd will set the following Android properties according to current system configurations: diff --git a/lmkd.cpp b/lmkd.cpp index ded3510..30b4338 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -40,7 +40,6 @@ #include #include -#include #include #include #include @@ -161,6 +160,8 @@ static inline void trace_kill_end() {} #define DEF_PARTIAL_STALL 70 /* ro.lmk.psi_complete_stall_ms property defaults */ #define DEF_COMPLETE_STALL 700 +/* ro.lmk.direct_reclaim_threshold_ms property defaults */ +#define DEF_DIRECT_RECL_THRESH_MS 0 #define LMKD_REINIT_PROP "lmkd.reinit" @@ -228,6 +229,7 @@ static int64_t stall_limit_critical; static bool use_psi_monitors = false; static int kpoll_fd; static bool delay_monitors_until_boot; +static int direct_reclaim_threshold_ms; static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = { { PSI_SOME, 70 }, /* 70ms out of 1sec for partial stall */ { PSI_SOME, 100 }, /* 100ms out of 1sec for partial stall */ @@ -2631,6 +2633,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ int64_t workingset_refault_file; bool critical_stall = false; bool in_direct_reclaim; + long direct_reclaim_duration_ms; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); @@ -2692,6 +2695,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ init_pgscan_direct = vs.field.pgscan_direct; init_pgscan_kswapd = vs.field.pgscan_kswapd; init_pgrefill = vs.field.pgrefill; + direct_reclaim_duration_ms = 
get_time_diff_ms(&direct_reclaim_start_tm, &curr_tm); reclaim = DIRECT_RECLAIM; } else if (vs.field.pgscan_kswapd != init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; @@ -2849,6 +2853,12 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } check_filecache = true; + } else if (reclaim == DIRECT_RECLAIM && direct_reclaim_threshold_ms > 0 && + direct_reclaim_duration_ms > direct_reclaim_threshold_ms) { + kill_reason = DIRECT_RECL_STUCK; /* duration is declared long, so format it with %ld (PRId64 expects int64_t) */ + snprintf(kill_desc, sizeof(kill_desc), + "device is stuck in direct reclaim (%ldms > %dms)", + direct_reclaim_duration_ms, direct_reclaim_threshold_ms); } else if (check_filecache) { int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k; @@ -3499,6 +3509,10 @@ static bool init_monitors() { ALOGI("Using memevents for direct reclaim detection"); } else { ALOGI("Using vmstats for direct reclaim detection"); + if (direct_reclaim_threshold_ms > 0) { + ALOGW("Kernel support for direct_reclaim_threshold_ms is not found"); + direct_reclaim_threshold_ms = 0; + } } monitors_initialized = true; @@ -3916,6 +3930,8 @@ static bool update_props() { filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0); stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100); delay_monitors_until_boot = GET_LMK_PROPERTY(bool, "delay_monitors_until_boot", false); + direct_reclaim_threshold_ms = + GET_LMK_PROPERTY(int64, "direct_reclaim_threshold_ms", DEF_DIRECT_RECL_THRESH_MS); reaper.enable_debug(debug_process_killing); diff --git a/lmkd.rc b/lmkd.rc index ba662b4..ffe0bc6 100644 --- a/lmkd.rc +++ b/lmkd.rc @@ -49,3 +49,6 @@ on property:persist.device_config.lmkd_native.swap_util_max=* on property:persist.device_config.lmkd_native.filecache_min_kb=* setprop lmkd.reinit ${sys.boot_completed:-0} + +on property:persist.device_config.lmkd_native.direct_reclaim_threshold_ms=* + setprop lmkd.reinit 
${sys.boot_completed:-0} diff --git a/statslog.h b/statslog.h index 292d556..60c7016 100644 --- a/statslog.h +++ b/statslog.h @@ -65,6 +65,7 @@ enum kill_reasons { LOW_MEM_AND_SWAP_UTIL, LOW_FILECACHE_AFTER_THRASHING, LOW_MEM, + DIRECT_RECL_STUCK, KILL_REASON_COUNT };