From 65e7d14e2a5ba8708d6ad9c7b209f8b9d2cf0ac1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 22 Feb 2022 14:57:18 +0800 Subject: [PATCH] lmkd: Allow killing perceptible apps when recorded stall is too high When the system is under heavy memory pressure, it might still be able to keep free memory above the min watermark, avoiding perceptible app kills. In such a situation the system might end up using all its CPU capacity on memory reclaim and not doing productive work. To detect this condition, check the memory full stall and compare it with the new ro.lmk.stall_limit_critical tunable representing the stall threshold. When the recorded level is over ro.lmk.stall_limit_critical, lmkd will be allowed to kill perceptible apps. ro.lmk.stall_limit_critical represents the max memory full stall in % that is allowed before perceptible apps will get killed. By default it is set to 100%, which effectively disables the feature. Currently system stall is measured based on the PSI memory full stall 10s average value; however, this definition might change in the future if better metrics are developed. Setting ro.lmk.stall_limit_critical to 5 means perceptible apps may be killed once the system has been fully stalled (no productive work is done) for more than 5% of the 10sec period, i.e. more than 0.5 sec lost to the stall. 
Bug: 205182133 Test: verify on heavy memory pressure test Signed-off-by: Suren Baghdasaryan Signed-off-by: Martin Liu Change-Id: I9713e30d82641d86d1b7edb5e1ba2971b935c898 Merged-In: I9713e30d82641d86d1b7edb5e1ba2971b935c898 --- libpsi/include/psi/psi.h | 20 +++++++++++++++++++ libpsi/psi.cpp | 22 +++++++++++++++----- lmkd.cpp | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 5 deletions(-) diff --git a/libpsi/include/psi/psi.h b/libpsi/include/psi/psi.h index cd49e8b..f29a7e7 100644 --- a/libpsi/include/psi/psi.h +++ b/libpsi/include/psi/psi.h @@ -22,12 +22,25 @@ __BEGIN_DECLS +#define PSI_PATH_MEMORY "/proc/pressure/memory" + enum psi_stall_type { PSI_SOME, PSI_FULL, PSI_TYPE_COUNT }; +struct psi_stats { + float avg10; + float avg60; + float avg300; + unsigned long total; +}; + +struct psi_data { + struct psi_stats mem_stats[PSI_TYPE_COUNT]; +}; + /* * Initializes psi monitor. * stall_type, threshold_us and window_us are monitor parameters @@ -63,6 +76,13 @@ int unregister_psi_monitor(int epollfd, int fd); */ void destroy_psi_monitor(int fd); +/* + * Parse psi file line content. 
Expected file format is: + * some avg10=0.00 avg60=0.00 avg300=0.00 total=0 + * full avg10=0.00 avg60=0.00 avg300=0.00 total=0 + */ +int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]); + __END_DECLS #endif // __ANDROID_PSI_H__ diff --git a/libpsi/psi.cpp b/libpsi/psi.cpp index 89f07ed..54f9971 100644 --- a/libpsi/psi.cpp +++ b/libpsi/psi.cpp @@ -28,8 +28,6 @@ #include #include "psi/psi.h" -#define PSI_MON_FILE_MEMORY "/proc/pressure/memory" - static const char* stall_type_name[] = { "some", "full", @@ -41,7 +39,7 @@ int init_psi_monitor(enum psi_stall_type stall_type, int res; char buf[256]; - fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC)); + fd = TEMP_FAILURE_RETRY(open(PSI_PATH_MEMORY, O_WRONLY | O_CLOEXEC)); if (fd < 0) { ALOGE("No kernel psi monitor support (errno=%d)", errno); return -1; @@ -61,7 +59,7 @@ int init_psi_monitor(enum psi_stall_type stall_type, if (res >= (ssize_t)sizeof(buf)) { ALOGE("%s line overflow for psi stall type '%s'", - PSI_MON_FILE_MEMORY, stall_type_name[stall_type]); + PSI_PATH_MEMORY, stall_type_name[stall_type]); errno = EINVAL; goto err; } @@ -69,7 +67,7 @@ int init_psi_monitor(enum psi_stall_type stall_type, res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1)); if (res < 0) { ALOGE("%s write failed for psi stall type '%s'; errno=%d", - PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno); + PSI_PATH_MEMORY, stall_type_name[stall_type], errno); goto err; } @@ -102,3 +100,17 @@ void destroy_psi_monitor(int fd) { close(fd); } } + +int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]) { + char type_name[5]; + struct psi_stats *stat = &stats[stall_type]; + + if (!line || sscanf(line, "%4s avg10=%f avg60=%f avg300=%f total=%lu", + type_name, &stat->avg10, &stat->avg60, &stat->avg300, &stat->total) != 5) { + return -1; + } + if (strcmp(type_name, stall_type_name[stall_type])) { + return -1; + } + return 0; +} diff --git a/lmkd.cpp 
b/lmkd.cpp index 5fe5f06..1e174e5 100644 --- a/lmkd.cpp +++ b/lmkd.cpp @@ -212,6 +212,7 @@ static int thrashing_limit_decay_pct; static int thrashing_critical_pct; static int swap_util_max; static int64_t filecache_min_kb; +static int64_t stall_limit_critical; static bool use_psi_monitors = false; static int kpoll_fd; static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = { @@ -1897,6 +1898,37 @@ static int vmstat_parse(union vmstat *vs) { return 0; } +static int psi_parse(struct reread_data *file_data, struct psi_stats stats[], bool full) { + char *buf; + char *save_ptr; + char *line; + + if ((buf = reread_file(file_data)) == NULL) { + return -1; + } + + line = strtok_r(buf, "\n", &save_ptr); + if (parse_psi_line(line, PSI_SOME, stats)) { + return -1; + } + if (full) { + line = strtok_r(NULL, "\n", &save_ptr); + if (parse_psi_line(line, PSI_FULL, stats)) { + return -1; + } + } + + return 0; +} + +static int psi_parse_mem(struct psi_data *psi_data) { + static struct reread_data file_data = { + .filename = PSI_PATH_MEMORY, + .fd = -1, + }; + return psi_parse(&file_data, psi_data->mem_stats, true); +} + enum wakeup_reason { Event, Polling @@ -2425,6 +2457,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ union meminfo mi; union vmstat vs; + struct psi_data psi_data; struct timespec curr_tm; int64_t thrashing = 0; bool swap_is_low = false; @@ -2439,6 +2472,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ int swap_util = 0; long since_thrashing_reset_ms; int64_t workingset_refault_file; + bool critical_stall = false; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); @@ -2571,6 +2605,9 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ /* Find out which watermark is breached if any */ wmark = get_lowest_watermark(&mi, &watermarks); + if (!psi_parse_mem(&psi_data)) { + critical_stall = 
psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical; + } /* * TODO: move this logic into a separate function * Decide if killing a process is necessary and record the reason @@ -2668,6 +2705,11 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_ .thrashing = (int)thrashing, .max_thrashing = max_thrashing, }; + + /* Allow killing perceptible apps if the system is stalled */ + if (critical_stall) { + min_score_adj = 0; + } int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm); if (pages_freed > 0) { killing = true; @@ -3464,6 +3506,7 @@ static void update_props() { thrashing_limit_pct * 2)); swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100)); filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0); + stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100); } int main(int argc, char **argv) {