lmkd: Allow killing perceptible apps when recorded stall is too high
When the system is under heavy memory pressure, it might be able to keep free memory above the min watermark, avoiding perceptible app kills. In such a situation the system might end up using all its cpu capacity on memory reclaim and not doing productive work. To detect this condition, check the memory full stall and compare it with the new ro.lmk.stall_limit_critical tunable representing the stall threshold. When the recorded level is over ro.lmk.stall_limit_critical, lmkd will be allowed to kill perceptible apps. ro.lmk.stall_limit_critical represents the max memory full stall in % that is allowed before perceptible apps will get killed. By default it is set to 100%, which effectively disables the feature. Currently system stall is measured based on the psi memory stall 10s average value; however, this definition might change in the future if better metrics are developed. Setting ro.lmk.stall_limit_critical to 5 means the system should be fully stalled (no productive work is done) for 5% of the 10sec period, resulting in 0.5 sec loss due to the stall. Bug: 205182133 Signed-off-by: Suren Baghdasaryan <surenb@google.com> Change-Id: I9713e30d82641d86d1b7edb5e1ba2971b935c898
This commit is contained in:
parent
2bf5487381
commit
5ae47a9563
|
|
@ -22,12 +22,25 @@
|
|||
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define PSI_PATH_MEMORY "/proc/pressure/memory"
|
||||
|
||||
/*
 * Kind of psi stall record. The values double as indices into
 * per-type arrays such as psi_data.mem_stats[].
 */
enum psi_stall_type {
    PSI_SOME = 0,   /* the "some ..." line of a psi file */
    PSI_FULL,       /* the "full ..." line of a psi file */
    PSI_TYPE_COUNT  /* number of stall types; not a valid type itself */
};
|
||||
|
||||
/*
 * Values parsed from a single psi file line of the form
 *   <type> avg10=0.00 avg60=0.00 avg300=0.00 total=0
 */
struct psi_stats {
    /* "avg10" field: stall average over the last 10 seconds, in percent */
    float avg10;
    /* "avg60" field; presumably the 60 second average - TODO confirm */
    float avg60;
    /* "avg300" field; presumably the 300 second average - TODO confirm */
    float avg300;
    /*
     * "total" field.
     * NOTE(review): kernel psi docs describe this as cumulative stall
     * time in microseconds - confirm.
     */
    unsigned long total;
};
|
||||
|
||||
struct psi_data {
|
||||
struct psi_stats mem_stats[PSI_TYPE_COUNT];
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes psi monitor.
|
||||
* stall_type, threshold_us and window_us are monitor parameters
|
||||
|
|
@ -63,6 +76,13 @@ int unregister_psi_monitor(int epollfd, int fd);
|
|||
*/
|
||||
void destroy_psi_monitor(int fd);
|
||||
|
||||
/*
|
||||
* Parse psi file line content. Expected file format is:
|
||||
* some avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
* full avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
*/
|
||||
int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]);
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif // __ANDROID_PSI_H__
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@
|
|||
#include <stdio.h>
|
||||
#include "psi/psi.h"
|
||||
|
||||
#define PSI_MON_FILE_MEMORY "/proc/pressure/memory"
|
||||
|
||||
static const char* stall_type_name[] = {
|
||||
"some",
|
||||
"full",
|
||||
|
|
@ -41,7 +39,7 @@ int init_psi_monitor(enum psi_stall_type stall_type,
|
|||
int res;
|
||||
char buf[256];
|
||||
|
||||
fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC));
|
||||
fd = TEMP_FAILURE_RETRY(open(PSI_PATH_MEMORY, O_WRONLY | O_CLOEXEC));
|
||||
if (fd < 0) {
|
||||
ALOGE("No kernel psi monitor support (errno=%d)", errno);
|
||||
return -1;
|
||||
|
|
@ -61,7 +59,7 @@ int init_psi_monitor(enum psi_stall_type stall_type,
|
|||
|
||||
if (res >= (ssize_t)sizeof(buf)) {
|
||||
ALOGE("%s line overflow for psi stall type '%s'",
|
||||
PSI_MON_FILE_MEMORY, stall_type_name[stall_type]);
|
||||
PSI_PATH_MEMORY, stall_type_name[stall_type]);
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
|
@ -69,7 +67,7 @@ int init_psi_monitor(enum psi_stall_type stall_type,
|
|||
res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1));
|
||||
if (res < 0) {
|
||||
ALOGE("%s write failed for psi stall type '%s'; errno=%d",
|
||||
PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno);
|
||||
PSI_PATH_MEMORY, stall_type_name[stall_type], errno);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
|
@ -102,3 +100,17 @@ void destroy_psi_monitor(int fd) {
|
|||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]) {
|
||||
char type_name[5];
|
||||
struct psi_stats *stat = &stats[stall_type];
|
||||
|
||||
if (!line || sscanf(line, "%4s avg10=%f avg60=%f avg300=%f total=%lu",
|
||||
type_name, &stat->avg10, &stat->avg60, &stat->avg300, &stat->total) != 5) {
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(type_name, stall_type_name[stall_type])) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
43
lmkd.cpp
43
lmkd.cpp
|
|
@ -218,6 +218,7 @@ static int thrashing_limit_decay_pct;
|
|||
static int thrashing_critical_pct;
|
||||
static int swap_util_max;
|
||||
static int64_t filecache_min_kb;
|
||||
static int64_t stall_limit_critical;
|
||||
static bool use_psi_monitors = false;
|
||||
static int kpoll_fd;
|
||||
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
|
||||
|
|
@ -1915,6 +1916,37 @@ static int vmstat_parse(union vmstat *vs) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int psi_parse(struct reread_data *file_data, struct psi_stats stats[], bool full) {
|
||||
char *buf;
|
||||
char *save_ptr;
|
||||
char *line;
|
||||
|
||||
if ((buf = reread_file(file_data)) == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
line = strtok_r(buf, "\n", &save_ptr);
|
||||
if (parse_psi_line(line, PSI_SOME, stats)) {
|
||||
return -1;
|
||||
}
|
||||
if (full) {
|
||||
line = strtok_r(NULL, "\n", &save_ptr);
|
||||
if (parse_psi_line(line, PSI_FULL, stats)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psi_parse_mem(struct psi_data *psi_data) {
|
||||
static struct reread_data file_data = {
|
||||
.filename = PSI_PATH_MEMORY,
|
||||
.fd = -1,
|
||||
};
|
||||
return psi_parse(&file_data, psi_data->mem_stats, true);
|
||||
}
|
||||
|
||||
enum wakeup_reason {
|
||||
Event,
|
||||
Polling
|
||||
|
|
@ -2500,6 +2532,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
|
|||
|
||||
union meminfo mi;
|
||||
union vmstat vs;
|
||||
struct psi_data psi_data;
|
||||
struct timespec curr_tm;
|
||||
int64_t thrashing = 0;
|
||||
bool swap_is_low = false;
|
||||
|
|
@ -2515,6 +2548,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
|
|||
int64_t swap_low_threshold;
|
||||
long since_thrashing_reset_ms;
|
||||
int64_t workingset_refault_file;
|
||||
bool critical_stall = false;
|
||||
|
||||
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
|
||||
ALOGE("Failed to get current time");
|
||||
|
|
@ -2647,6 +2681,9 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
|
|||
/* Find out which watermark is breached if any */
|
||||
wmark = get_lowest_watermark(&mi, &watermarks);
|
||||
|
||||
if (!psi_parse_mem(&psi_data)) {
|
||||
critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
|
||||
}
|
||||
/*
|
||||
* TODO: move this logic into a separate function
|
||||
* Decide if killing a process is necessary and record the reason
|
||||
|
|
@ -2744,6 +2781,11 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
|
|||
.thrashing = (int)thrashing,
|
||||
.max_thrashing = max_thrashing,
|
||||
};
|
||||
|
||||
/* Allow killing perceptible apps if the system is stalled */
|
||||
if (critical_stall) {
|
||||
min_score_adj = 0;
|
||||
}
|
||||
int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
|
||||
if (pages_freed > 0) {
|
||||
killing = true;
|
||||
|
|
@ -3601,6 +3643,7 @@ static void update_props() {
|
|||
thrashing_limit_pct * 2));
|
||||
swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
|
||||
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
|
||||
stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100);
|
||||
|
||||
reaper.enable_debug(debug_process_killing);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue