lmkd: rate-limit and clean up failed kill reports
An excessive number of failed kill reports, emitted when lmkd can't find an eligible process to kill or fails to free enough memory, pollutes logs and bugreports. Clean up kill reports to remove duplicate information, and rate-limit failed kill reports to 1 report per second. The number of suppressed failed kill reports will be included in the next lmkd report. Bug: 113864581 Test: Verified using lmkd_unit_test Change-Id: I67fa1fec97613f136c7582115edcbc56b1503c9c Signed-off-by: Suren Baghdasaryan <surenb@google.com>
This commit is contained in:
parent
1ed0db1a19
commit
53be36e6d8
73
lmkd.c
73
lmkd.c
|
|
@ -97,6 +97,8 @@
|
||||||
|
|
||||||
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
||||||
|
|
||||||
|
#define FAIL_REPORT_RLIMIT_MS 1000
|
||||||
|
|
||||||
/* default to old in-kernel interface if no memory pressure events */
|
/* default to old in-kernel interface if no memory pressure events */
|
||||||
static bool use_inkernel_interface = true;
|
static bool use_inkernel_interface = true;
|
||||||
static bool has_inkernel_module;
|
static bool has_inkernel_module;
|
||||||
|
|
@ -1097,8 +1099,7 @@ static struct proc *proc_get_heaviest(int oomadj) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Kill one process specified by procp. Returns the size of the process killed */
|
/* Kill one process specified by procp. Returns the size of the process killed */
|
||||||
static int kill_one_process(struct proc* procp, int min_score_adj,
|
static int kill_one_process(struct proc* procp) {
|
||||||
enum vmpressure_level level) {
|
|
||||||
int pid = procp->pid;
|
int pid = procp->pid;
|
||||||
uid_t uid = procp->uid;
|
uid_t uid = procp->uid;
|
||||||
char *taskname;
|
char *taskname;
|
||||||
|
|
@ -1132,11 +1133,8 @@ static int kill_one_process(struct proc* procp, int min_score_adj,
|
||||||
|
|
||||||
/* CAP_KILL required */
|
/* CAP_KILL required */
|
||||||
r = kill(pid, SIGKILL);
|
r = kill(pid, SIGKILL);
|
||||||
ALOGI(
|
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB",
|
||||||
"Killing '%s' (%d), uid %d, adj %d\n"
|
taskname, pid, uid, procp->oomadj, tasksize * page_k);
|
||||||
" to free %ldkB because system is under %s memory pressure (min_oom_adj=%d)\n",
|
|
||||||
taskname, pid, uid, procp->oomadj, tasksize * page_k,
|
|
||||||
level_name[level], min_score_adj);
|
|
||||||
pid_remove(pid);
|
pid_remove(pid);
|
||||||
|
|
||||||
TRACE_KILL_END();
|
TRACE_KILL_END();
|
||||||
|
|
@ -1163,8 +1161,7 @@ static int kill_one_process(struct proc* procp, int min_score_adj,
|
||||||
* If pages_to_free is set to 0 only one process will be killed.
|
* If pages_to_free is set to 0 only one process will be killed.
|
||||||
* Returns the size of the killed processes.
|
* Returns the size of the killed processes.
|
||||||
*/
|
*/
|
||||||
static int find_and_kill_processes(enum vmpressure_level level,
|
static int find_and_kill_processes(int min_score_adj, int pages_to_free) {
|
||||||
int min_score_adj, int pages_to_free) {
|
|
||||||
int i;
|
int i;
|
||||||
int killed_size;
|
int killed_size;
|
||||||
int pages_freed = 0;
|
int pages_freed = 0;
|
||||||
|
|
@ -1183,7 +1180,7 @@ static int find_and_kill_processes(enum vmpressure_level level,
|
||||||
if (!procp)
|
if (!procp)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
killed_size = kill_one_process(procp, min_score_adj, level);
|
killed_size = kill_one_process(procp);
|
||||||
if (killed_size >= 0) {
|
if (killed_size >= 0) {
|
||||||
#ifdef LMKD_LOG_STATS
|
#ifdef LMKD_LOG_STATS
|
||||||
if (enable_stats_log && !lmk_state_change_start) {
|
if (enable_stats_log && !lmk_state_change_start) {
|
||||||
|
|
@ -1282,6 +1279,7 @@ static void mp_event_common(int data, uint32_t events __unused) {
|
||||||
enum vmpressure_level lvl;
|
enum vmpressure_level lvl;
|
||||||
union meminfo mi;
|
union meminfo mi;
|
||||||
union zoneinfo zi;
|
union zoneinfo zi;
|
||||||
|
struct timespec curr_tm;
|
||||||
static struct timespec last_kill_tm;
|
static struct timespec last_kill_tm;
|
||||||
static unsigned long kill_skip_count = 0;
|
static unsigned long kill_skip_count = 0;
|
||||||
enum vmpressure_level level = (enum vmpressure_level)data;
|
enum vmpressure_level level = (enum vmpressure_level)data;
|
||||||
|
|
@ -1312,14 +1310,12 @@ static void mp_event_common(int data, uint32_t events __unused) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kill_timeout_ms) {
|
|
||||||
struct timespec curr_tm;
|
|
||||||
|
|
||||||
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
|
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
|
||||||
ALOGE("Failed to get current time");
|
ALOGE("Failed to get current time");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (kill_timeout_ms) {
|
||||||
if (get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) {
|
if (get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) {
|
||||||
kill_skip_count++;
|
kill_skip_count++;
|
||||||
return;
|
return;
|
||||||
|
|
@ -1425,7 +1421,7 @@ static void mp_event_common(int data, uint32_t events __unused) {
|
||||||
do_kill:
|
do_kill:
|
||||||
if (low_ram_device) {
|
if (low_ram_device) {
|
||||||
/* For Go devices kill only one task */
|
/* For Go devices kill only one task */
|
||||||
if (find_and_kill_processes(level, level_oomadj[level], 0) == 0) {
|
if (find_and_kill_processes(level_oomadj[level], 0) == 0) {
|
||||||
if (debug_process_killing) {
|
if (debug_process_killing) {
|
||||||
ALOGI("Nothing to kill");
|
ALOGI("Nothing to kill");
|
||||||
}
|
}
|
||||||
|
|
@ -1434,6 +1430,8 @@ do_kill:
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int pages_freed;
|
int pages_freed;
|
||||||
|
static struct timespec last_report_tm;
|
||||||
|
static unsigned long report_skip_count = 0;
|
||||||
|
|
||||||
if (!use_minfree_levels) {
|
if (!use_minfree_levels) {
|
||||||
/* If pressure level is less than critical and enough free swap then ignore */
|
/* If pressure level is less than critical and enough free swap then ignore */
|
||||||
|
|
@ -1461,30 +1459,41 @@ do_kill:
|
||||||
min_score_adj = level_oomadj[level];
|
min_score_adj = level_oomadj[level];
|
||||||
}
|
}
|
||||||
|
|
||||||
pages_freed = find_and_kill_processes(level, min_score_adj, pages_to_free);
|
pages_freed = find_and_kill_processes(min_score_adj, pages_to_free);
|
||||||
|
|
||||||
if (use_minfree_levels) {
|
if (pages_freed == 0) {
|
||||||
ALOGI("Killing because cache %ldkB is below "
|
/* Rate limit kill reports when nothing was reclaimed */
|
||||||
"limit %ldkB for oom_adj %d\n"
|
if (get_time_diff_ms(&last_report_tm, &curr_tm) < FAIL_REPORT_RLIMIT_MS) {
|
||||||
" Free memory is %ldkB %s reserved",
|
report_skip_count++;
|
||||||
other_file * page_k, minfree * page_k, min_score_adj,
|
|
||||||
other_free * page_k, other_free >= 0 ? "above" : "below");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pages_freed < pages_to_free) {
|
|
||||||
ALOGI("Unable to free enough memory (pages to free=%d, pages freed=%d)",
|
|
||||||
pages_to_free, pages_freed);
|
|
||||||
} else {
|
|
||||||
ALOGI("Reclaimed enough memory (pages to free=%d, pages freed=%d)",
|
|
||||||
pages_to_free, pages_freed);
|
|
||||||
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &last_kill_tm) != 0) {
|
|
||||||
ALOGE("Failed to get current time");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pages_freed > 0) {
|
|
||||||
|
/* Log meminfo whenever we kill or when report rate limit allows */
|
||||||
meminfo_log(&mi);
|
meminfo_log(&mi);
|
||||||
|
if (pages_freed >= pages_to_free) {
|
||||||
|
/* Reset kill time only if reclaimed enough memory */
|
||||||
|
last_kill_tm = curr_tm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (use_minfree_levels) {
|
||||||
|
ALOGI("Killing to reclaim %ldkB, reclaimed %ldkB, cache(%ldkB) and "
|
||||||
|
"free(%" PRId64 "kB)-reserved(%" PRId64 "kB) below min(%ldkB) for oom_adj %d",
|
||||||
|
pages_to_free * page_k, pages_freed * page_k,
|
||||||
|
other_file * page_k, mi.field.nr_free_pages * page_k,
|
||||||
|
zi.field.totalreserve_pages * page_k,
|
||||||
|
minfree * page_k, min_score_adj);
|
||||||
|
} else {
|
||||||
|
ALOGI("Killing to reclaim %ldkB, reclaimed %ldkB at oom_adj %d",
|
||||||
|
pages_to_free * page_k, pages_freed * page_k, min_score_adj);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (report_skip_count > 0) {
|
||||||
|
ALOGI("Suppressed %lu failed kill reports", report_skip_count);
|
||||||
|
report_skip_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
last_report_tm = curr_tm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ using namespace android::base;
|
||||||
#define LMKDTEST_RESPAWN_FLAG "LMKDTEST_RESPAWN"
|
#define LMKDTEST_RESPAWN_FLAG "LMKDTEST_RESPAWN"
|
||||||
|
|
||||||
#define LMKD_LOGCAT_MARKER "lowmemorykiller"
|
#define LMKD_LOGCAT_MARKER "lowmemorykiller"
|
||||||
#define LMKD_KILL_MARKER_TEMPLATE LMKD_LOGCAT_MARKER ": Killing '%s'"
|
#define LMKD_KILL_MARKER_TEMPLATE LMKD_LOGCAT_MARKER ": Kill '%s'"
|
||||||
#define OOM_MARKER "Out of memory"
|
#define OOM_MARKER "Out of memory"
|
||||||
#define OOM_KILL_MARKER "Killed process"
|
#define OOM_KILL_MARKER "Killed process"
|
||||||
#define MIN_LOG_SIZE 100
|
#define MIN_LOG_SIZE 100
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue