Emit swap size in the killed process' statsd atoms

Changes:
- We are already reading /proc/pid/status to resolve the tgid. While we
are at it, also parse RSS and swap values.
- Use the RSS and swap values for non memcg builds when creating the
statsd outputs
- Given we already read RSS, remove the separate read of /proc/pid/statm
that used to get tasksize.

Bug: 163116785
Test: manual, out/host/linux-x86/bin/statsd_testdrive 51
Change-Id: I9d98b9ffe8be0b014bb09174ec9532382cae1f38
This commit is contained in:
Ioannis Ilkos 2020-08-01 10:50:40 +01:00
parent d7b4fcb8a5
commit 279268a07f
3 changed files with 83 additions and 65 deletions

105
lmkd.cpp
View File

@ -83,6 +83,8 @@
#define MEMINFO_PATH "/proc/meminfo" #define MEMINFO_PATH "/proc/meminfo"
#define VMSTAT_PATH "/proc/vmstat" #define VMSTAT_PATH "/proc/vmstat"
#define PROC_STATUS_TGID_FIELD "Tgid:" #define PROC_STATUS_TGID_FIELD "Tgid:"
#define PROC_STATUS_RSS_FIELD "VmRSS:"
#define PROC_STATUS_SWAP_FIELD "VmSwap:"
#define LINE_MAX 128 #define LINE_MAX 128
#define PERCEPTIBLE_APP_ADJ 200 #define PERCEPTIBLE_APP_ADJ 200
@ -798,7 +800,7 @@ static void poll_kernel(int poll_fd) {
mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK)); mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE; mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj, stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj,
min_score_adj, 0, &mem_st); min_score_adj, &mem_st);
} }
free(taskname); free(taskname);
@ -930,30 +932,33 @@ static inline long get_time_diff_ms(struct timespec *from,
(to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS; (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
} }
static int proc_get_tgid(int pid) { /* Reads /proc/pid/status into buf. */
static bool read_proc_status(int pid, char *buf, size_t buf_sz) {
char path[PATH_MAX]; char path[PATH_MAX];
char buf[PAGE_SIZE];
int fd; int fd;
ssize_t size; ssize_t size;
char *pos;
int64_t tgid = -1;
snprintf(path, PATH_MAX, "/proc/%d/status", pid); snprintf(path, PATH_MAX, "/proc/%d/status", pid);
fd = open(path, O_RDONLY | O_CLOEXEC); fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0) { if (fd < 0) {
return -1; return false;
} }
size = read_all(fd, buf, sizeof(buf) - 1); size = read_all(fd, buf, buf_sz - 1);
close(fd);
if (size < 0) { if (size < 0) {
goto out; return false;
} }
buf[size] = 0; buf[size] = 0;
return true;
}
pos = buf; /* Looks for tag in buf and parses the first integer */
static bool parse_status_tag(char *buf, const char *tag, int64_t *out) {
char *pos = buf;
while (true) { while (true) {
pos = strstr(pos, PROC_STATUS_TGID_FIELD); pos = strstr(pos, tag);
/* Stop if TGID tag not found or found at the line beginning */ /* Stop if tag not found or found at the line beginning */
if (pos == NULL || pos == buf || pos[-1] == '\n') { if (pos == NULL || pos == buf || pos[-1] == '\n') {
break; break;
} }
@ -961,16 +966,12 @@ static int proc_get_tgid(int pid) {
} }
if (pos == NULL) { if (pos == NULL) {
goto out; return false;
} }
pos += strlen(PROC_STATUS_TGID_FIELD); pos += strlen(tag);
while (*pos == ' ') pos++; while (*pos == ' ') ++pos;
parse_int64(pos, &tgid); return parse_int64(pos, out);
out:
close(fd);
return (int)tgid;
} }
static int proc_get_size(int pid) { static int proc_get_size(int pid) {
@ -1034,7 +1035,8 @@ static void cmd_procprio(LMKD_CTRL_PACKET packet, int field_count, struct ucred
struct lmk_procprio params; struct lmk_procprio params;
bool is_system_server; bool is_system_server;
struct passwd *pwdrec; struct passwd *pwdrec;
int tgid; int64_t tgid;
char buf[PAGE_SIZE];
lmkd_pack_get_procprio(packet, field_count, &params); lmkd_pack_get_procprio(packet, field_count, &params);
@ -1050,12 +1052,13 @@ static void cmd_procprio(LMKD_CTRL_PACKET packet, int field_count, struct ucred
} }
/* Check if registered process is a thread group leader */ /* Check if registered process is a thread group leader */
tgid = proc_get_tgid(params.pid); if (read_proc_status(params.pid, buf, sizeof(buf))) {
if (tgid >= 0 && tgid != params.pid) { if (parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid) && tgid != params.pid) {
ALOGE("Attempt to register a task that is not a thread group leader (tid %d, tgid %d)", ALOGE("Attempt to register a task that is not a thread group leader "
params.pid, tgid); "(tid %d, tgid %" PRId64 ")", params.pid, tgid);
return; return;
} }
}
/* gid containing AID_READPROC required */ /* gid containing AID_READPROC required */
/* CAP_SYS_RESOURCE required */ /* CAP_SYS_RESOURCE required */
@ -1854,7 +1857,7 @@ static void record_wakeup_time(struct timespec *tm, enum wakeup_reason reason,
} }
} }
static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize, static void killinfo_log(struct proc* procp, int min_oom_score, int rss_kb,
int kill_reason, union meminfo *mi, int kill_reason, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) { struct wakeup_info *wi, struct timespec *tm) {
/* log process information */ /* log process information */
@ -1862,7 +1865,7 @@ static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize,
android_log_write_int32(ctx, procp->uid); android_log_write_int32(ctx, procp->uid);
android_log_write_int32(ctx, procp->oomadj); android_log_write_int32(ctx, procp->oomadj);
android_log_write_int32(ctx, min_oom_score); android_log_write_int32(ctx, min_oom_score);
android_log_write_int32(ctx, (int32_t)min(tasksize * page_k, INT32_MAX)); android_log_write_int32(ctx, (int32_t)min(rss_kb, INT32_MAX));
android_log_write_int32(ctx, kill_reason); android_log_write_int32(ctx, kill_reason);
/* log meminfo fields */ /* log meminfo fields */
@ -2040,38 +2043,48 @@ static void start_wait_for_proc_kill(int pid_or_fd) {
maxevents++; maxevents++;
} }
/* Kill one process specified by procp. Returns the size of the process killed */ /* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason, static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi, const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) { struct timespec *tm) {
int pid = procp->pid; int pid = procp->pid;
int pidfd = procp->pidfd; int pidfd = procp->pidfd;
uid_t uid = procp->uid; uid_t uid = procp->uid;
int tgid;
char *taskname; char *taskname;
int tasksize;
int r; int r;
int result = -1; int result = -1;
struct memory_stat *mem_st; struct memory_stat *mem_st;
char buf[LINE_MAX]; int64_t tgid;
int64_t rss_kb;
int64_t swap_kb;
char buf[PAGE_SIZE];
tgid = proc_get_tgid(pid); if (!read_proc_status(pid, buf, sizeof(buf))) {
if (tgid >= 0 && tgid != pid) { goto out;
ALOGE("Possible pid reuse detected (pid %d, tgid %d)!", pid, tgid); }
if (!parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid)) {
ALOGE("Unable to parse tgid from /proc/%d/status", pid);
goto out;
}
if (tgid != pid) {
ALOGE("Possible pid reuse detected (pid %d, tgid %" PRId64 ")!", pid, tgid);
goto out;
}
// Zombie processes will not have RSS / Swap fields.
if (!parse_status_tag(buf, PROC_STATUS_RSS_FIELD, &rss_kb)) {
goto out;
}
if (!parse_status_tag(buf, PROC_STATUS_SWAP_FIELD, &swap_kb)) {
goto out; goto out;
} }
taskname = proc_get_name(pid, buf, sizeof(buf)); taskname = proc_get_name(pid, buf, sizeof(buf));
// taskname will point inside buf, do not reuse buf onwards.
if (!taskname) { if (!taskname) {
goto out; goto out;
} }
tasksize = proc_get_size(pid); mem_st = stats_read_memory_stat(per_app_memcg, pid, uid, rss_kb * 1024, swap_kb * 1024);
if (tasksize <= 0) {
goto out;
}
mem_st = stats_read_memory_stat(per_app_memcg, pid, uid);
TRACE_KILL_START(pid); TRACE_KILL_START(pid);
@ -2099,21 +2112,21 @@ static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reas
inc_killcnt(procp->oomadj); inc_killcnt(procp->oomadj);
killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi, wi, tm); killinfo_log(procp, min_oom_score, rss_kb, kill_reason, mi, wi, tm);
if (kill_desc) { if (kill_desc) {
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid, ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB; reason: %s", taskname, pid,
uid, procp->oomadj, tasksize * page_k, kill_desc); uid, procp->oomadj, rss_kb, kill_desc);
} else { } else {
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid, ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB", taskname, pid,
uid, procp->oomadj, tasksize * page_k); uid, procp->oomadj, rss_kb);
} }
stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, tasksize, mem_st); stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, mem_st);
ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid); ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid);
result = tasksize; result = rss_kb / page_k;
out: out:
/* /*

View File

@ -77,7 +77,7 @@ static struct proc* pid_lookup(int pid) {
*/ */
int int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name, stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score, int tasksize, int32_t oom_score, int32_t min_oom_score,
struct memory_stat *mem_st) { struct memory_stat *mem_st) {
if (enable_stats_log) { if (enable_stats_log) {
return android::lmkd::stats::stats_write( return android::lmkd::stats::stats_write(
@ -87,7 +87,7 @@ stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
oom_score, oom_score,
mem_st ? mem_st->pgfault : -1, mem_st ? mem_st->pgfault : -1,
mem_st ? mem_st->pgmajfault : -1, mem_st ? mem_st->pgmajfault : -1,
mem_st ? mem_st->rss_in_bytes : tasksize * BYTES_IN_KILOBYTE, mem_st ? mem_st->rss_in_bytes : -1,
mem_st ? mem_st->cache_in_bytes : -1, mem_st ? mem_st->cache_in_bytes : -1,
mem_st ? mem_st->swap_in_bytes : -1, mem_st ? mem_st->swap_in_bytes : -1,
mem_st ? mem_st->process_start_time_ns : -1, mem_st ? mem_st->process_start_time_ns : -1,
@ -99,13 +99,12 @@ stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
} }
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score, int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score, int tasksize, int32_t min_oom_score,
struct memory_stat* mem_st) { struct memory_stat* mem_st) {
struct proc* proc = pid_lookup(pid); struct proc* proc = pid_lookup(pid);
if (!proc) return -EINVAL; if (!proc) return -EINVAL;
return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, mem_st);
tasksize, mem_st);
} }
static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) { static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {
@ -170,26 +169,24 @@ static int memory_stat_from_procfs(struct memory_stat* mem_st, int pid) {
// field 10 is pgfault // field 10 is pgfault
// field 12 is pgmajfault // field 12 is pgmajfault
// field 22 is starttime // field 22 is starttime
// field 24 is rss_in_pages int64_t pgfault = 0, pgmajfault = 0, starttime = 0;
int64_t pgfault = 0, pgmajfault = 0, starttime = 0, rss_in_pages = 0;
if (sscanf(buffer, if (sscanf(buffer,
"%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d " "%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d "
"%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d " "%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d "
"%" SCNd64 " %*d %" SCNd64 "", "%" SCNd64 "",
&pgfault, &pgmajfault, &starttime, &rss_in_pages) != 4) { &pgfault, &pgmajfault, &starttime) != 3) {
return -1; return -1;
} }
mem_st->pgfault = pgfault; mem_st->pgfault = pgfault;
mem_st->pgmajfault = pgmajfault; mem_st->pgmajfault = pgmajfault;
mem_st->rss_in_bytes = (rss_in_pages * PAGE_SIZE);
mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK)); mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
return 0; return 0;
} }
struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid) { struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
int64_t rss_bytes, int64_t swap_bytes) {
static struct memory_stat mem_st = {}; static struct memory_stat mem_st = {};
if (!enable_stats_log) { if (!enable_stats_log) {
return NULL; return NULL;
} }
@ -200,6 +197,8 @@ struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t ui
} }
} else { } else {
if (memory_stat_from_procfs(&mem_st, pid) == 0) { if (memory_stat_from_procfs(&mem_st, pid) == 0) {
mem_st.rss_in_bytes = rss_bytes;
mem_st.swap_in_bytes = swap_bytes;
return &mem_st; return &mem_st;
} }
} }

View File

@ -59,17 +59,21 @@ stats_write_lmk_state_changed(int32_t state);
int int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name, stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score, int32_t oom_score, int32_t min_oom_score,
int tasksize, struct memory_stat *mem_st); struct memory_stat *mem_st);
/** /**
* Logs the event when LMKD kills a process to reduce memory pressure. * Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51 * Code: LMK_KILL_OCCURRED = 51
*/ */
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score, int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
int32_t min_oom_score, int tasksize, int32_t min_oom_score,
struct memory_stat* mem_st); struct memory_stat* mem_st);
struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid); /**
* Reads memory stats used to log the statsd atom. Returns non-null ptr on success.
*/
struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
int64_t rss_bytes, int64_t swap_bytes);
/** /**
* Registers a process taskname by pid, while it is still alive. * Registers a process taskname by pid, while it is still alive.
@ -94,19 +98,21 @@ stats_write_lmk_state_changed(int32_t state __unused) { return -EINVAL; }
static inline int static inline int
stats_write_lmk_kill_occurred(int32_t uid __unused, stats_write_lmk_kill_occurred(int32_t uid __unused,
char const* process_name __unused, int32_t oom_score __unused, char const* process_name __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused, int tasksize __unused, int32_t min_oom_score __unused,
struct memory_stat *mem_st __unused) { return -EINVAL; } struct memory_stat *mem_st __unused) { return -EINVAL; }
static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused, static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused,
int pid __unused, int32_t oom_score __unused, int pid __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused, int32_t min_oom_score __unused,
int tasksize __unused,
struct memory_stat* mem_st __unused) { struct memory_stat* mem_st __unused) {
return -EINVAL; return -EINVAL;
} }
static inline struct memory_stat *stats_read_memory_stat(bool per_app_memcg __unused, static inline struct memory_stat *stats_read_memory_stat(bool per_app_memcg __unused,
int pid __unused, uid_t uid __unused) { return NULL; } int pid __unused, uid_t uid __unused,
int64_t rss_bytes __unused, int64_t swap_bytes __unused) {
return NULL;
}
static inline void stats_store_taskname(int pid __unused, const char* taskname __unused) {} static inline void stats_store_taskname(int pid __unused, const char* taskname __unused) {}