commit b3a78b59602cfebc284887c3682a1f0c56612b74
Author: Wenchao Hao
Date:   Mon May 19 19:36:35 2025 +0800

    init

    Signed-off-by: Wenchao Hao

diff --git a/applist b/applist
new file mode 100644
index 0000000..19465b6
--- /dev/null
+++ b/applist
@@ -0,0 +1,40 @@
+tv.danmaku.bili
+com.xingin.xhs
+com.jingdong.app.mall
+com.taobao.taobao
+com.xunmeng.pinduoduo
+com.ss.android.article.news
+com.sina.weibo
+com.tencent.mobileqq
+com.tencent.qqlive
+com.smile.gifmaker
+com.netease.cloudmusic
+air.tv.douyu.android
+com.autonavi.minimap
+com.sankuai.meituan
+com.tencent.mm
+com.eg.android.AlipayGphone
+com.ss.android.ugc.aweme.lite
+com.kuaishou.nebula
+com.sup.android.superb
+com.baidu.searchbox
+com.tencent.weread
+com.tencent.tmgp.sgame
+com.qiyi.video
+com.UCMobile
+com.ss.android.ugc.aweme
+com.taobao.idlefish
+com.happyelements.AndroidAnimal
+com.android.deskclock
+com.tencent.qqmusic
+com.android.camera
+com.tencent.mtt
+com.dragon.read
+com.baidu.tieba
+com.miHoYo.Yuanshen
+com.quark.browser
+com.tencent.tmgp.pubgmhd
+com.tencent.jkchess
+com.kmxs.reader
+com.ss.android.article.lite
+com.duowan.kiwi
diff --git a/clear_apps.py b/clear_apps.py
new file mode 100755
index 0000000..3fd0cd7
--- /dev/null
+++ b/clear_apps.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python3
+
+import sys
+import time
+
+import uiautomator2 as u2
+
+def clear_all_apps():
+    # Open the MIUI recents view and tap the "clear all" button until the
+    # launcher reports that no recent tasks are left.
+    d = u2.connect()
+    while True:
+        try:
+            d.keyevent("HOME")
+            d.keyevent("HOME")
+            d.keyevent("MENU")
+            time.sleep(1)
+            if "com.miui.home:id/recents_container" not in d.dump_hierarchy():
+                continue
+            # "近期没有任何内容" is the literal MIUI UI text for "nothing
+            # recent"; it must stay in Chinese to match the hierarchy dump.
+            if "近期没有任何内容" in d.dump_hierarchy():
+                d.keyevent("HOME")
+                d.keyevent("HOME")
+                sys.exit(0)
+            d.xpath('//*[@resource-id="com.miui.home:id/clearAnimView"]').click_exists(timeout=3)
+            d.keyevent("HOME")
+            d.keyevent("HOME")
+        except Exception as e:
+            print("clear_all_apps errors", e)
+            d = u2.connect()
+            continue
+
+if __name__ == "__main__":
+    clear_all_apps()
diff --git a/handle_perfetto.py b/handle_perfetto.py
new file mode 100755
index 0000000..138ff57
--- /dev/null
+++ b/handle_perfetto.py
@@ -0,0 +1,217 @@
+import sys
+
+import pandas as pd
+from perfetto.trace_processor import TraceProcessor, TraceProcessorConfig
+
+def query_sql(lstart, lend, slice_name, cond):
+    # For every matching slice that overlaps the launch window [lstart, lend),
+    # report the slice, its duration clipped to the window (ldur), and the time
+    # the owning thread spent in each scheduler state during the clipped span.
+    # Clipping example: a slice covering [5, 15) against a window [0, 10)
+    # yields ldur = MIN(15, 10) - MAX(0, 5) = 5.
+    def state_total(state, alias):
+        # Sum thread_state intervals of one scheduler state, clipped to the
+        # overlap of the slice and the window.
+        return f"""(select total(case when thread_state.ts < MAX(slice.ts,{lstart}) then MIN(MIN((slice.ts+slice.dur),{lend}),thread_state.ts+thread_state.dur)-MAX(slice.ts,{lstart}) when (thread_state.ts+thread_state.dur) > MIN((slice.ts+slice.dur),{lend}) then (MIN((slice.ts+slice.dur),{lend})-MAX(thread_state.ts,MAX(slice.ts,{lstart}))) else thread_state.dur end) from thread_state where thread_state.utid=thread.utid and thread_state.state='{state}' and thread_state.ts < {lend} and (thread_state.ts+thread_state.dur) > {lstart} and thread_state.ts < (slice.ts+slice.dur) and (thread_state.ts+thread_state.dur) > slice.ts) as {alias}"""
+
+    return f"""
+    select slice_id,process.name as process_name,process.upid,process.pid,thread.name as thread_name,thread.tid,slice.ts,slice.dur,
+    MAX(slice.ts,{lstart}) as lstart, MIN(slice.ts+slice.dur,{lend}) as lend,
+    (case when slice.ts < {lstart} then MIN(slice.ts+slice.dur,{lend})-{lstart} when (slice.ts+slice.dur) > {lend} then ({lend}-MAX({lstart},slice.ts)) else slice.dur end) as ldur,
+    {state_total('Running', 'total_running')},
+    {state_total('S', 'total_s')},
+    {state_total('D', 'total_d')},
+    {state_total('R', 'total_r')},
+    {state_total('R+', 'total_rr')},
+    slice.name from slice JOIN thread_track ON slice.track_id = thread_track.id JOIN thread USING(utid) JOIN process USING(upid)
+    WHERE slice.dur > 0 and slice.ts < {lend} and (slice.ts+slice.dur) > {lstart} and slice.name like '{slice_name}%' AND {cond} order by slice.dur desc"""
+
+def perfetto_parse(slice_name):
+    tp = TraceProcessor(
+        trace=sys.argv[1],
+        config=TraceProcessorConfig(bin_path='./trace_processor_shell'))
+    qr_it = tp.query("""
+    INCLUDE PERFETTO MODULE android.startup.startups;
+    SELECT * FROM android_startups """)
+    lstart = 0
+    lend = 0
+    pkg = None
+    for r in qr_it:
+        if r.package != 'com.miui.home' and r.package != 'com.miui.securitycenter':
+            lstart = r.ts
+            lend = r.ts_end
+            pkg = r.package
+            #print(f"{pkg}: launch time:{lend-lstart} {r.startup_type} [{slice_name}]")
+            print(f"{lend-lstart} {r.startup_type}")
+            return
+
+    # Everything below only runs when no app startup was found above; it is
+    # kept for interactive analysis of the main thread's sleep/wake pattern.
+    qr_it = tp.query(f"""
+    select thread_state.id,thread_state.utid,thread_state.ts,thread_state.dur,thread_state.state,thread_state.waker_utid from thread_state JOIN thread USING(utid) JOIN process USING(upid) where thread_state.ts < {lend} and (thread_state.ts+thread_state.dur) > {lstart} and thread.is_main_thread=1 and process.name='{pkg}'
+    """)
+    s = False
+    sts = 0
+    sdur = 0
+    
sleep_list = [] + for q in qr_it: + if q.state == 'S': + sts = q.ts + sdur = q.dur + s = True + continue + elif q.state == 'R' and s: + sleep_list.append([q.waker_utid,sts,sdur]) + if s: + s = False + # app_main_thread_id = q.utid + if len(qr_it) > 0: + sleep_list.append([q.utid, lstart, lend-lstart]) + launch_info = [[">1ms",0,0,0,0,0,0,0,0,0], + ["500us-1ms",0,0,0,0,0,0,0,0,0], + ["100us-500us",0,0,0,0,0,0,0,0,0], + ["10us-100us",0,0,0,0,0,0,0,0,0], + ["<10us",0,0,0,0,0,0,0,0,0]] + launch_info_total = [0,0,0,0,0,0,0,0,0,0] + max_time, min_time = 0, 1000000000 + for sl in sleep_list: + qutid = sl[0] + qlstart = sl[1] + qlend = sl[1] + sl[2] + + # print(len(qr_it)) + vmq = tp.query(f""" + select tmp.id as dp_id,(case when tmp.ts < {qlstart} then MIN(tmp.ts+tmp.dur,{qlend})-{qlstart} when (tmp.ts+tmp.dur) > {qlend} then ({qlend} -MAX({qlstart}, tmp.ts)) else tmp.dur end) as ldur + from slice JOIN(slice) as tmp ON slice.parent_id=tmp.id JOIN thread_track ON slice.track_id = thread_track.id + where thread_track.utid={qutid} and slice.name='mm_vmscan_direct_reclaim' + and tmp.name like '{slice_name}%' and slice.ts < {qlend} and (slice.ts+slice.dur) > {qlstart} + """) + vm_len = [0,0,0,0,0] + vm_count = [0, 0, 0, 0, 0] + vm_slice = [set(), set(), set(), set(), set()] + for qr in vmq: + if qr.ldur > 1000000: + vm_len[0] += 1 + launch_info[0][7] += 1 + if qr.dp_id not in vm_slice[0]: + vm_slice[0].add(qr.dp_id) + vm_count[0] += 1 + launch_info[0][9] += 1 + # print(qutid, vm_slice[0]) + elif qr.ldur > 500000: + vm_len[1] += 1 + launch_info[1][7] += 1 + if qr.dp_id not in vm_slice[1]: + vm_slice[1].add(qr.dp_id) + vm_count[1] += 1 + launch_info[1][9] += 1 + elif qr.ldur > 100000: + vm_len[2] += 1 + launch_info[2][7] += 1 + if qr.dp_id not in vm_slice[2]: + vm_slice[2].add(qr.dp_id) + vm_count[2] += 1 + launch_info[2][9] += 1 + elif qr.ldur > 10000: + vm_len[3] += 1 + launch_info[3][7] += 1 + if qr.dp_id not in vm_slice[3]: + vm_slice[3].add(qr.dp_id) + vm_count[3] += 1 + launch_info[3][9] += 1 + else: + vm_len[4] += 1 + launch_info[4][7] += 1 + if qr.dp_id not in vm_slice[4]: + vm_slice[4].add(qr.dp_id) + vm_count[4] += 1 + launch_info[4][9] += 1 + launch_info_total[7] += sum(vm_len) + launch_info_total[9] += sum(vm_count) + # print(vm_slice) + # if len(vm_slice[0]) > 0: + # print(qutid, qlstart, qlend, vm_slice[0]) + qr_it = tp.query(query_sql(qlstart, qlend, slice_name, f"thread.utid={qutid}")) + for qr in qr_it: + slid = qr.slice_id + # qr_it_tmp = tp.query(f"select count(*) as count from slice where parent_id={slid} and name='mm_vmscan_direct_reclaim'") + # vm_len = len(qr_it_tmp) + # for qt in qr_it_tmp: + # print("qr_it_tmp", qt.count) + # vm_len = qt.count + # if len(vm_slice[0]) > 0: + # print(qr) + if (qr.total_running + qr.total_s + qr.total_d + qr.total_r + qr.total_rr) != qr.ldur: + for i in range(len(vm_slice)): + if qr.slice_id in vm_slice[i]: + launch_info[i][9] -= 1 + continue + # pass + launch_info_total[0] += qr.ldur + launch_info_total[1] += 1 + launch_info_total[2] += qr.total_running + launch_info_total[3] += qr.total_s + launch_info_total[4] += qr.total_d + launch_info_total[5] += qr.total_r + launch_info_total[6] += qr.total_rr + tmp_list = [qr.total_running, qr.total_s, qr.total_d, qr.total_r, qr.total_rr] + idx = tmp_list.index(max(tmp_list)) + dc = qr.total_d > 0 + launch_info_total[8] += dc + max_time = max(max_time, qr.ldur) + min_time = min(min_time, qr.ldur) + + # debug + #if qr.ldur > 100000: + # print(qr) + + if qr.ldur > 1000000: + #print(qr) + launch_info[0][1] += 
1 + launch_info[0][idx+2] += 1 + launch_info[0][8] += dc + # print(qr.slice_id, qr.ldur, qr.total_running) + # launch_info[0][7] += vm_len[0] + elif qr.ldur > 500000: + launch_info[1][1] += 1 + launch_info[1][idx + 2] += 1 + launch_info[1][8] += dc + # launch_info[1][7] += vm_len[1] + elif qr.ldur > 100000: + launch_info[2][1] += 1 + launch_info[2][idx + 2] += 1 + launch_info[2][8] += dc + # launch_info[2][7] += vm_len[2] + elif qr.ldur > 10000: + #if qr.ldur > 50000 and qr.total_rr==0: + # print(qr) + launch_info[3][1] += 1 + launch_info[3][idx + 2] += 1 + launch_info[3][8] += dc + # launch_info[3][7] += vm_len[3] + else: + launch_info[4][1] += 1 + launch_info[4][idx + 2] += 1 + launch_info[4][8] += dc + # launch_info[4][7] += vm_len[4] + launch_info.append(launch_info_total) + df = pd.DataFrame(launch_info, columns=['time', 'count', 'running', 's', 'd', 'r', 'r+','dr_total','d_count','dr_count'], index=None) + d_count = df.pop("d_count") + df.insert(5, 'd_count', d_count) + print(df.to_string()) + print(f"max:{max_time} min:{min_time}") + # print(launch_info) + + qr_it = tp.query(query_sql(lstart, lend, slice_name, f"process.name='{pkg}'")) + launch_ret = [0,0,0,0,0,0] + + for q in qr_it: + launch_ret[0] += q.ldur + launch_ret[1] += q.total_running + launch_ret[2] += q.total_s + launch_ret[3] += q.total_d + launch_ret[4] += q.total_r + launch_ret[5] += q.total_rr + # print(q) + count = len(qr_it) + launch_ret.insert(0, count) + + #print(f"{pkg}: ", launch_ret) + + +def main(): + perfetto_parse(sys.argv[2]) +if __name__ == '__main__': + main() diff --git a/perfconf.txt b/perfconf.txt new file mode 100644 index 0000000..ef566f0 --- /dev/null +++ b/perfconf.txt @@ -0,0 +1,102 @@ +buffers: { + size_kb: 634880 + fill_policy: DISCARD +} +buffers: { + size_kb: 2048 + fill_policy: DISCARD +} +data_sources: { + config { + name: "android.gpu.memory" + } +} +data_sources: { + config { + name: "linux.process_stats" + target_buffer: 1 + process_stats_config { + scan_all_processes_on_start: true + } + } +} +data_sources: { + config { + name: "android.log" + android_log_config { + log_ids: LID_EVENTS + log_ids: LID_CRASH + log_ids: LID_KERNEL + log_ids: LID_DEFAULT + log_ids: LID_RADIO + log_ids: LID_SECURITY + log_ids: LID_STATS + log_ids: LID_SYSTEM + } + } +} +data_sources: { + config { + name: "android.surfaceflinger.frametimeline" + } +} +data_sources: { + config { + name: "linux.sys_stats" + sys_stats_config { + stat_period_ms: 1000 + stat_counters: STAT_CPU_TIMES + stat_counters: STAT_FORK_COUNT + } + } +} +data_sources: { + config { + name: "linux.ftrace" + ftrace_config { + ftrace_events: "sched/sched_switch" + ftrace_events: "power/suspend_resume" + ftrace_events: "sched/sched_wakeup" + ftrace_events: "sched/sched_wakeup_new" + ftrace_events: "sched/sched_waking" + ftrace_events: "power/cpu_frequency" + ftrace_events: "power/cpu_idle" + ftrace_events: "power/gpu_frequency" + ftrace_events: "gpu_mem/gpu_mem_total" + ftrace_events: "sched/sched_process_exit" + ftrace_events: "sched/sched_process_free" + ftrace_events: "task/task_newtask" + ftrace_events: "task/task_rename" + ftrace_events: "ftrace/print" + atrace_categories: "am" + atrace_categories: "adb" + atrace_categories: "aidl" + atrace_categories: "dalvik" + atrace_categories: "audio" + atrace_categories: "binder_lock" + atrace_categories: "binder_driver" + atrace_categories: "bionic" + atrace_categories: "camera" + atrace_categories: "database" + atrace_categories: "gfx" + atrace_categories: "hal" + atrace_categories: "input" + 
atrace_categories: "network" + atrace_categories: "nnapi" + atrace_categories: "pm" + atrace_categories: "power" + atrace_categories: "rs" + atrace_categories: "res" + atrace_categories: "rro" + atrace_categories: "sm" + atrace_categories: "ss" + atrace_categories: "vibrator" + atrace_categories: "video" + atrace_categories: "view" + atrace_categories: "webview" + atrace_categories: "wm" + } + } +} +duration_ms: 8000 + diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..6963e85 --- /dev/null +++ b/run.sh @@ -0,0 +1,199 @@ +#$dirput: round app start_time(ms) start_type rss(MB) pss(MB) ratio pagefault minorfaults majorfaults itlbmiss dtlbmiss cpustalled + +cycle=3 +wait=3 +clear_background=0 + +while getopts ":s:c:w:h:C" opt +do + case $opt in + s) + export ANDROID_SERIAL=$OPTARG + ;; + c) + cycle=$OPTARG + ;; + C) + clear_background=1 + ;; + w) + wait=$OPTARG + ;; + h) + echo "usage: run.sh -s sid -c cycle -w wait_seconds" + exit 1 + ;; + ?) + echo "unrecognized parameters" $opt + echo "usage: run.sh -s sid -c cycle -w wait_time [-C]" + echo "-s: specify serial id of device" + echo "-c: specify applist startup cycle" + echo "-w: specify wait time after app start, before start next app, default 10 s" + echo "-C: if to kill all background apps before start app" + exit 1 + ;; + esac +done + +# 检查多个设备 +# 如果有多台设备需要先手动export ANDROID_SERIAL=xxx运作这个脚本 +nr_dev=$(adb devices | grep -v "List of devices" | grep device -c) +if [ $nr_dev -gt 1 ] && [ -z "$ANDROID_SERIAL" ] ; then + echo "adb: more than one device/emulator" + exit +fi + +# 如果只有1台设备默认测试该设备 +if [ -z "$ANDROID_SERIAL" ]; then + sid=$(adb devices | grep -v "List of devices" | grep device | awk '{print $1}') + export ANDROID_SERIAL=$sid +fi + + +# adb root并推送perfetto配置文件 +adb root +adb push perfconf.txt /data/misc/perfetto-configs + +dir=$ANDROID_SERIAL/$(/usr/bin/date +%Y-%m%d-%H%M) +mkdir -p $dir +mkdir $dir/system_mem +mkdir $dir/battery +mkdir $dir/traces +mkdir $dir/meminfo +mkdir $dir/simpleperf +mkdir $dir/thpmaps + +echo "round app start_type start_time rss pss pagefaults minorfaults majorfaults itlbmiss dtlbmiss cpustall inc_ratio order4_ratio order4_cont inc_total" > $dir/result + +for round in $(seq 1 $cycle); do + for app in `cat applist`; do + sleep 3 + + # 抓内存相关信息 + adb shell free -h > $dir/system_mem/before_free-$app-$round + adb shell cat /proc/meminfo > $dir/system_mem/before_meminfo-$app-$round + adb shell cat /proc/zoneinfo > $dir/system_mem/before_zoneinfo-$app-$round + adb shell cat /proc/buddyinfo > $dir/system_mem/before_buddyinfo-$app-$round + adb shell cat /proc/pagetypeinfo > $dir/system_mem/before_pagetypeinfo-$app-$round + adb shell cat /proc/vmstat > $dir/system_mem/before_vmstat-$app-$round + + # 开始抓simpleperf和perfetto + adb shell "simpleperf stat -e dTLB-loads,dTLB-load-misses,iTLB-loads,iTLB-load-misses,page-faults,raw-stall-backend-mem,minor-faults,major-faults --app $app --duration 3 -o /data/local/tmp/simpleperf-"$app"-"$round".txt" & + adb shell "perfetto -c /data/misc/perfetto-configs/perfconf.txt --txt -o /data/misc/perfetto-traces/trace-"$app"-"$round".ptrace" & + perfetto_pid=$! 
+
+        # Resolve the app's launch activity.
+        if [ "$app" == "com.qiyi.video" ]; then
+            activity="com.qiyi.video/com.qiyi.video.WelcomeActivity"
+        elif [ "$app" == "com.netease.cloudmusic" ]; then
+            activity="com.netease.cloudmusic/com.netease.cloudmusic.activity.IconChangeDefaultAlias"
+        else
+            activity=$(adb shell dumpsys package $app | grep -A 1 -w "android.intent.action.MAIN:" | head -n 2 | tail -n 1 | awk '{print $2}')
+        fi
+
+        if [ -z "$activity" ]; then
+            continue
+        fi
+
+        # Sleep 1 s and prepare to sample the huge page counters.
+        sleep 1
+        echo "starting $activity"
+
+        # Capture battery state.
+        adb shell "dumpsys battery" > $dir/battery/battery-$app-$round
+
+        prev_alloc=$(adb shell cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/anon_fault_alloc)
+        prev_alloc_fallback=$(adb shell cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/anon_fault_fallback)
+
+        # Launch the app.
+        adb shell am start -n $activity
+
+        # Sleep 10 s, then wait for the background capture jobs to exit.
+        sleep 10
+        wait
+
+        # The dumpsys meminfo below also allocates huge pages, so read the
+        # allocation counters first.
+        alloc=$(adb shell cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/anon_fault_alloc)
+        alloc_fallback=$(adb shell cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/anon_fault_fallback)
+
+        # Dump the app's memory usage.
+        adb shell "dumpsys meminfo --package $app" > $dir/meminfo/meminfo-"$app"-"$round".txt
+
+        # Capture memory state after the launch.
+        adb shell free -h > $dir/system_mem/after_free-$app-$round
+        adb shell cat /proc/meminfo > $dir/system_mem/after_meminfo-$app-$round
+        adb shell cat /proc/zoneinfo > $dir/system_mem/after_zoneinfo-$app-$round
+        adb shell cat /proc/buddyinfo > $dir/system_mem/after_buddyinfo-$app-$round
+        adb shell cat /proc/pagetypeinfo > $dir/system_mem/after_pagetypeinfo-$app-$round
+        adb shell cat /proc/vmstat > $dir/system_mem/after_vmstat-$app-$round
+
+        # Sleep $wait seconds, then pull the simpleperf and perfetto data.
+        sleep $wait
+        adb pull /data/misc/perfetto-traces/trace-"$app"-"$round".ptrace $dir/traces
+        adb pull /data/local/tmp/simpleperf-"$app"-"$round".txt $dir/simpleperf/
+        adb shell "rm /data/misc/perfetto-traces/trace-"$app"-"$round".ptrace"
+        adb shell "rm /data/local/tmp/simpleperf-"$app"-"$round".txt"
+
+        # Return to the home screen; the app drops to the background.
+        adb shell input keyevent 3
+
+        # Compute the launch time.
+        python3 handle_perfetto.py $dir/traces/trace-"$app"-"$round".ptrace mm > tmp.txt
+        if [ $(cat tmp.txt | wc -l) -ne 1 ]; then
+            start_time=0
+            start_type="NONE"
+        else
+            start_time=$(cat tmp.txt | awk '{print $1}')
+            start_type=$(cat tmp.txt | awk '{print $2}')
+        fi
+
+        # ns -> ms, with two decimal places.
+        start_time=$((start_time/10000))
+        start_time=$(echo "scale=2; $start_time / 100" | bc)
+
+        # Compute PSS and RSS.
+        if [ $(cat $dir/meminfo/meminfo-"$app"-"$round".txt | grep "MEMINFO in pid" -c) -ne 1 ]; then
+            cat $dir/meminfo/meminfo-"$app"-"$round".txt | grep "TOTAL PSS" | tail -n 1 > tmp.txt
+            pss=$(cat tmp.txt | awk '{print $3}')
+            rss=$(cat tmp.txt | awk '{print $6}')
+        else
+            cat $dir/meminfo/meminfo-"$app"-"$round".txt | grep "TOTAL PSS" > tmp.txt
+            pss=$(cat tmp.txt | awk '{print $3}')
+            rss=$(cat tmp.txt | awk '{print $6}')
+        fi
+
+        pss=$(echo "scale=2; $pss / 1024" | bc)
+        rss=$(echo "scale=2; $rss / 1024" | bc)
+
+        # Compute the huge page allocation success ratio.
+        inc_alloc=$((alloc-prev_alloc))
+        inc_alloc_fallback=$((alloc_fallback-prev_alloc_fallback))
+        inc_total=$((inc_alloc+inc_alloc_fallback))
+        if [ $inc_total -ne 0 ]; then
+            inc_ratio=$(echo "scale=4; $inc_alloc / $inc_total" | bc)
+        else
+            inc_ratio=1
+        fi
+
+        # Compute the share of the app's mappings backed by huge pages.
+        app_pid=$(adb shell "ps -ef" | grep $app | head -n 1 | awk '{print $2}')
+        python3 thpmaps_v3_vir_cont.py --pid $app_pid --rollup --inc-empty --cont 64k > $dir/thpmaps/thp-$app-$round
+        order4_ratio=$(cat $dir/thpmaps/thp-$app-$round | grep 
"anon-thp-pte-aligned-64kB" | awk -F '(' '{print $2}' | sed 's/)//g') + order4_cont=$(cat $dir/thpmaps/thp-$app-$round | grep "anon-cont-pte-aligned-64kB" | awk -F '(' '{print $2}' | sed 's/)//g') + + pagefaults=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'page-faults' | awk '{print $4}') + itlbmiss=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'iTLB-load-misses' | awk '{print $4}') + dtlbmiss=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'dTLB-load-misses' | awk '{print $4}') + cpustall=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'raw-stall-backend-mem' | awk '{print $4}') + minorfaults=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'minor-faults' | awk '{print $1}' | sed s/,//) + majorfaults=$(cat $dir/simpleperf/simpleperf-"$app"-"$round".txt | grep 'major-faults' | awk '{print $1}' | sed s/,//) + + echo $round $app $start_type $start_time $rss $pss $pagefaults $minorfaults $majorfaults $itlbmiss $dtlbmiss $cpustall $inc_ratio $inc_total $order4_ratio $order4_cont>> $dir/result + + if [ $clear_background -ne 0 ]; then + python3 clear_apps.py + fi + + rm tmp.txt + done +done diff --git a/thpmaps_v3_vir_cont.py b/thpmaps_v3_vir_cont.py new file mode 100644 index 0000000..455ead4 --- /dev/null +++ b/thpmaps_v3_vir_cont.py @@ -0,0 +1,746 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 ARM Ltd. +# +# Utility providing smaps-like output detailing transparent hugepage usage. +# For more info, run: +# ./thpmaps --help +# +# Requires numpy: +# pip3 install numpy + + +import argparse +import collections +import math +import os +import re +import resource +import shutil +import sys +import textwrap +import time +import numpy as np + +def init(): + if LOCAL == False: + os.system("adb root") + get_huge_args() + +def get_log(path, pid): + if LOCAL: + return + if pid != 0: + pagemap_path = f'/proc/{pid}/pagemap'.format(pid = pid) + kpageflags_path = f'/proc/kpageflags' + smaps_path = f'/proc/{pid}/smaps'.format(pid = pid) + remote_path_1 = [smaps_path, kpageflags_path, pagemap_path] + remote_path_2 = ['/data/local/tmp/smaps', '/data/local/tmp/kpageflags', '/data/local/tmp/pagemap_path'] + os.system('adb shell "cat ' + remote_path_1[0] + ' > ' + remote_path_2[0] + '"') + os.system('adb shell "cat ' + remote_path_1[1] + ' > ' + remote_path_2[1] + '"') + os.system('adb shell "cat ' + remote_path_1[2] + ' > ' + remote_path_2[2] + '"') + # os.system('adb shell "cat ' + remote_path_1[1] + ' > /data/local/tmp/kpageflags"') + # os.system('adb shell "cat ' + remote_path_1[2] + ' > /data/local/tmp/pagemap_path"') + + for i, p in enumerate(remote_path_1): + if os.path.exists('.' + p) == True: + os.system('rm -rf ' + '.' + p) + os.makedirs(os.path.dirname('.' + p), exist_ok=True) + os.system('adb shell "cat ' + remote_path_2[i] + '" >' + '.' + p) + return + + if os.path.exists('.' + path) == True: + os.system('rm -rf ' + '.' + path) + os.makedirs(os.path.dirname('.' + path), exist_ok=True) + os.system('adb shell cat ' + path + ' >' + '.' 
+ path)
+
+def get_huge_args():
+    global PMD_SIZE
+    global PAGE_SHIFT
+    global PAGE_SIZE
+    global PMD_ORDER
+    get_log('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size', 0)
+    with open('./sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
+        PAGE_SIZE = resource.getpagesize()
+        PAGE_SHIFT = int(math.log2(PAGE_SIZE))
+        PMD_SIZE = int(f.read())
+        PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))
+
+
+def align_forward(v, a):
+    return (v + (a - 1)) & ~(a - 1)
+
+
+def align_offset(v, a):
+    return v & (a - 1)
+
+
+def kbnr(kb):
+    # Convert KB to number of pages.
+    return (kb << 10) >> PAGE_SHIFT
+
+
+def nrkb(nr):
+    # Convert number of pages to KB.
+    return (nr << PAGE_SHIFT) >> 10
+
+
+def odkb(order):
+    # Convert page order to KB.
+    return (PAGE_SIZE << order) >> 10
+
+
+def cont_ranges_all(search, index):
+    # Given a list of arrays, find the ranges for which values are monotonically
+    # incrementing in all arrays. All arrays in search and index must be the
+    # same size.
+    sz = len(search[0])
+    r = np.full(sz, 2)
+    d = np.diff(search[0]) == 1
+    for dd in [np.diff(arr) == 1 for arr in search[1:]]:
+        d &= dd
+    r[1:] -= d
+    r[:-1] -= d
+    return [np.repeat(arr, r).reshape(-1, 2) for arr in index]
+
+
+class ArgException(Exception):
+    pass
+
+
+class FileIOException(Exception):
+    pass
+
+
+class BinArrayFile:
+    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
+    # numpy array. Use inherited class in a with clause to ensure file is
+    # closed when it goes out of scope.
+    def __init__(self, filename, element_size):
+        self.element_size = element_size
+        self.filename = filename
+        self.fd = os.open(self.filename, os.O_RDONLY)
+
+    def cleanup(self):
+        os.close(self.fd)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.cleanup()
+
+    def _readin(self, offset, buffer):
+        length = os.preadv(self.fd, (buffer,), offset)
+        if len(buffer) != length:
+            raise FileIOException('error: {} failed to read {} bytes at {:x}'
+                                  .format(self.filename, len(buffer), offset))
+
+    def _toarray(self, buf):
+        assert(self.element_size == 8)
+        return np.frombuffer(buf, dtype=np.uint64)
+
+    def getv(self, vec):
+        vec *= self.element_size
+        offsets = vec[:, 0]
+        lengths = (np.diff(vec) + self.element_size).reshape(len(vec))
+        buf = bytearray(int(np.sum(lengths)))
+        view = memoryview(buf)
+        pos = 0
+        for offset, length in zip(offsets, lengths):
+            offset = int(offset)
+            length = int(length)
+            self._readin(offset, view[pos:pos+length])
+            pos += length
+        return self._toarray(buf)
+
+    def get(self, index, nr=1):
+        offset = index * self.element_size
+        length = nr * self.element_size
+        buf = bytearray(length)
+        self._readin(offset, buf)
+        return self._toarray(buf)
+
+
+PM_PAGE_PRESENT = 1 << 63
+PM_PFN_MASK = (1 << 55) - 1
+
+class PageMap(BinArrayFile):
+    # Read ranges of a given pid's pagemap into a numpy array.
+    def __init__(self, pid='self'):
+        get_log(f'/proc/{pid}/pagemap', pid)
+        super().__init__(f'./proc/{pid}/pagemap', 8)
+
+
+KPF_ANON = 1 << 12
+KPF_COMPOUND_HEAD = 1 << 15
+KPF_COMPOUND_TAIL = 1 << 16
+KPF_THP = 1 << 22
+
+class KPageFlags(BinArrayFile):
+    # Read ranges of /proc/kpageflags into a numpy array.
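+    # /proc/kpageflags is an array of 64-bit flag words, one per physical page
+    # frame, so the PFNs decoded from pagemap entries can be used directly as
+    # indexes into this file.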
+    def __init__(self):
+        # get_log(f'/proc/kpageflags')
+        super().__init__(f'./proc/kpageflags', 8)
+
+
+vma_all_stats = set([
+    "Size",
+    "Rss",
+    "Pss",
+    "Pss_Dirty",
+    "Shared_Clean",
+    "Shared_Dirty",
+    "Private_Clean",
+    "Private_Dirty",
+    "Referenced",
+    "Anonymous",
+    "KSM",
+    "LazyFree",
+    "AnonHugePages",
+    "ShmemPmdMapped",
+    "FilePmdMapped",
+    "Shared_Hugetlb",
+    "Private_Hugetlb",
+    "Swap",
+    "SwapPss",
+    "Locked",
+])
+
+vma_min_stats = set([
+    "Rss",
+    "Anonymous",
+    "AnonHugePages",
+    "ShmemPmdMapped",
+    "FilePmdMapped",
+])
+
+VMA = collections.namedtuple('VMA', [
+    'name',
+    'start',
+    'end',
+    'read',
+    'write',
+    'execute',
+    'private',
+    'pgoff',
+    'major',
+    'minor',
+    'inode',
+    'stats',
+])
+
+class VMAList:
+    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
+    # instance to receive VMAs.
+    def __init__(self, pid='self', stats=[]):
+        self.vmas = []
+        # get_log(f'/proc/{pid}/smaps')
+        with open(f'./proc/{pid}/smaps', 'r') as file:
+            for line in file:
+                elements = line.split()
+                if '-' in elements[0]:
+                    start, end = map(lambda x: int(x, 16), elements[0].split('-'))
+                    major, minor = map(lambda x: int(x, 16), elements[3].split(':'))
+                    self.vmas.append(VMA(
+                        name=elements[5] if len(elements) == 6 else '',
+                        start=start,
+                        end=end,
+                        read=elements[1][0] == 'r',
+                        write=elements[1][1] == 'w',
+                        execute=elements[1][2] == 'x',
+                        private=elements[1][3] == 'p',
+                        pgoff=int(elements[2], 16),
+                        major=major,
+                        minor=minor,
+                        inode=int(elements[4], 16),
+                        stats={},
+                    ))
+                else:
+                    param = elements[0][:-1]
+                    if param in stats:
+                        value = int(elements[1])
+                        self.vmas[-1].stats[param] = {'type': None, 'value': value}
+
+    def __iter__(self):
+        yield from self.vmas
+
+
+def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
+    # Given 4 same-sized arrays representing a range within a page table backed
+    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
+    # True if page is anonymous, heads: True if page is head of a THP), return a
+    # dictionary of statistics describing the mapped THPs.
+    stats = {
+        'file': {
+            'partial': 0,
+            'aligned': [0] * (PMD_ORDER + 1),
+            'unaligned': [0] * (PMD_ORDER + 1),
+        },
+        'anon': {
+            'partial': 0,
+            'aligned': [0] * (PMD_ORDER + 1),
+            'unaligned': [0] * (PMD_ORDER + 1),
+        },
+    }
+
+    for rindex, rpfn in zip(ranges[0], ranges[2]):
+        index_next = int(rindex[0])
+        index_end = int(rindex[1]) + 1
+        pfn_end = int(rpfn[1]) + 1
+
+        folios = indexes[index_next:index_end][heads[index_next:index_end]]
+
+        # Account pages for any partially mapped THP at the front. In that case,
+        # the first page of the range is a tail.
+        nr = (int(folios[0]) if len(folios) else index_end) - index_next
+        stats['anon' if anons[index_next] else 'file']['partial'] += nr
+
+        # Account pages for any partially mapped THP at the back. In that case,
+        # the next page after the range is a tail.
+        if len(folios):
+            flags = int(kpageflags.get(pfn_end)[0])
+            if flags & KPF_COMPOUND_TAIL:
+                nr = index_end - int(folios[-1])
+                folios = folios[:-1]
+                index_end -= nr
+                stats['anon' if anons[index_end - 1] else 'file']['partial'] += nr
+
+        # Account fully mapped THPs in the middle of the range.
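+        # Since `folios` holds the indexes of the head pages, np.diff() gives
+        # each folio's page count; log2 of that count is the folio's order, and
+        # a folio counts as "aligned" when its first virtual frame number is
+        # aligned to its own size.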
+        if len(folios):
+            folio_nrs = np.append(np.diff(folios), np.uint64(index_end - folios[-1]))
+            folio_orders = np.log2(folio_nrs).astype(np.uint64)
+            for index, order in zip(folios, folio_orders):
+                index = int(index)
+                order = int(order)
+                nr = 1 << order
+                vfn = int(vfns[index])
+                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
+                anon = 'anon' if anons[index] else 'file'
+                stats[anon][align][order] += nr
+
+    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
+    # race between acquiring the smaps stats and reading pagemap, where memory
+    # could be deallocated. So clamp to zero in case it would have gone negative.
+    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
+    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
+        vma.stats['FilePmdMapped']['value']
+    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
+    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))
+
+    rstats = {
+        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
+        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
+    }
+
+    def flatten_sub(type, subtype, stats):
+        param = f"{type}-thp-pte-{subtype}-{{}}kB"
+        for od, nr in enumerate(stats[2:], 2):
+            rstats[param.format(odkb(od))] = {'type': type, 'value': nrkb(nr)}
+
+    def flatten_type(type, stats):
+        flatten_sub(type, 'aligned', stats['aligned'])
+        flatten_sub(type, 'unaligned', stats['unaligned'])
+        rstats[f"{type}-thp-pte-partial"] = {'type': type, 'value': nrkb(stats['partial'])}
+
+    flatten_type('anon', stats['anon'])
+    flatten_type('file', stats['file'])
+
+    return rstats
+
+
+def cont_parse(vma, order, ranges, anons, heads, anon):
+    # Given 4 same-sized arrays representing a range within a page table backed
+    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
+    # True if page is anonymous, heads: True if page is head of a THP), return a
+    # dictionary of statistics describing the contiguous blocks.
+    nr_cont = 1 << order
+    nr_anon = 0
+    nr_file = 0
+
+    for rindex, rvfn in zip(*ranges):
+        index_next = int(rindex[0])
+        index_end = int(rindex[1]) + 1
+        vfn_start = int(rvfn[0])
+        #pfn_start = int(rpfn[0])
+
+        #if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
+        #    continue
+
+        #off = align_forward(vfn_start, nr_cont) - vfn_start
+        #index_next += off
+
+        while index_next + nr_cont <= index_end:
+            folio_boundary = heads[index_next+1:index_next+nr_cont].any()
+            if not folio_boundary:
+                if anon:
+                    nr_anon += nr_cont
+                else:
+                    nr_file += nr_cont
+            index_next += nr_cont
+
+    # Account blocks that are PMD-mapped separately, so filter out of the stats.
+    # There is a race between acquiring the smaps stats and reading pagemap,
+    # where memory could be deallocated. So clamp to zero in case it would have
+    # gone negative.
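+    # kbnr() converts the smaps kB figures into page counts so they can be
+    # subtracted from the page-granular tallies accumulated above.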
+ anon_pmd_mapped = vma.stats['AnonHugePages']['value'] + file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \ + vma.stats['FilePmdMapped']['value'] + nr_anon = max(0, nr_anon - kbnr(anon_pmd_mapped)) + nr_file = max(0, nr_file - kbnr(file_pmd_mapped)) + + if anon: + rstats = { + f"anon-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'anon', 'value': anon_pmd_mapped}, + f"anon-cont-pte-aligned-{nrkb(nr_cont)}kB": {'type': 'anon', 'value': nrkb(nr_anon)} + } + else: + rstats = { + f"file-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'file', 'value': file_pmd_mapped}, + f"file-cont-pte-aligned-{nrkb(nr_cont)}kB": {'type': 'file', 'value': nrkb(nr_file)} + } + + return rstats + + +def vma_print(vma, pid): + # Prints a VMA instance in a format similar to smaps. The main difference is + # that the pid is included as the first value. + print("{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08x} {}" + .format( + pid, vma.start, vma.end, + 'r' if vma.read else '-', 'w' if vma.write else '-', + 'x' if vma.execute else '-', 'p' if vma.private else 's', + vma.pgoff, vma.major, vma.minor, vma.inode, vma.name + )) + + +def stats_print(stats, tot_anon, tot_file, inc_empty): + # Print a statistics dictionary. + label_field = 32 + for label, stat in stats.items(): + type = stat['type'] + value = stat['value'] + if value or inc_empty: + pad = max(0, label_field - len(label) - 1) + if type == 'anon' and tot_anon > 0: + percent = f' ({value / tot_anon:3.0%})' + elif type == 'file' and tot_file > 0: + percent = f' ({value / tot_file:3.0%})' + else: + percent = '' + print(f"{label}:{' ' * pad}{value:8} kB{percent}") + + +def vma_parse(vma, pagemap, kpageflags, contorders): + # Generate thp and cont statistics for a single VMA. + start = vma.start >> PAGE_SHIFT + end = vma.end >> PAGE_SHIFT + + pmes = pagemap.get(start, end - start) + present = pmes & PM_PAGE_PRESENT != 0 + pfns = pmes & PM_PFN_MASK + pfns = pfns[present] + vfns = np.arange(start, end, dtype=np.uint64) + vfns = vfns[present] + + pfn_vec = cont_ranges_all([pfns], [pfns])[0] + flags = kpageflags.getv(pfn_vec) + anons = flags & KPF_ANON != 0 + heads = flags & KPF_COMPOUND_HEAD != 0 + cont_heads = flags & 0 != 0 + thps = flags & KPF_THP != 0 + + + tot_anon = np.count_nonzero(anons) + tot_file = np.size(anons) - tot_anon + tot_anon = nrkb(tot_anon) + tot_file = nrkb(tot_file) + + cont_anons = anons + cont_anon_vfns = vfns[anons] + cont_file_vfns = vfns[~anons] + cont_pfns = pfns + + vfns = vfns[thps] + pfns = pfns[thps] + anons = anons[thps] + heads = heads[thps] + + indexes = np.arange(len(vfns), dtype=np.uint64) + ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns]) + + cont_anon_indexes = np.arange(len(cont_anon_vfns), dtype=np.uint64) + cont_file_indexes = np.arange(len(cont_file_vfns), dtype=np.uint64) + cont_anon_ranges = cont_ranges_all([cont_anon_vfns], [cont_anon_indexes, cont_anon_vfns]) + cont_file_ranges = cont_ranges_all([cont_file_vfns], [cont_file_indexes, cont_file_vfns]) + + thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads) + anon_ontstats = [cont_parse(vma, order, cont_anon_ranges, cont_anons, cont_heads, True) for order in contorders] + file_ontstats = [cont_parse(vma, order, cont_file_ranges, cont_anons, cont_heads, False) for order in contorders] + anon_ontstats.extend(file_ontstats) + + + #tot_anon = vma.stats['Anonymous']['value'] + #tot_file = vma.stats['Rss']['value'] - tot_anon + + return { + **thpstats, + **{k: v for s in anon_ontstats for k, v in s.items()} + }, 
tot_anon, tot_file
+
+
+def do_main(args):
+    pids = set()
+    rollup = {}
+    rollup_anon = 0
+    rollup_file = 0
+
+    if args.cgroup:
+        strict = False
+        for walk_info in os.walk(args.cgroup):
+            cgroup = walk_info[0]
+            # Fetch the cgroup.procs file itself: pid=0 takes the plain-path
+            # branch in get_log(); a non-zero pid would wrongly pull
+            # /proc/<pid>/ files instead.
+            get_log(f'{cgroup}/cgroup.procs', 0)
+            with open(f'.{cgroup}/cgroup.procs') as pidfile:
+                for line in pidfile.readlines():
+                    pids.add(int(line.strip()))
+    elif args.pid:
+        strict = True
+        pids = pids.union(args.pid)
+    else:
+        strict = False
+        for pid in os.listdir('/proc'):
+            if pid.isdigit():
+                pids.add(int(pid))
+
+    if not args.rollup:
+        print("       PID            START              END PROT   OFFSET  DEV    INODE OBJECT")
+
+    for pid in pids:
+        try:
+            with PageMap(pid) as pagemap:
+                with KPageFlags() as kpageflags:
+                    for vma in VMAList(pid, vma_all_stats if args.inc_smaps else vma_min_stats):
+                        if (vma.read or vma.write or vma.execute) and vma.stats['Rss']['value'] > 0:
+                            stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
+                        else:
+                            stats = {}
+                            vma_anon = 0
+                            vma_file = 0
+                        if args.inc_smaps:
+                            stats = {**vma.stats, **stats}
+                        if args.rollup:
+                            for k, v in stats.items():
+                                if k in rollup:
+                                    assert(rollup[k]['type'] == v['type'])
+                                    rollup[k]['value'] += v['value']
+                                else:
+                                    rollup[k] = v
+                            rollup_anon += vma_anon
+                            rollup_file += vma_file
+                        else:
+                            vma_print(vma, pid)
+                            stats_print(stats, vma_anon, vma_file, args.inc_empty)
+        except (FileNotFoundError, ProcessLookupError, FileIOException):
+            if strict:
+                raise
+
+    if args.rollup:
+        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
+
+
+def main():
+    docs_width = shutil.get_terminal_size().columns
+    docs_width -= 2
+    docs_width = min(80, docs_width)
+
+    def format(string):
+        text = re.sub(r'\s+', ' ', string)
+        text = re.sub(r'\s*\\n\s*', '\n', text)
+        paras = text.split('\n')
+        paras = [textwrap.fill(p, width=docs_width) for p in paras]
+        return '\n'.join(paras)
+
+    def formatter(prog):
+        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)
+
+    def size2order(human):
+        units = {
+            "K": 2**10, "M": 2**20, "G": 2**30,
+            "k": 2**10, "m": 2**20, "g": 2**30,
+        }
+        unit = 1
+        if human[-1] in units:
+            unit = units[human[-1]]
+            human = human[:-1]
+        try:
+            size = int(human)
+        except ValueError:
+            raise ArgException('error: --cont value must be integer size with optional KMG unit')
+        size *= unit
+        order = int(math.log2(size / PAGE_SIZE))
+        #if order < 1:
+        #    raise ArgException('error: --cont value must be size of at least 2 pages')
+        if (1 << order) * PAGE_SIZE != size:
+            raise ArgException('error: --cont value must be size of power-of-2 pages')
+        if order > PMD_ORDER:
+            raise ArgException('error: --cont value must be less than or equal to PMD order')
+        return order
+
+    parser = argparse.ArgumentParser(formatter_class=formatter,
+        description=format("""Prints information about how transparent huge
+                    pages are mapped, either system-wide, or for a specified
+                    process or cgroup.\n
+                    \n
+                    When run with --pid, the user explicitly specifies the set
+                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
+                    with --cgroup, the user passes either a v1 or v2 cgroup and
+                    all pids that belong to the cgroup subtree are scanned.
+                    When run with neither --pid nor --cgroup, the full set of
+                    pids on the system is gathered from /proc and scanned as if
+                    the user had provided "--pid 1 --pid 2 ...".\n
+                    \n
+                    A default set of statistics is always generated for THP
+                    mappings. 
However, it is also possible to generate
+                    additional statistics for "contiguous block mappings"
+                    where the block size is user-defined.\n
+                    \n
+                    Statistics are maintained independently for anonymous and
+                    file-backed (pagecache) memory and are shown both in kB
+                    and as a percentage of either total anonymous or total
+                    file-backed memory as appropriate.\n
+                    \n
+                    THP Statistics\n
+                    --------------\n
+                    \n
+                    Statistics are always generated for fully- and
+                    contiguously-mapped THPs whose mapping address is aligned
+                    to their size, for each <size> supported by the system.
+                    Separate counters describe THPs mapped by PTE vs those
+                    mapped by PMD. (Although note a THP can only be mapped by
+                    PMD if it is PMD-sized):\n
+                    \n
+                    - anon-thp-pte-aligned-<size>kB\n
+                    - file-thp-pte-aligned-<size>kB\n
+                    - anon-thp-pmd-aligned-<size>kB\n
+                    - file-thp-pmd-aligned-<size>kB\n
+                    \n
+                    Similarly, statistics are always generated for fully- and
+                    contiguously-mapped THPs whose mapping address is *not*
+                    aligned to their size, for each <size> supported by the
+                    system. Due to the unaligned mapping, it is impossible to
+                    map by PMD, so there are only PTE counters for this
+                    case:\n
+                    \n
+                    - anon-thp-pte-unaligned-<size>kB\n
+                    - file-thp-pte-unaligned-<size>kB\n
+                    \n
+                    Statistics are also always generated for mapped pages that
+                    belong to a THP but where the THP is *not* fully- and
+                    contiguously-mapped. These "partial" mappings are all
+                    counted in the same counter regardless of the size of the
+                    THP that is partially mapped:\n
+                    \n
+                    - anon-thp-pte-partial\n
+                    - file-thp-pte-partial\n
+                    \n
+                    Contiguous Block Statistics\n
+                    ---------------------------\n
+                    \n
+                    An optional, additional set of statistics is generated for
+                    every contiguous block size specified with `--cont
+                    <size>`. These statistics show how much memory is mapped
+                    in contiguous blocks of <size> and also aligned to
+                    <size>. A given contiguous block must all belong to the
+                    same THP, but there is no requirement for it to be the
+                    *whole* THP. Separate counters describe contiguous blocks
+                    mapped by PTE vs those mapped by PMD:\n
+                    \n
+                    - anon-cont-pte-aligned-<size>kB\n
+                    - file-cont-pte-aligned-<size>kB\n
+                    - anon-cont-pmd-aligned-<size>kB\n
+                    - file-cont-pmd-aligned-<size>kB\n
+                    \n
+                    As an example, if monitoring 64K contiguous blocks (--cont
+                    64K), there are a number of sources that could provide
+                    such blocks: a fully- and contiguously-mapped 64K THP that
+                    is aligned to a 64K boundary would provide 1 block. A
+                    fully- and contiguously-mapped 128K THP that is aligned to
+                    at least a 64K boundary would provide 2 blocks. Or a 128K
+                    THP that maps its first 100K, but contiguously and
+                    starting at a 64K boundary would provide 1 block. A fully-
+                    and contiguously-mapped 2M THP would provide 32 blocks.
+                    There are many other possible permutations.\n"""),
+        epilog=format("""Requires root privilege to access pagemap and
+                    kpageflags."""))
+
+    group = parser.add_mutually_exclusive_group(required=False)
+    group.add_argument('--pid',
+        metavar='pid', required=False, type=int, default=[], action='append',
+        help="""Process id of the target process. May be issued multiple times
+            to scan multiple processes. --pid and --cgroup are mutually
+            exclusive. If neither are provided, all processes are scanned to
+            provide system-wide information.""")
+
+    group.add_argument('--cgroup',
+        metavar='path', required=False,
+        help="""Path to the target cgroup in sysfs. Iterates over every pid in
+            the cgroup and its children. --pid and --cgroup are mutually
+            exclusive. If neither are provided, all processes are scanned to
+            provide system-wide information.""")
+
+    parser.add_argument('--rollup',
+        required=False, default=False, action='store_true',
+        help="""Sum the per-vma statistics to provide a summary over the whole
+            system, process or cgroup.""")
+
+    parser.add_argument('--cont',
+        metavar='size[KMG]', required=False, default=[], action='append',
+        help="""Adds stats for memory that is mapped in contiguous blocks of
+            <size> and also aligned to <size>. May be issued multiple times to
+            track multiple sized blocks. Useful to infer e.g. arm64 contpte
+            and hpa mappings. Size must be a power-of-2 number of pages.""")
+
+    parser.add_argument('--inc-smaps',
+        required=False, default=False, action='store_true',
+        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
+            output.""")
+
+    parser.add_argument('--inc-empty',
+        required=False, default=False, action='store_true',
+        help="""Show all statistics including those whose value is 0.""")
+
+    parser.add_argument('--periodic',
+        metavar='sleep_ms', required=False, type=int,
+        help="""Run in a loop, polling every sleep_ms milliseconds.""")
+
+    parser.add_argument('--local',
+        required=False, default=False, action='store_true',
+        help="""Parse local data.""")
+
+    args = parser.parse_args()
+
+    try:
+        global LOCAL
+        LOCAL = args.local
+        init()
+        args.cont = [size2order(cont) for cont in args.cont]
+    except ArgException as e:
+        parser.print_usage()
+        raise
+
+    if args.periodic:
+        while True:
+            do_main(args)
+            print()
+            time.sleep(args.periodic / 1000)
+    else:
+        do_main(args)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        prog = os.path.basename(sys.argv[0])
+        print(f'{prog}: {e}')
+        sys.exit(1)
diff --git a/trace_processor_shell b/trace_processor_shell
new file mode 100755
index 0000000..9534a03
Binary files /dev/null and b/trace_processor_shell differ
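
Worked example of the size-to-order mapping behind "--cont 64k" (a minimal
sketch, not part of the commit; it assumes the 4 KiB base page size typical of
these devices):

    # order = log2(size / PAGE_SIZE); "--cont 64k" with 4 KiB pages gives
    # log2(65536 / 4096) = 4, which is why run.sh labels the 64 kB results
    # "order4_ratio" and "order4_cont".
    PAGE_SIZE = 4096
    size = 64 * 1024
    order = (size // PAGE_SIZE).bit_length() - 1  # == 4
    assert (1 << order) * PAGE_SIZE == size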