#!/usr/bin/stap # statsinfo profile simple_probe # version 0.1 ## stap script for statsinfo # This script requires postgres built with "--enable-dtace". # Not necessary with "--enable-debug", because this scripts # only use pre-define probe points. ## please perform this scripts as following command # stap -m statsinfo_prof statsinfo_simple_probe.stap # NOTE: "postgres" user must join the "stapdev" group. # If not, please do "usermod -G stapdev postgres" # Because, this script generates /proc/systemtap/xxx/xxx and # this file's owner is expected as "postgres". ## Global variables # global # "run_pids' is a array for storing running process. # Elements are PIDs of running pricesses. # This elements will be registered at fork_process(), and # delete at proc_exit() run_pids, # "pg_cond" is a array for storing each backend condition. # This array use two indexes-key as [pid(), stack] and ist value # is "PG_COND". "stack" is also array. see bellow. # Elements for [pid(),*] will be delete at pid()'s transaction_end pg_cond, # pg_cond[pid(), stack] # "stack" is a array for storing each backend condition stacks. # This array use a key as [pid()]. # This value is necessary for changing condtions. # A elements will delete at pid()'s transaction_end... stack, # stack[pid()] # MAX_STACK is a stopper. We alerts if number of stacks over this. MAX_STACK = 128, # max stack # "cond_cnt" is a array for counting which conditions are running. # This array has numbers of each PG_COND. # Elements are increase/decrease on real time. cond_cnt, # cond_cnt[PG_COND] # "resut" is a array for periodical counting of cond_cnt # This script report this values. result, # "cond_cnt_tmp" is a tmp-array # cond_cnt_tmp, ### PG_COND values ## These are condition values. This scripts does not use all values. ## But, in the future, some values will be neede... PGCOND_UNUSED = 0, ## 10000 - CPU ## PGCOND_CPU = 10000, ## 11000 - CPU:PARSE ## PGCOND_CPU_PARSE = 11000, #PGCOND_CPU_PARSE_ANALYZE = 11100, ## 12000 - CPU:REWRITE ## PGCOND_CPU_REWRITE = 12000, ## 13000 - CPU:PLAN ## PGCOND_CPU_PLAN = 13000, ## 14000 - CPU:EXECUTE ## PGCOND_CPU_EXECUTE = 14000, #PGCOND_CPU_TRIGGER = 14100, #PGCOND_CPU_SORT = 14200, PGCOND_CPU_SORT_HEAP = 14210, PGCOND_CPU_SORT_INDEX = 14220, PGCOND_CPU_SORT_DATUM = 14230, ## 15000 - CPU:UTILITY ## #PGCOND_CPU_UTILITY = 15000, #PGCOND_CPU_COMMIT = 15100, #PGCOND_CPU_ROLLBACK = 15200, ## 16000 - CPU:TEXT ## #PGCOND_CPU_TEXT = 16000, #PGCOND_CPU_DECODE = 16100, #PGCOND_CPU_ENCODE = 16200, #PGCOND_CPU_LIKE = 16310, #PGCOND_CPU_ILIKE = 16320, #PGCOND_CPU_RE = 16400, #PGCOND_CPU_RE_COMPILE = 16410, #PGCOND_CPU_RE_EXECUTE = 16420, ## 20000 - NETWORK ## #PGCOND_NETWORK = 20000, #PGCOND_NETWORK_RECV = 21000, #PGCOND_NETWORK_SEND = 22000, ## 30000 - IDLE (should be larger th #PGCOND_IDLE = 30000, #PGCOND_IDLE_IN_TRANSACTION = 31000, #PGCOND_IDLE_SLEEP = 32000, ## 40000 - XLOG ## #PGCOND_XLOG = 40000, #PGCOND_XLOG_CRC = 41000, PGCOND_XLOG_INSERT = 42000, #PGCOND_XLOG_OPEN = 43000, #PGCOND_XLOG_CLOSE = 44000, #PGCOND_XLOG_WRITE = 45000, #PGCOND_XLOG_FLUSH = 46000, ## 50000 - DATA ## #PGCOND_DATA = 50000, #PGCOND_DATA_CREATE = 51000, #PGCOND_DATA_OPEN = 52000, #PGCOND_DATA_CLOSE = 53000, #PGCOND_DATA_STAT = 54000, PGCOND_DATA_READ = 55000, #PGCOND_DATA_PREFETCH = 56000, PGCOND_DATA_WRITE = 57000, #PGCOND_DATA_EXTEND = 58000, ## 60000 - TEMP ## #PGCOND_TEMP = 60000, #PGCOND_TEMP_READ = 61000, #PGCOND_TEMP_WRITE = 62000, ## 70000 - LOCK ## PGCOND_LOCK = 70000, ## 70001-70999 is reserved for lmgr ## 80000 - LWLOCK ## PGCOND_LWLOCK = 80000, ## 80001-80999 is reserved for named PGCOND_LWLOCK_BUFMAPPING = 81000, PGCOND_LWLOCK_LOCKMGR = 82000, #PGCOND_LWLOCK_PAGE = 83000, #PGCOND_LWLOCK_IO = 84000, ## 90000 - SPINLOCK ## #PGCOND_SPINLOCK = 90000, ## CONDITION_NAME # cond_name is a array for getting actual condition names. # This is initialized at starting probe. cond_name, ### LOCK_ID defines ## TODO: add PGCOND_LWLOCK:xxxx according following lodk-ids. # NUM_BUFFER_PARTITIONS = 16, # NUM_LOCK_PARTITIONS = 16, # actual lwlock-IDs # BufFreelistLock = 0, # ShmemIndexLock = 1, # OidGenLock = 2, # XidGenLock = 3, # ProcArrayLock = 4, # SInvalReadLock = 5, # SInvalWriteLock = 6, # WALInsertLock = 7, # WALWriteLock = 8, # ControlFileLock = 9, # CheckpointLock = 10, # CLogControlLock = 11, # SubtransControlLock = 12, # MultiXactGenLock = 13, # MultiXactOffsetControlLock = 14, # MultiXactMemberControlLock = 15, # RelCacheInitLock = 16, # BgWriterCommLock = 17, # TwoPhaseStateLock = 18, # TablespaceCreateLock = 19, # BtreeVacuumLock = 20, # AddinShmemInitLock = 21, # AutovacuumLock = 22, # AutovacuumScheduleLock = 23, # SyncScanLock = 24, # RelationMappingLock = 25, # AsyncCtlLock = 26, # AsyncQueueLock = 27, FirstBufMappingLock = 28, FirstLockMgrLock = 44, NumFixedLWLocks = 60 # MaxDynamicLWLock = 1000000000 probe begin { # Now this probe is starting # Initialize cond_name array cond_name[PGCOND_UNUSED] = "UNUSED_OR_IDLE" cond_name[PGCOND_CPU] = "CPU" cond_name[PGCOND_CPU_PARSE] = "CPU:PARSE" cond_name[PGCOND_CPU_REWRITE] = "CPU:REWRITE" cond_name[PGCOND_CPU_PLAN] = "CPU:PLAN" cond_name[PGCOND_CPU_EXECUTE] = "CPU:EXECUTE" cond_name[PGCOND_CPU_SORT_HEAP] = "CPU:SORT-HEAP" cond_name[PGCOND_CPU_SORT_INDEX] = "CPU:SORT_INDEX" cond_name[PGCOND_CPU_SORT_DATUM] = "CPU:SORT-DATUM" cond_name[PGCOND_XLOG_INSERT] = "XLOG:INSERT" cond_name[PGCOND_DATA_READ] = "DATA:READ" cond_name[PGCOND_DATA_WRITE] = "DATA:WRITE" cond_name[PGCOND_LOCK] = "LOCK:HEAVY-LOCK" cond_name[PGCOND_LWLOCK] = "LWLOCK:LELOCK" cond_name[PGCOND_LWLOCK_BUFMAPPING] = "LWLOCK:BUFFER-MAPPING-LOCK" cond_name[PGCOND_LWLOCK_LOCKMGR] = "LWLOCK:LOCK-MGR-LOCK" printf("probe start!\n") } ## Backend start. # TODO: could not assign "BackendStartup" to function... is systemtap bug? probe process("/usr/local/pgsql901/bin/postgres").function("PostgresMain") { printf("New backend start. pid:%d\n", pid()) run_pids[pid()] = 1 stack[pid()] = 0 pg_cond[pid(), 0] = PGCOND_UNUSED cond_cnt[PGCOND_UNUSED]++ } ## TODO # Check backends already run since before starting probe. # We can not store above process because these process not through the BackendStartup or others # # ## backend finish .. in smart probe process("/usr/local/pgsql901/bin/postgres").function("proc_exit") { if (run_pids[pid()] == 1) { delete stack[pid()] delete pg_cond[pid(), 0] delete run_pids[pid()] cond_cnt[PGCOND_UNUSED]-- printf("Backend finish. pid:%d\n", pid()) } } ## backend finish .. in force probe process("/usr/local/pgsql901/bin/postgres").function("die") { if (run_pids[pid()] == 1) { delete stack[pid()] delete pg_cond[pid(), 0] delete run_pids[pid()] cond_cnt[PGCOND_UNUSED]-- printf("Backend killed by SIGTERM. pid:%d\n", pid()) } } ## FIXME : commit action is ignored #probe process("/usr/local/pgsql901/bin/postgres").mark("transaction__commit") #{ #} ## FIXME : avort action is ignored #probe process("/usr/local/pgsql901/bin/postgres").mark("transaction__abort") #{ #} # push_condition # This function use PG_COND. Check previous PG_COND and new PG_COND, and control cond_cnt and others function push_cond(x:long) { y = pid() # Is this pid() regsitered our pid array? if (run_pids[y] == 1) { prev = pg_cond[y, stack[y]] # we change condition only when prev-PG_COND != new_PG_COND if (prev != x) { cond_cnt[prev]-- cond_cnt[x]++ } stack[y]++ pg_cond[y, stack[y]] = x # Overflow? if (stack[y] > MAX_STACK) { printf("WARNING, condtion stack overflow. pid:%d. cond:%d\n", y, x) } } } function pop_cond() { x = pid() # Is this pid() regsitered our pid array? if (run_pids[x] == 1) { # decrement old condition counter. cond_cnt[pg_cond[x, stack[x]]]-- # delete old condition element of this pid delete pg_cond[x, stack[x]] # decreament stack count stack[x]-- # Underflow? if (stack[x] < 0) { printf("WARNING, condtion stack underflow. pid:%d.\n", x) } # Now this process move pg_cond[x, stack[x]] cond_cnt[pg_cond[x, stack[x]]]++ } } probe process("/usr/local/pgsql901/bin/postgres").mark("query__start") { push_cond(PGCOND_CPU) } probe process("/usr/local/pgsql901/bin/postgres").mark("query__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("query__parse__start") { push_cond(PGCOND_CPU_PARSE) } probe process("/usr/local/pgsql901/bin/postgres").mark("query__parse__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("query__rewrite__start") { push_cond(PGCOND_CPU_REWRITE) } probe process("/usr/local/pgsql901/bin/postgres").mark("query__rewrite__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("query__plan__start") { push_cond(PGCOND_CPU_PLAN) } probe process("/usr/local/pgsql901/bin/postgres").mark("query__plan__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("query__execute__start") { push_cond(PGCOND_CPU_EXECUTE) } probe process("/usr/local/pgsql901/bin/postgres").mark("query__execute__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("smgr__md__read__start") { push_cond(PGCOND_DATA_READ) } probe process("/usr/local/pgsql901/bin/postgres").mark("smgr__md__read__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("smgr__md__write__start") { push_cond(PGCOND_DATA_WRITE) } probe process("/usr/local/pgsql901/bin/postgres").mark("smgr__md__write__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("sort__start") { if ($arg1 == 0) push_cond(PGCOND_CPU_SORT_HEAP) else if ($arg2 == 1) push_cond(PGCOND_CPU_SORT_INDEX) else push_cond(PGCOND_CPU_SORT_DATUM) } probe process("/usr/local/pgsql901/bin/postgres").mark("sort__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("lwlock__wait__start") { if ($arg1 < FirstBufMappingLock) push_cond(PGCOND_LWLOCK) ## FIXME: should be considered the lockid else if ($arg1 < FirstLockMgrLock) push_cond(PGCOND_LWLOCK_BUFMAPPING) else if ($arg1 < NumFixedLWLocks) push_cond(PGCOND_LWLOCK_LOCKMGR) else push_cond(PGCOND_LWLOCK) } probe process("/usr/local/pgsql901/bin/postgres").mark("lwlock__wait__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").mark("lock__wait__start") { push_cond(PGCOND_LOCK) } probe process("/usr/local/pgsql901/bin/postgres").mark("lock__wait__done") { pop_cond() } probe process("/usr/local/pgsql901/bin/postgres").function("XLogInsert").call { push_cond(PGCOND_XLOG_INSERT) } probe process("/usr/local/pgsql901/bin/postgres").function("XLogInsert").return { pop_cond() } probe timer.s(1) { foreach(cond in cond_cnt) { result[cond] += cond_cnt[cond] } } ## probe procfs.read # This part will be called by statsinfo. probe procfs("profile").read { res # pickup and sort by high-frequency conditions foreach(cond in result) { name = cond_name[cond] # construct the return value. we set condtion name and numbers res .= sprintf("%s,%d,%d\n", name, result[cond], -1) } # store the strings and cleanup # statsinfo will read this $value $value = res delete res } ## probe procfs.write # This part is utility to reset the result. probe procfs("profile").write { delete result }