SQL to monitor health check of Greenplum database
-- check missing stats report
select * from gp_toolkit.gp_stats_missing;
-- check bloat diagnosis report
select * from gp_toolkit.gp_bloat_diag;
-- check tables with random distribution key
select table_owner,table_name, COALESCE(distribution_keys,'DISTRIBUTED RANDOMLY') distribution_keys
from
(
SELECT pgn.nspname as table_owner,
pgc.relname as table_name,
pga.attname as distribution_keys
FROM (SELECT gdp.localoid,
CASE
WHEN ( Array_upper(gdp.attrnums, 1) > 0 ) THEN
Unnest(gdp.attrnums)
ELSE NULL
END AS attnum
FROM gp_distribution_policy gdp
ORDER BY gdp.localoid) AS distrokey
INNER JOIN pg_class AS pgc
ON distrokey.localoid = pgc.oid
INNER JOIN pg_namespace pgn
ON pgc.relnamespace = pgn.oid
LEFT OUTER JOIN pg_attribute pga
ON distrokey.attnum = pga.attnum
AND distrokey.localoid = pga.attrelid
ORDER BY pgn.nspname,
pgc.relname) as a where COALESCE(distribution_keys,'DISTRIBUTED RANDOMLY')='DISTRIBUTED RANDOMLY';
-- Check data distribution skew by calculating the coefficient of variation (CV) for the data stored on each segment.
select * from gp_toolkit.gp_skew_coefficients;
-- check data distribution skew by calculating the percentage of the system that is idle during a table scan, which is an indicator of processing data skew.
select * from gp_toolkit.gp_skew_idle_fractions;
-- check table not analyzed in last 3 days
SELECT
n.nspname,
c.relname,
a.statime,
'analyze '||n.nspname||'.'||c.relname||';'
FROM
pg_class c INNER JOIN pg_namespace n
ON (c.relnamespace = n.oid)
LEFT OUTER JOIN pg_partitions pgp
ON (
pgp.schemaname = n.nspname
AND pgp.tablename = c.relname
)
LEFT OUTER JOIN (
SELECT DISTINCT
objid,
statime
FROM
pg_stat_last_operation
WHERE
staactionname in ('ANALYZE')
) a
ON (a.objid = c.oid)
WHERE
n.nspname NOT IN ('pg_catalog','information_schema','gp_toolkit')
and c.relname not like 'stg%' and c.relname not like 'swap%'
AND relkind in ('r')
AND c.relstorage = 'h'
and date_trunc('day',statime) < date_trunc('day',localtimestamp- interval '3 day')
ORDER BY reltuples DESC;
-- check worst performing SQL
\c gpperfmon
select sh.ctime,query_text,username, db,rsqname,
avg(tfinish-tstart) as run_time,
--date_part('hour',tfinish - tstart)*60+ date_part('minute',tfinish - tstart)
min(to_char(sh.ctime, 'Month')) as "month",
min(to_char(sh.ctime, 'dd')) as "day",
min(to_char(sh.ctime, 'Day')) weekday,
min(to_char(sh.ctime, 'hh24')) as "hour",
count(*) as cnt,
round(avg(100 - cpu_idle)::numeric,2) as avg_cpu_used,
round(min(100 - cpu_idle)::numeric,2) as min_cpu_used,
round(max(100 - cpu_idle)::numeric,2) as max_cpu_used,
round(avg(load2)::numeric,2) as avg_load2,
round(min(load2)::numeric,2) as min_load2,
round(max(load2)::numeric,2) as max_load2,
round(avg(disk_rb_rate)::numeric,2) avg_disk_rb_rate,
round(min(disk_rb_rate)::numeric,2) min_disk_rb_rate,
round(max(disk_rb_rate)::numeric,2) max_disk_rb_rate,
round(avg(disk_wb_rate)::numeric,2) avg_disk_wb_rate,
round(min(disk_wb_rate)::numeric,2) min_disk_wb_rate,
round(max(disk_wb_rate)::numeric,2) max_disk_wb_rate,
round(avg(net_rb_rate)::numeric,2) avg_net_rb_rate,
round(min(net_rb_rate)::numeric,2) min_net_rb_rate,
round(max(net_rb_rate)::numeric,2) max_net_rb_rate,
round(avg(net_wb_rate)::numeric,2) avg_net_wb_rate,
round(min(net_wb_rate)::numeric,2) min_net_wb_rate,
round(max(net_wb_rate)::numeric,2) max_net_wb_rate,
round(avg(mem_actual_used)/power(1024,3)::numeric,2) avg_mem_actual_used_gb,
round(min(mem_actual_used)/power(1024,3)::numeric,2) min_mem_actual_used_gb,
round(max(mem_actual_used)/power(1024,3)::numeric,2) max_mem_actual_used_gb,
round(avg(swap_used)::numeric,2) avg_swap_used,
round(min(swap_used)::numeric,2) min_swap_used,
round(max(swap_used)::numeric,2) max_swap_used
from system_history sh,queries_history qh
where sh.ctime between date_trunc('day',localtimestamp- interval '2 days') and date_trunc('day',localtimestamp)
and sh.ctime=qh.ctime
--and to_char(sh.ctime, 'dd')=28
--and username not in ('gpmon')
and db not in ('gpperfmon')
and date_part('hour',tfinish - tstart)*60+ date_part('minute',tfinish - tstart)> 30
group by sh.ctime,query_text,username, db,rsqname;