SQL to monitor health check of Greenplum database

-- check missing stats report

select * from gp_toolkit.gp_stats_missing;

 

 -- check bloat diagnosis report

select * from gp_toolkit.gp_bloat_diag;

 

 -- check tables with random distribution key

select table_owner,table_name, COALESCE(distribution_keys,'DISTRIBUTED RANDOMLY') distribution_keys

from

(

SELECT pgn.nspname as table_owner,

pgc.relname as table_name,

pga.attname as distribution_keys

FROM (SELECT gdp.localoid,

CASE

WHEN ( Array_upper(gdp.attrnums, 1) > 0 ) THEN

Unnest(gdp.attrnums)

ELSE NULL

END AS attnum

FROM gp_distribution_policy gdp

ORDER BY gdp.localoid) AS distrokey

INNER JOIN pg_class AS pgc

ON distrokey.localoid = pgc.oid

INNER JOIN pg_namespace pgn

ON pgc.relnamespace = pgn.oid

LEFT OUTER JOIN pg_attribute pga

ON distrokey.attnum = pga.attnum

AND distrokey.localoid = pga.attrelid

ORDER BY pgn.nspname,

pgc.relname) as a where COALESCE(distribution_keys,'DISTRIBUTED RANDOMLY')='DISTRIBUTED RANDOMLY';

 

-- Check data distribution skew by calculating the coefficient of variation (CV) for the data stored on each segment.

select * from gp_toolkit.gp_skew_coefficients;

 

-- check data distribution skew by calculating the percentage of the system that is idle during a table scan, which is an indicator of processing data skew.

select * from gp_toolkit.gp_skew_idle_fractions;

 

 

-- check table not analyzed in last 3 days

SELECT

n.nspname,

c.relname,

a.statime,

'analyze '||n.nspname||'.'||c.relname||';'

FROM

pg_class c INNER JOIN pg_namespace n

ON (c.relnamespace = n.oid)

LEFT OUTER JOIN pg_partitions pgp

ON (

pgp.schemaname = n.nspname

AND pgp.tablename = c.relname

)

LEFT OUTER JOIN (

SELECT DISTINCT

objid,

statime

FROM

pg_stat_last_operation

WHERE

staactionname in ('ANALYZE')

) a

ON (a.objid = c.oid)

WHERE

n.nspname NOT IN ('pg_catalog','information_schema','gp_toolkit')

and c.relname not like 'stg%' and c.relname not like 'swap%'

AND relkind in ('r')

AND c.relstorage = 'h'

and date_trunc('day',statime) < date_trunc('day',localtimestamp- interval '3 day')

ORDER BY reltuples DESC;

  

 

-- check worst performing SQL

\c gpperfmon

 

select sh.ctime,query_text,username, db,rsqname,

avg(tfinish-tstart) as run_time,

--date_part('hour',tfinish - tstart)*60+ date_part('minute',tfinish - tstart)

min(to_char(sh.ctime, 'Month')) as "month",

min(to_char(sh.ctime, 'dd')) as "day",

min(to_char(sh.ctime, 'Day')) weekday,

min(to_char(sh.ctime, 'hh24')) as "hour",

count(*) as cnt,

round(avg(100 - cpu_idle)::numeric,2) as avg_cpu_used,

round(min(100 - cpu_idle)::numeric,2) as min_cpu_used,

round(max(100 - cpu_idle)::numeric,2) as max_cpu_used,

round(avg(load2)::numeric,2) as avg_load2,

round(min(load2)::numeric,2) as min_load2,

round(max(load2)::numeric,2) as max_load2,

round(avg(disk_rb_rate)::numeric,2) avg_disk_rb_rate,

round(min(disk_rb_rate)::numeric,2) min_disk_rb_rate,

round(max(disk_rb_rate)::numeric,2) max_disk_rb_rate,

round(avg(disk_wb_rate)::numeric,2) avg_disk_wb_rate,

round(min(disk_wb_rate)::numeric,2) min_disk_wb_rate,

round(max(disk_wb_rate)::numeric,2) max_disk_wb_rate,

round(avg(net_rb_rate)::numeric,2) avg_net_rb_rate,

round(min(net_rb_rate)::numeric,2) min_net_rb_rate,

round(max(net_rb_rate)::numeric,2) max_net_rb_rate,

round(avg(net_wb_rate)::numeric,2) avg_net_wb_rate,

round(min(net_wb_rate)::numeric,2) min_net_wb_rate,

round(max(net_wb_rate)::numeric,2) max_net_wb_rate,

round(avg(mem_actual_used)/power(1024,3)::numeric,2) avg_mem_actual_used_gb,

round(min(mem_actual_used)/power(1024,3)::numeric,2) min_mem_actual_used_gb,

round(max(mem_actual_used)/power(1024,3)::numeric,2) max_mem_actual_used_gb,

round(avg(swap_used)::numeric,2) avg_swap_used,

round(min(swap_used)::numeric,2) min_swap_used,

round(max(swap_used)::numeric,2) max_swap_used

from system_history sh,queries_history qh

where sh.ctime between date_trunc('day',localtimestamp- interval '2 days') and date_trunc('day',localtimestamp)

and sh.ctime=qh.ctime

--and to_char(sh.ctime, 'dd')=28

--and username not in ('gpmon')

and db not in ('gpperfmon')

and date_part('hour',tfinish - tstart)*60+ date_part('minute',tfinish - tstart)> 30

group by sh.ctime,query_text,username, db,rsqname;