We have migrated to new Google sites!
Step 1: create config table
[gpadmin@sachi ~]$ psql -d sachi
psql (8.2.15)
Type "help" for help.
sachi=#
CREATE TABLE dbadmin.smart_analyze_config (
schemaname TEXT NOT NULL, -- schema name
tablename TEXT NOT NULL, -- table name
columns TEXT, -- Comma separated list of columns to be analyzed. Default is distribution key. If this field is RANDOM or empty, all columns will be analyzed
threshold INTERVAL NOT NULL -- Interval (in hours) of time after which statistics for this table are considered stale
)
WITH (APPENDONLY=FALSE)
DISTRIBUTED RANDOMLY;
Version 2 -> Add relpages and reltuples in the table.
sachi=#
CREATE TABLE dbadmin.smart_analyze_config (
schemaname TEXT NOT NULL, -- schema name
tablename TEXT NOT NULL, -- table name
columns TEXT, -- Comma separated list of columns to be analyzed. Default is distribution key. If this field is RANDOM or empty, all columns will be analyzed
threshold INTERVAL NOT NULL, -- Interval (in hours) of time after which statistics for this table are considered stale
relpages integer not null,
reltuples real not null
)
WITH (APPENDONLY=FALSE)
DISTRIBUTED RANDOMLY;
Step 2: Insert meta data in the table
insert into dbadmin.smart_analyze_config (schemaname,tablename,columns,threshold)
SELECT pgn.nspname ,
pgc.relname ,
pga.attname ,
'24 hours'
FROM
(SELECT gdp.localoid,
CASE WHEN ( Array_upper(gdp.attrnums, 1) > 0 ) THEN Unnest(gdp.attrnums) ELSE NULL END AS attnum
FROM gp_distribution_policy gdp
ORDER BY gdp.localoid) AS distrokey
INNER JOIN pg_class AS pgc ON distrokey.localoid = pgc.oid
INNER JOIN pg_namespace pgn ON pgc.relnamespace = pgn.oid
AND pgn.nspname NOT IN ('gp_toolkit', 'information_schema', 'pg_catalog')
AND pgc.relkind='r'
AND pgc.relstorage!='x'
LEFT OUTER JOIN pg_attribute pga ON distrokey.attnum = pga.attnum
AND distrokey.localoid = pga.attrelid
ORDER BY pgn.nspname, pgc.relname;
Version 2 -> Add relpages and reltuples in the table.
insert into smart_analyze_config (schemaname,tablename,columns,threshold,relpages,reltuples)
SELECT
pgn.nspname ,
pgc.relname ,
pga.attname ,
'72 hours',
pgc.relpages,
pgc.reltuples
FROM
(SELECT gdp.localoid,
CASE WHEN ( Array_upper(gdp.attrnums, 1) > 0 ) THEN Unnest(gdp.attrnums) ELSE NULL END AS attnum
FROM gp_distribution_policy gdp
ORDER BY gdp.localoid) AS distrokey
INNER JOIN pg_class AS pgc ON distrokey.localoid = pgc.oid
INNER JOIN pg_namespace pgn ON pgc.relnamespace = pgn.oid
AND pgn.nspname NOT IN ('gp_toolkit', 'information_schema', 'pg_catalog')
AND pgc.relkind='r' AND pgc.relstorage!='x'
LEFT OUTER JOIN pg_attribute pga ON distrokey.attnum = pga.attnum
AND distrokey.localoid = pga.attrelid ORDER BY pgn.nspname, pgc.relname;
INSERT 0 275
Time: 103.499 ms
Step 3: Create smart analyze SQL and look at the output. make any corrections if you see something not right.
select schemaname,tablename,lastnalyzed,
CASE
WHEN columns is NULL THEN 'ANALYZE '||'"'||schemaname||'"'||'.'||'"'||tablename||'"'||';'
ELSE 'ANALYZE '||'"'||schemaname||'"'||'.'||'"'||tablename||'"'||'('||columns||')'||';' END as analyzecommand
from
(
select pso.schemaname,pso.objname as tablename,columns,max(pso.statime) as lastnalyzed,max(sac.threshold) as thresholdinterval
from pg_stat_operations pso ,smart_analyze_config sac
where pso.schemaname=sac.schemaname
and pso.objname=sac.tablename
and pso.actionname='ANALYZE'
group by 1,2,3
order by 3 desc
) as a
where now()-lastnalyzed>thresholdinterval
order by 3 asc;
Step 4: Save above SQL in a file smartanalyze.sql
Step 6: From the gpadmin login run following command
[gpadmin@sachi scripts]$ psql -d sachi -Alt -f smartanalyze.sql|awk '{ print $8,$9 }'|psql -X -d sachi