We have migrated to new Google sites!
These parameters control how clients connect and authenticate to Greenplum Database.
DROP TABLE IF EXISTS dynamic_analyze_config;
CREATE TABLE dynamic_analyze_config (
schemaname TEXT NOT NULL, -- Text representation of schema name
tablename TEXT NOT NULL, -- Text representation of table name
columns TEXT[], -- Array of column names represented as text
-- objects listing the subset of columns
-- to analyze; if this field is empty, all
-- columns will be analyzed
threshold INTERVAL NOT NULL -- Interval of time after which statistics
-- for this table are considered stale
)
WITH (APPENDONLY=FALSE)
DISTRIBUTED RANDOMLY
;
-- This code is provided on an "as is" basis as an example of pothential
-- functionality, without warntees or conditions of any kind, either express or
-- implied, including, without limitation, any warranties or conditions of
-- title, non-infringement, merchantability, or fitness for a particular
-- purpose. As the user of this code you are solely responsible for determining
-- the appropriateness of using this code and mitigating risks associated with
-- it.
-- Usage forms:
--
-- SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes);
-- - Run ANALYZE against at most max_analyzes tables belonging to the schema
-- named by schema_name that are configured in the dynamic_analyze_config
-- table. If max_analyzes is not a positive integer, no such limit will
-- be observed. Table analysis will be prioritized based upon how much
-- time has elapsed since each table has been analyzed relative to the
-- staleness threshold configured in dynamic_analyze_config.
--
-- SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes, runtime_limit);
-- - Run ANALYZE against tables belonging to the schema named by schema_name
-- that are configured in the dynamic_analyze_config table until either:
-- 1) the number of tables that have been analyzed equals/exceeds
-- max_analyzes (assuming max_analyzes is a positive integer) or
-- 2) the total runtime after the completion of the most recent ANALYZE
-- has exceeded the runtime_limit parameter (interpreted as an
-- INTERVAL)
-- As with the other usage form, table analysis will be prioritized based
-- upon how much time has elapsed since each table has been analyzed
-- relative to the staleness threshold configured in
-- dynamic_analyze_config.
CREATE OR REPLACE FUNCTION dynamic_analyze_targeted (
schema_name TEXT, size int
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
BEGIN
FOR tablename, analyze_start, analyze_stop, time_minutes IN
SELECT * FROM dynamic_analyze_targeted (
schema_name, size, '-1 SECONDS'
)
LOOP
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION dynamic_analyze_targeted (
schema_name TEXT, size int, runtime_limit_text TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
DECLARE
cmd TEXT;
runtime_limit INTERVAL;
start_ts TIMESTAMP;
columns TEXT[];
columnstring TEXT;
tablesdone INTEGER;
BEGIN
tablesdone := 0;
start_ts := CLOCK_TIMESTAMP();
runtime_limit := runtime_limit_text::INTERVAL;
FOR tablename, columns IN
SELECT
n.nspname||'.'||c.relname,
tgts.columns
FROM
-- Assemble a list of all objects known to the catalog
pg_class c INNER JOIN pg_namespace n
ON (c.relnamespace = n.oid)
-- Determine when each object has last been analyzed
LEFT OUTER JOIN (
SELECT DISTINCT
objid,
statime
FROM
pg_stat_last_operation
WHERE
staactionname in ('ANALYZE')
) a
ON (a.objid = c.oid)
-- Get a list of objects explicitly named in the dynamic_analyze_config table
LEFT OUTER JOIN dynamic_analyze_config tgts ON (
n.nspname = tgts.schemaname AND
c.relname = tgts.tablename
)
-- Consider only tables in the named schema that have never been analyzed, or
-- for whom the timestamp of the last ANALYZE operation is outside of the
-- staleness interval set in dynamic_analyze_config
WHERE
n.nspname = schema_name
AND (
a.statime IS NULL
OR NOW()-a.statime-tgts.threshold > '0 SECONDS'::INTERVAL
)
-- Consider only tables explicitly named in dynamic_analyze_config
AND tgts.tablename IS NOT NULL
-- Prioritize tables whose statistics are more out-of-date
ORDER BY NOW()-a.statime-tgts.threshold DESC
-- Keep running until no tables remain to analyze, until the runtime
-- threshold has been exceeded, or until the table maximum has been hit.
LOOP
EXIT WHEN runtime_limit > '0 SECONDS'::INTERVAL AND CLOCK_TIMESTAMP() - start_ts > runtime_limit;
EXIT WHEN size > 0 AND tablesdone >= size;
-- Build and execute the analyze statement, taking into account whether a column
-- subset was configured in dynamic_analyze_config.
IF columns IS NULL OR columns = '{}' THEN
columnstring := '';
ELSE
columnstring := '('||array_to_string(columns, ',')||')';
END IF;
cmd := 'ANALYZE '||tablename||' '||columnstring||';';
analyze_start := CLOCK_TIMESTAMP();
EXECUTE cmd;
analyze_stop := CLOCK_TIMESTAMP();
tablesdone := tablesdone + 1;
time_minutes := EXTRACT(EPOCH FROM analyze_stop - analyze_start) / 60.0;
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
-- This code is provided on an "as is" basis as an example of pothential
-- functionality, without warntees or conditions of any kind, either express or
-- implied, including, without limitation, any warranties or conditions of
-- title, non-infringement, merchantability, or fitness for a particular
-- purpose. As the user of this code you are solely responsible for determining
-- the appropriateness of using this code and mitigating risks associated with
-- it.
-- Usage forms:
--
-- SELECT * FROM dynamic_analyze_catchall(schema_name, max_analyzes, expiry);
-- - Run ANALYZE against at most max_analyzes tables belonging to the
-- schema named by schema_name that are NOT configured in the
-- dynamic_analyze_config table. If max_analyzes is not a positive
-- integer, no such limit will be observed. Tables are prioritized based
-- upon how much time has elapsed since each table has been analyzed
-- relative to the expiry parameter.
-- Each table will only be analyzed if:
-- 1) It is not configured in the dynamic_analyze_config table
-- 2) The time that has elapsed since the table was last analyzed exceeds
-- the interval specified by the expiry parameter
-- 3) If the table is part of a partitioned object, it is a lowest-level
-- child (i.e. the table actually contains data)
-- 4) The table is not a staging or swapping table (by naming convention)
--
-- SELECT * FROM dynamic_analyze_catchall(schema_name, max_analyzes, expiry, runtime_limit);
-- - Run ANALYZE against at tables belonging to the schema named by
-- schema_name that are NOT configured in the dynamic_analyze_config
-- table. Execution terminates after either:
-- 1) The number of tables analyzed equals/exceeds the max_analyzes
-- parameter (assuming it is a positive integer)
-- 2) The time elapsed since the start of the function execution exceeds
-- the interval specified by the runtime_limit parameter
-- All other constraints on individual tables as noted above apply.
CREATE OR REPLACE FUNCTION dynamic_analyze_catchall (
schema_name TEXT, size int, expiry TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
BEGIN
FOR tablename, analyze_start, analyze_stop, time_minutes IN
SELECT * FROM dynamic_analyze_catchall (
schema_name, size, expiry, '-1 SECONDS'
)
LOOP
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION dynamic_analyze_catchall (
schema_name TEXT, size int, expiry TEXT, runtime_limit_text TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
DECLARE
cmd TEXT;
runtime_limit INTERVAL;
start_ts TIMESTAMP;
tablesdone INTEGER;
BEGIN
tablesdone := 0;
start_ts := CLOCK_TIMESTAMP();
runtime_limit := runtime_limit_text::INTERVAL;
FOR cmd, tablename IN
SELECT
'analyze '||n.nspname||'.'||c.relname||';',
n.nspname||'.'||c.relname
FROM
-- Assemble a list of all objects known to the catalog
pg_class c INNER JOIN pg_namespace n
ON (c.relnamespace = n.oid)
LEFT OUTER JOIN pg_partitions pgp
ON (
pgp.schemaname = n.nspname
AND pgp.tablename = c.relname
)
-- Determine when each object has last been analyzed
LEFT OUTER JOIN (
SELECT DISTINCT
objid,
statime
FROM
pg_stat_last_operation
WHERE
staactionname in ('ANALYZE')
) a
ON (a.objid = c.oid)
-- Get a list of objects explicitly named in the dynamic_analyze_config table
LEFT OUTER JOIN dynamic_analyze_config tgts ON (
n.nspname = tgts.schemaname AND
c.relname = tgts.tablename
)
-- Restrict the list of objects to non-swap and non-staging heap tables in the
-- named schema
WHERE
n.nspname = schema_name
AND c.relname not like 'stg%' and c.relname not like 'swap%'
AND relkind in ('r')
AND c.relstorage = 'h'
-- Consider only tables that have never been analyzed, or for whom the timestamp
-- of the last ANALYZE operation is outside of the specific staleness interval
AND (
a.statime IS NULL
OR a.statime < (NOW()-expiry::INTERVAL)
)
-- If a table is partitioned, ignore the ones that are not at the lowest level
-- of partitioning (and therefore would contain no data)
AND pgp.partitiontype IS NULL
-- Ignore tables explicitly named in dynamic_analyze_config
AND tgts.tablename IS NULL
-- Prioritize each table based upon the relative staleness of its statistics
ORDER BY NOW()-a.statime-expiry::INTERVAL DESC
-- Keep running until no tables remain to analyze, until the runtime
-- threshold has been exceeded, or until the number of tables meets
-- the maximum table count.
LOOP
EXIT WHEN runtime_limit > '0 SECONDS'::INTERVAL AND CLOCK_TIMESTAMP() - start_ts > runtime_limit;
EXIT WHEN size > 0 AND tablesdone >= size;
analyze_start := CLOCK_TIMESTAMP();
EXECUTE cmd;
analyze_stop := CLOCK_TIMESTAMP();
tablesdone := tablesdone + 1;
time_minutes := EXTRACT(EPOCH FROM analyze_stop - analyze_start) / 60.0;
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;