Dynamic Analyze Configuration and Functions

The gadget spec URL could not be found
These parameters control how clients connect and authenticate to Greenplum Database.
The gadget spec URL could not be found
DROP TABLE IF EXISTS dynamic_analyze_config;
CREATE TABLE dynamic_analyze_config (
    schemaname TEXT NOT NULL,      -- Text representation of schema name
    tablename TEXT NOT NULL,       -- Text representation of table name
    columns TEXT[],                -- Array of column names represented as text
                                   --   objects listing the subset of columns
                                   --   to analyze; if this field is empty, all
                                   --   columns will be analyzed
    threshold INTERVAL NOT NULL    -- Interval of time after which statistics
                                   --   for this table are considered stale
)
    WITH (APPENDONLY=FALSE)
    DISTRIBUTED RANDOMLY
;
The gadget spec URL could not be found
-- This code is provided on an "as is" basis as an example of pothential
-- functionality, without warntees or conditions of any kind, either express or
-- implied, including, without limitation, any warranties or conditions of
-- title, non-infringement, merchantability, or fitness for a particular
-- purpose. As the user of this code you are solely responsible for determining
-- the appropriateness of using this code and mitigating risks associated with
-- it. 


-- Usage forms:
--
--     SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes);
--     - Run ANALYZE against at most max_analyzes tables belonging to the schema
--       named by schema_name that are configured in the dynamic_analyze_config
--       table.  If max_analyzes is not a positive integer, no such limit will
--       be observed.  Table analysis will be prioritized based upon how much
--       time has elapsed since each table has been analyzed relative to the
--       staleness threshold configured in dynamic_analyze_config.
--
--     SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes, runtime_limit);
--     - Run ANALYZE against tables belonging to the schema named by schema_name
--       that are configured in the dynamic_analyze_config table until either:
--       1) the number of tables that have been analyzed equals/exceeds
--          max_analyzes (assuming max_analyzes is a positive integer) or
--       2) the total runtime after the completion of the most recent ANALYZE
--          has exceeded the runtime_limit parameter (interpreted as an
--          INTERVAL)
--       As with the other usage form, table analysis will be prioritized based
--       upon how much time has elapsed since each table has been analyzed
--       relative to the staleness threshold configured in
--       dynamic_analyze_config.

CREATE OR REPLACE FUNCTION dynamic_analyze_targeted (
schema_name TEXT, size int 
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
BEGIN
FOR tablename, analyze_start, analyze_stop, time_minutes IN
SELECT * FROM dynamic_analyze_targeted (
schema_name, size, '-1 SECONDS'
)
LOOP
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
The gadget spec URL could not be found

CREATE OR REPLACE FUNCTION dynamic_analyze_targeted (
schema_name TEXT, size int, runtime_limit_text TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
DECLARE
cmd TEXT; 
runtime_limit INTERVAL;
start_ts TIMESTAMP;
columns TEXT[];
columnstring TEXT;
tablesdone INTEGER;
BEGIN
tablesdone := 0;
start_ts := CLOCK_TIMESTAMP();
runtime_limit := runtime_limit_text::INTERVAL;
FOR tablename, columns IN
SELECT 
n.nspname||'.'||c.relname,
tgts.columns
FROM

-- Assemble a list of all objects known to the catalog
pg_class c INNER JOIN pg_namespace n
ON (c.relnamespace = n.oid)

-- Determine when each object has last been analyzed
LEFT OUTER JOIN (
SELECT DISTINCT
objid,
statime
FROM
pg_stat_last_operation 
WHERE
staactionname in ('ANALYZE')
) a
ON (a.objid = c.oid)

-- Get a list of objects explicitly named in the dynamic_analyze_config table
LEFT OUTER JOIN dynamic_analyze_config tgts ON (
n.nspname = tgts.schemaname AND
c.relname = tgts.tablename
)


-- Consider only tables in the named schema that have never been analyzed, or
-- for whom the timestamp of the last ANALYZE operation is outside of the
-- staleness interval set in dynamic_analyze_config
WHERE
n.nspname = schema_name
AND (
a.statime IS NULL
OR NOW()-a.statime-tgts.threshold > '0 SECONDS'::INTERVAL
)


-- Consider only tables explicitly named in dynamic_analyze_config
AND tgts.tablename IS NOT NULL

-- Prioritize tables whose statistics are more out-of-date
ORDER BY NOW()-a.statime-tgts.threshold DESC

-- Keep running until no tables remain to analyze, until the runtime
-- threshold has been exceeded, or until the table maximum has been hit.
LOOP
EXIT WHEN runtime_limit > '0 SECONDS'::INTERVAL AND CLOCK_TIMESTAMP() - start_ts > runtime_limit;
EXIT WHEN size > 0 AND tablesdone >= size;

-- Build and execute the analyze statement, taking into account whether a column
-- subset was configured in dynamic_analyze_config.
IF columns IS NULL OR columns = '{}' THEN
columnstring := '';
ELSE
columnstring := '('||array_to_string(columns, ',')||')';
END IF;
cmd := 'ANALYZE '||tablename||' '||columnstring||';';
analyze_start := CLOCK_TIMESTAMP();
EXECUTE cmd;
analyze_stop := CLOCK_TIMESTAMP();
tablesdone := tablesdone + 1;
time_minutes := EXTRACT(EPOCH FROM analyze_stop - analyze_start) / 60.0;
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;

The gadget spec URL could not be found
-- This code is provided on an "as is" basis as an example of pothential
-- functionality, without warntees or conditions of any kind, either express or
-- implied, including, without limitation, any warranties or conditions of
-- title, non-infringement, merchantability, or fitness for a particular
-- purpose. As the user of this code you are solely responsible for determining
-- the appropriateness of using this code and mitigating risks associated with
-- it. 


-- Usage forms:
--
--     SELECT * FROM dynamic_analyze_catchall(schema_name, max_analyzes, expiry);
--     - Run ANALYZE against at most max_analyzes tables belonging to the
--       schema named by schema_name that are NOT configured in the
--       dynamic_analyze_config table.  If max_analyzes is not a positive
--       integer, no such limit will be observed.  Tables are prioritized based
--       upon how much time has elapsed since each table has been analyzed
--       relative to the expiry parameter.
--       Each table will only be analyzed if:
--       1) It is not configured in the dynamic_analyze_config table
--       2) The time that has elapsed since the table was last analyzed exceeds
--          the interval specified by the expiry parameter
--       3) If the table is part of a partitioned object, it is a lowest-level
--          child (i.e. the table actually contains data)
--       4) The table is not a staging or swapping table (by naming convention)
--
--     SELECT * FROM dynamic_analyze_catchall(schema_name, max_analyzes, expiry, runtime_limit);
--     - Run ANALYZE against at tables belonging to the schema named by
--       schema_name that are NOT configured in the dynamic_analyze_config
--       table.  Execution terminates after either:
--       1) The number of tables analyzed equals/exceeds the max_analyzes
--          parameter (assuming it is a positive integer)
--       2) The time elapsed since the start of the function execution exceeds
--          the interval specified by the runtime_limit parameter
--       All other constraints on individual tables as noted above apply.


CREATE OR REPLACE FUNCTION dynamic_analyze_catchall (
schema_name TEXT, size int, expiry TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
BEGIN
FOR tablename, analyze_start, analyze_stop, time_minutes IN
SELECT * FROM dynamic_analyze_catchall (
schema_name, size, expiry, '-1 SECONDS'
)
LOOP
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
The gadget spec URL could not be found

CREATE OR REPLACE FUNCTION dynamic_analyze_catchall (
schema_name TEXT, size int, expiry TEXT, runtime_limit_text TEXT
) RETURNS TABLE(
tablename TEXT,
analyze_start TIMESTAMP,
analyze_stop TIMESTAMP,
time_minutes NUMERIC
) AS $$
DECLARE
cmd TEXT; 
runtime_limit INTERVAL;
start_ts TIMESTAMP;
tablesdone INTEGER;
BEGIN
tablesdone := 0;
start_ts := CLOCK_TIMESTAMP();
runtime_limit := runtime_limit_text::INTERVAL;
FOR cmd, tablename IN
SELECT 
'analyze '||n.nspname||'.'||c.relname||';',
n.nspname||'.'||c.relname
FROM

-- Assemble a list of all objects known to the catalog
pg_class c INNER JOIN pg_namespace n
ON (c.relnamespace = n.oid)
LEFT OUTER JOIN pg_partitions pgp
ON (
pgp.schemaname = n.nspname
AND pgp.tablename = c.relname
)

-- Determine when each object has last been analyzed
LEFT OUTER JOIN (
SELECT DISTINCT
objid,
statime
FROM
pg_stat_last_operation 
WHERE
staactionname in ('ANALYZE')
) a
ON (a.objid = c.oid)

-- Get a list of objects explicitly named in the dynamic_analyze_config table
LEFT OUTER JOIN dynamic_analyze_config tgts ON (
n.nspname = tgts.schemaname AND
c.relname = tgts.tablename
)

-- Restrict the list of objects to non-swap and non-staging heap tables in the
-- named schema
WHERE
n.nspname = schema_name
AND c.relname not like 'stg%' and c.relname not like 'swap%'
AND relkind in ('r')
AND c.relstorage = 'h'

-- Consider only tables that have never been analyzed, or for whom the timestamp
-- of the last ANALYZE operation is outside of the specific staleness interval
AND (
a.statime IS NULL
OR a.statime < (NOW()-expiry::INTERVAL)
)

-- If a table is partitioned, ignore the ones that are not at the lowest level
-- of partitioning (and therefore would contain no data)
AND pgp.partitiontype IS NULL

-- Ignore tables explicitly named in dynamic_analyze_config
AND tgts.tablename IS NULL

-- Prioritize each table based upon the relative staleness of its statistics
ORDER BY NOW()-a.statime-expiry::INTERVAL DESC

-- Keep running until no tables remain to analyze, until the runtime
-- threshold has been exceeded, or until the number of tables meets
-- the maximum table count.
LOOP
EXIT WHEN runtime_limit > '0 SECONDS'::INTERVAL AND CLOCK_TIMESTAMP() - start_ts > runtime_limit;
EXIT WHEN size > 0 AND tablesdone >= size;
analyze_start := CLOCK_TIMESTAMP();
EXECUTE cmd;
analyze_stop := CLOCK_TIMESTAMP();
tablesdone := tablesdone + 1;
time_minutes := EXTRACT(EPOCH FROM analyze_stop - analyze_start) / 60.0;
RETURN NEXT;
END LOOP;
END;
$$ LANGUAGE plpgsql;
The gadget spec URL could not be found
The gadget spec URL could not be found


The gadget spec URL could not be found




The gadget spec URL could not be found


The gadget spec URL could not be found


The gadget spec URL could not be found

Comments