SQL to display table and index bloat in Greenplum

The easiest way to monitor table bloat is run SQL

select * from gp_toolkit.gp_bloat_diag;

Here is the SQL query that runs behind it.

SELECT current_database(), schemaname, tablename, /*reltuples::bigint, relpages::bigint, otta,*/ ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat, CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes, iname, /*ituples::bigint, ipages::bigint, iotta,*/ ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat, CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes FROM ( SELECT schemaname, tablename, cc.reltuples, cc.relpages, bs, CEIL((cc.reltuples*((datahdr+ma- (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta, COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages, COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols FROM ( SELECT ma,bs,schemaname,tablename, (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr, (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2 FROM ( SELECT schemaname, tablename, hdr, ma, bs, SUM((1-null_frac)*avg_width) AS datawidth, MAX(null_frac) AS maxfracsum, hdr+( SELECT 1+count(*)/8 FROM pg_stats s2 WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename ) AS nullhdr FROM pg_stats s, ( SELECT (SELECT current_setting('block_size')::numeric) AS bs, CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr, CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma FROM (SELECT version() AS v) AS foo ) AS constants GROUP BY 1,2,3,4,5 ) AS foo ) AS rs JOIN pg_class cc ON cc.relname = rs.tablename JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema' LEFT JOIN pg_index i ON indrelid = cc.oid LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid ) AS sml ORDER BY wastedbytes DESC

select current_database() as dbname , relid , schemaname , tablename , round(bloat,1) as bloat_ratio , pg_size_pretty(expbytes) as expected_size , pg_size_pretty(relbytes) as relation_size , pg_size_pretty(wastedbytes) as wasted_space , round(1.0 * relbytes / nullif((sum(relbytes) over ())::bigint,0),4) as relative_size , round((bloat-1) * (1.0 * relbytes / nullif((sum(relbytes) over ())::bigint, 0)), 8) as concern from ( SELECT relid , schemaname , tablename , CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END AS bloat , reltuples::bigint , relpages::bigint , otta , (bs*otta)::bigint as expbytes , CASE WHEN relpages < otta THEN 0 ELSE (bs*(sml.relpages-otta))::bigint END AS wastedbytes , CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages , (bs*relpages)::bigint as relbytes FROM ( SELECT schemaname , tablename , cc.oid as relid , cc.reltuples , cc.relpages , bs , CEIL((cc.reltuples*((datahdr+ma-(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta FROM ( SELECT ma , bs , schemaname , tablename , (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr , (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2 FROM ( SELECT schemaname , tablename , hdr , ma , bs , SUM((1-s.null_frac)*avg_width) AS datawidth , MAX(s.null_frac) AS maxfracsum , hdr+(1+(count(case when s.null_frac<>0 then 1 end))/8) as nullhdr FROM pg_stats s cross join ( SELECT current_setting('block_size')::numeric AS bs , CASE WHEN substring(version(),12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr , CASE WHEN version() ~ 'mingw32' THEN 8 ELSE 4 END AS ma ) AS constants GROUP BY schemaname, tablename, hdr, ma, bs ) AS foo ) AS rs JOIN pg_class cc ON cc.relname = rs.tablename and cc.relkind = 'r' JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname ) AS sml ) wrapper where wastedbytes > 2*1024*1024 and bloat >= 1.4 order by (bloat-1) * (1.0 * relbytes / nullif((sum(relbytes) over ())::bigint, 0)) desc;

Table and Index Bloat view

CREATE OR REPLACE VIEW bloat AS

      SELECT

        schemaname, tablename, reltuples::bigint, relpages::bigint, otta,

        ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,

        relpages::bigint - otta AS wastedpages,

        bs*(sml.relpages-otta)::bigint AS wastedbytes,

        pg_size_pretty((bs*(relpages-otta))::bigint) AS wastedsize,

        iname, ituples::bigint, ipages::bigint, iotta,

        ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,

        CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,

        CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,

        CASE WHEN ipages < iotta THEN pg_size_pretty(0) ELSE pg_size_pretty((bs*(ipages-iotta))::bigint) END AS wastedisize

      FROM (

        SELECT

          schemaname, tablename, cc.reltuples, cc.relpages, bs,

          CEIL((cc.reltuples*((datahdr+ma-

            (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,

          COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,

          COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols

        FROM (

          SELECT

            ma,bs,schemaname,tablename,

            (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,

            (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2

          FROM (

            SELECT

              schemaname, tablename, hdr, ma, bs,

              SUM((1-null_frac)*avg_width) AS datawidth,

              MAX(null_frac) AS maxfracsum,

              hdr+(

                SELECT 1+count(*)/8

                FROM pg_stats s2

                WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename

              ) AS nullhdr

            FROM pg_stats s, (

              SELECT

                (SELECT current_setting('block_size')::numeric) AS bs,

                CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,

                CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma

              FROM (SELECT version() AS v) AS foo

            ) AS constants

            GROUP BY 1,2,3,4,5

          ) AS foo

        ) AS rs

        JOIN pg_class cc ON cc.relname = rs.tablename

        JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname

        LEFT JOIN pg_index i ON indrelid = cc.oid

        LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid

      ) AS sml

      WHERE sml.relpages - otta > 0 OR ipages - iotta > 10

      ORDER BY wastedbytes DESC, wastedibytes DESC;