Automatic indexes recommendations in PostgreSQL

Installation

yum install pg_qualstats10.x86_64
  • Change the following in postgresql.conf and restart postgresql
shared_preload_libraries = 'pg_stat_statements, pg_qualstats'
  • Create following extensions :
testdb=# CREATE EXTENSION hypopg ;
CREATE EXTENSION
testdb=# CREATE EXTENSION pg_stat_statements ;
CREATE EXTENSION
testdb=# CREATE EXTENSION pg_qualstats;
CREATE EXTENSION
  • Set sample rate of pgqual stats to 1 in postgresql.conf . This rate define how frequently monitor and analyze the queries . Value ‘1’ represents that keep track of all queries
pg_qualstats.sample_rate = '1'
  • Create the function which will be used to detect usable indexes :
CREATE OR REPLACE FUNCTION find_usable_indexes()
RETURNS VOID AS
$$
DECLARE
l_queries record;
l_querytext text;
l_idx_def text;
l_bef_exp text;
l_after_exp text;
hypo_idx record;
l_attr record;
/* l_err int; */
BEGIN
CREATE TABLE IF NOT EXISTS public.idx_recommendations (queryid bigint,
query text, current_plan jsonb, recmnded_index text, hypo_plan jsonb);
FOR l_queries IN
SELECT t.relid, t.relname, t.queryid, t.attnames, t.attnums,
pg_qualstats_example_query(t.queryid) as query
FROM
(
SELECT qs.relid::regclass AS relname, qs.relid AS relid, qs.queryid,
string_agg(DISTINCT attnames.attnames,',') AS attnames, qs.attnums
FROM pg_qualstats_all qs
JOIN pg_qualstats q ON q.queryid = qs.queryid
JOIN pg_stat_statements ps ON q.queryid = ps.queryid
JOIN pg_amop amop ON amop.amopopr = qs.opno
JOIN pg_am ON amop.amopmethod = pg_am.oid,
LATERAL
(
SELECT pg_attribute.attname AS attnames
FROM pg_attribute
JOIN unnest(qs.attnums) a(a) ON a.a = pg_attribute.attnum
AND pg_attribute.attrelid = qs.relid
ORDER BY pg_attribute.attnum) attnames,
LATERAL unnest(qs.attnums) attnum(attnum)
WHERE NOT
(
EXISTS
(
SELECT 1
FROM pg_index i
WHERE i.indrelid = qs.relid AND
(arraycontains((i.indkey::integer[])[0:array_length(qs.attnums, 1) - 1],
qs.attnums::integer[]) OR arraycontains(qs.attnums::integer[],
(i.indkey::integer[])[0:array_length(i.indkey, 1) + 1]) AND i.indisunique)))
GROUP BY qs.relid, qs.queryid, qs.qualnodeid, qs.attnums) t
GROUP BY t.relid, t.relname, t.queryid, t.attnames, t.attnums
LOOP
/* RAISE NOTICE '% : is queryid',l_queries.queryid; */
execute 'explain (FORMAT JSON) '||l_queries.query INTO l_bef_exp;
execute 'select hypopg_reset()';
execute 'SELECT indexrelid,indexname FROM hypopg_create_index(''CREATE INDEX on '||l_queries.relname||'('||l_queries.attnames||')'')' INTO hypo_idx;
execute 'explain (FORMAT JSON) '||l_queries.query INTO l_after_exp;
execute 'select hypopg_get_indexdef('||hypo_idx.indexrelid||')' INTO l_idx_def;
INSERT INTO public.idx_recommendations (queryid,query,current_plan,recmnded_index,hypo_plan)
VALUES (l_queries.queryid,l_querytext,l_bef_exp::jsonb,l_idx_def,l_after_exp::jsonb);
END LOOP;
execute 'select hypopg_reset()';
END;
$$ LANGUAGE plpgsql;

Usage

  • Now Let’s make a table with 10 Crores rows as following :
testdb=# create table orders as select s as orderno, md5(random()::text) as orderitem , now() as order_created from generate_Series(1,100000000) s;
SELECT 100000000
  • Now lets run a query on which we want to check if need to make any index
testdb=# select * from orders where orderno = 80000  ;
-[ RECORD 1 ]-+---------------------------------
orderno | 80000
orderitem | 03b41c2f32d99e9a597010608946c4c6
order_created | 2021-05-22 17:52:21.435936+05:30
  • Now run following queries to find out which indexes are recommend by this extension and what are the improvement percentage by applying these indexes hypothetically
testdb=#           select find_usable_indexes();
find_usable_indexes
---------------------

(1 row)
testdb=# select b.query, a.recmnded_index,round((((a.current_plan->0->'Plan'->>'Total Cost')::numeric-(hypo_plan->0->'Plan'->>'Total Cost')::numeric)*100/(a.current_plan->0->'Plan'->>'Total Cost')::numeric),2) as percent_improvd FROM idx_recommendations a JOIN pg_stat_statements b ON a.queryid = b.queryid WHERE round((((current_plan->0->'Plan'->>'Total Cost')::numeric-(hypo_plan->0->'Plan'->>'Total Cost')::numeric)*100/(current_plan->0->'Plan'->>'Total Cost')::numeric),2) > 0 order by 3 desc ;
query | recmnded_index | percent_improvd
-----------------------------------------------------+------------------------------------------------------------------+-----------------
select * from orders where orderno = $1 | CREATE INDEX ON public.orders USING btree (orderno) | 100.00

--

--

SSE3 at Ameyo

Love podcasts or audiobooks? Learn on the go with our new app.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store