postgresources – Telegram
#math

SELECT ROUND(123.456);
# => 123

SELECT ROUND(123.456, 2);
# => 123.46

SELECT ROUND(123.456, -2);
# => 100

SELECT ROUND(123.456, -1);
# => 120

# This also works in MySQL, Oracle and SQL Server, though not SQLite.
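# One gotcha: in Postgres the two-argument form is only defined for numeric,
# so a double precision value (e.g. the result of random()) needs a cast first:

SELECT ROUND(random()::numeric, 2);
# => 0.42 (for example)

# without the cast you get an error along the lines of
# "function round(double precision, integer) does not exist"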
#test Create one million rows

insert into towns (
code, article, name, department
)
select
left(md5(i::text), 10),
md5(random()::text),
md5(random()::text),
left(md5(random()::text), 4)
from generate_series(1, 1000000) s(i);
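
For the insert to run you need a matching towns table; a minimal sketch (the column types here are assumed):

create table towns (
id serial primary key,
code text,
article text,
name text,
department text
);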
#analyze See how much space your tables (and indexes!) are taking up

SELECT
relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total,
pg_size_pretty(pg_relation_size(relid)) AS internal,
pg_size_pretty(pg_table_size(relid) - pg_relation_size(relid)) AS external,
pg_size_pretty(pg_indexes_size(relid)) AS indexes
FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC;
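
For a single table and everything attached to it, pg_size_pretty plus pg_total_relation_size is enough (towns here is just an example table name):

SELECT pg_size_pretty(pg_total_relation_size('towns'));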
#conf How to see the elapsed time for your queries in psql

\timing

If you really like this and want it to be permanent, add it to a .psqlrc file in your home directory.
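For example, a one-line ~/.psqlrc is enough:

\timing on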
#analyze The IO statistics for each of your tables, including how many 'blocks' of data have been served from the cache (i.e. heap_blks_hit).

SELECT * FROM pg_statio_user_tables;

WITH y AS (
SELECT
sum(heap_blks_read) AS read,
sum(heap_blks_hit) AS hit
FROM pg_statio_user_tables
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;

The second query adds up the relevant numbers to show how many cache hits occurred and how often the cache *didn't* deliver (heap_blks_read is the number of blocks read from disk), and then calculates the ratio of hits to the total number of blocks fetched.

In this case, our cache is working effectively as the hit rate is *almost* 100%. But in a low memory environment or when a server is under particular strain, you may see lower numbers. Take note of these and be prepared to upgrade your database server.
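The same idea works for indexes via pg_statio_user_indexes, which exposes idx_blks_read and idx_blks_hit (a parallel sketch of the query above):

WITH y AS (
SELECT
sum(idx_blks_read) AS read,
sum(idx_blks_hit) AS hit
FROM pg_statio_user_indexes
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;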
#rand Selecting a 'sample' of a table's rows

Create a table that contains 1000 rows, the numbers 1 to 1000:

CREATE TABLE numbers (number int);
INSERT INTO numbers (number)
SELECT id FROM generate_series(1,1000) AS id;

Postgres 9.5 introduced the TABLESAMPLE clause, which lets you sample tables in different ways (two methods by default, with more available via extensions). Let's use it to retrieve a random 0.5% of the rows from our table:

SELECT * FROM numbers TABLESAMPLE BERNOULLI (0.5);
number
--------
101
213
278
433

The BERNOULLI method here selects each row with a 0.5% probability (so statistically we'd get 5 rows on average), but there's also a more efficient SYSTEM method that uses a cruder block-based technique (with the possible downside that it tends to return ranges/groups of rows that sit together in the table).
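For comparison, the block-based method uses the same syntax:

SELECT * FROM numbers TABLESAMPLE SYSTEM (0.5);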

You can also combine TABLESAMPLE with other SQL statements like UPDATE or DELETE if, say, you wanted to delete 30% of a table's rows for whatever reason.
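The DELETE target itself doesn't accept a TABLESAMPLE clause, so one way to do that 30% delete is to sample in a subquery (a sketch against the numbers table above):

DELETE FROM numbers
WHERE number IN (
SELECT number FROM numbers TABLESAMPLE BERNOULLI (30)
);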
#json

# containment: rows whose data contains {"a":1}
SELECT * FROM json_test WHERE data @> '{"a":1}';

# the reverse: rows whose data is contained by (is a subset of) {"a":1}
SELECT * FROM json_test WHERE data <@ '{"a":1}';

# test for the existence of a key
SELECT * FROM json_test WHERE data ? 'a';

# test for objects that have any of a list of keys:
SELECT * FROM json_test WHERE data ?| array['a', 'b'];
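
These operators are defined for the jsonb type; to try them out you need something like the following (the json_test schema is assumed here, it isn't given above):

CREATE TABLE json_test (id serial PRIMARY KEY, data jsonb);
INSERT INTO json_test (data) VALUES ('{"a": 1}'), ('{"a": 1, "b": 2}'), ('{"c": 3}');

# a GIN index lets @>, ? and ?| use an index instead of scanning the table
CREATE INDEX json_test_data_idx ON json_test USING GIN (data);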
#array In Postgres syntax, the part '{1,2,3}' is a plain string, :: is the Postgres typecast operator, and int[] is the type (an array of integers). The expression '{1,2,3}'::int[] reads as “typecast the string {1,2,3} into an array of integers”.

The string notation of curly brackets and comma-separated values is an array value constructor recognized by Postgres.

test=# SELECT '{1,2,3}'::int[] ; -- an array of integers
int4
---------
{1,2,3}
(1 row)

test=# SELECT '{"hello","world"}'::text[] ; -- an array of strings
text
---------------
{hello,world}
(1 row)
#array

test=# SELECT array[1,2,3];
array
---------
{1,2,3}
(1 row)

test=# SELECT array['hello', 'world'];
array
---------------
{hello,world}
(1 row)

test=# SELECT array[1, 'world'];
ERROR: invalid input syntax for integer: "world"
LINE 1: SELECT array[1, 'world'];
#array Here is how you can have an array as a column type in a table:

test=# CREATE TABLE posts (
test(# title text NOT NULL PRIMARY KEY,
test(# tags text[] NOT NULL DEFAULT '{}'::text[]
test(# );
CREATE TABLE

test=# INSERT INTO posts (title, tags) VALUES
test-# ('Using PostgreSQL Arrays with Golang', '{"postgres","golang"}');
INSERT 0 1
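
To query by tag afterwards, array containment and ANY both work (a couple of sketches against the posts table above):

-- containment: every post tagged "postgres"
SELECT * FROM posts WHERE tags @> '{postgres}';

-- membership of a single value
SELECT * FROM posts WHERE 'golang' = ANY (tags);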
#index #bloom

Let us create a 1,000,000 row 57 MB table with 5 random integer columns:

CREATE TABLE foo(id SERIAL, c1 INT, c2 INT, c3 INT, c4 INT, c5 INT,
it TIMESTAMP NOT NULL DEFAULT NOW());
INSERT INTO foo(c1, c2, c3, c4, c5)
SELECT random() * 1000000, random() * 1000000, random() * 1000000,
random() * 1000000, random() * 1000000
FROM generate_series(1, 1000000);

The following query takes 110.2 ms:

SELECT * FROM foo WHERE c1 = 171267 AND c4=65555;
-- Seq Scan on foo
-- Filter: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Filter: 999999

After creating a 13 MB Bloom index on the five columns (the bloom index type is provided by the bloom extension that ships with Postgres, so enable it first):

CREATE EXTENSION bloom;

CREATE INDEX foo_c1c2c3c4c5
ON foo USING bloom(c1, c2, c3, c4, c5)
WITH (length=64, col1=3, col2=3, col3=3, col4=3, col5=3);

The same query takes 9.3 ms:

SELECT * FROM foo WHERE c1 = 171267 AND c4=65555;
-- Bitmap Heap Scan on foo
-- Recheck Cond: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Index Recheck: 232
-- Heap Blocks: exact=232
-- -> Bitmap Index Scan on foo_c1c2c3c4c5
-- Index Cond: ((c1 = 171267) AND (c4 = 65555))

The index provides a 10 to 20x speedup to these queries on my SSD laptop.
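
You can check the sizes yourself with pg_relation_size (using the table and index names from above):

SELECT pg_size_pretty(pg_relation_size('foo')) AS table_size,
pg_size_pretty(pg_relation_size('foo_c1c2c3c4c5')) AS index_size;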
#date

date_trunc truncates timestamps to a given level of precision, such as the hour, second, day, week, or year.


CREATE TABLE orders (created_at timestamp);

INSERT INTO orders VALUES
('2019-01-01 04:05:33'),
('2019-01-01 09:05:51'),
('2019-01-03 02:55:14'),
('2019-01-08 14:12:07');

SELECT date_trunc('day', created_at) AS day,
COUNT(created_at) FROM orders
GROUP BY day
ORDER BY day;

         day         | count
---------------------+-------
 2019-01-01 00:00:00 |     2
 2019-01-03 00:00:00 |     1
 2019-01-08 00:00:00 |     1
#generate

CREATE TABLE electrothingy
AS
SELECT
x::int AS id,
(x::int % 20000)::int AS locid, -- fake location ids in the range 0-19999
now() AS tsin, -- static timestamp
97.5::numeric(5,2) AS temp, -- static temp
x::int AS usage -- usage is just the same as id; not sure what we want here
FROM generate_series(1,1728000000) -- for 1.7 billion rows
AS gs(x);
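
A quick sanity check once the load finishes (counting 1.7 billion rows will itself take a while):

SELECT count(*) FROM electrothingy;
SELECT pg_size_pretty(pg_total_relation_size('electrothingy'));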

pgstattuple is an extension that ships with Postgres and provides a variety of functions for looking at statistics of various objects within your database. One use is checking how many dead rows there are in a table (often left behind when data is deleted) or how much free space it contains.

Let's create a table with 1000 rows of data and see what info pgstattuple can give us:

CREATE TABLE numbers (id int);
INSERT INTO numbers SELECT * FROM generate_series(1,1000);

CREATE EXTENSION pgstattuple;
SELECT * FROM public.pgstattuple('numbers');

DELETE FROM numbers WHERE id < 500;
SELECT * FROM public.pgstattuple('numbers');


Now we get a dead_tuple_count of 499, the number of rows we deleted! But the overall table_len is the same, because of those dead rows. This can be resolved by running: VACUUM numbers;

Re-running pgstattuple at this stage will show no dead rows, but the free_space will jump up, with table_len remaining the same! The reason is that VACUUM cleans up dead rows and marks their space as free for reuse by future inserts, but it doesn't give that space back to the operating system.
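
If you want the table file itself to shrink, VACUUM FULL rewrites the table and returns the space to the operating system (at the cost of an exclusive lock while it runs):

VACUUM FULL numbers;
SELECT table_len, dead_tuple_count, free_space FROM public.pgstattuple('numbers');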
#locks

-- blocks all writes
CREATE INDEX items_value_idx ON items USING GIN (value jsonb_path_ops);

-- only blocks other DDL
CREATE INDEX CONCURRENTLY items_value_idx ON items USING GIN (value jsonb_path_ops);
#locks

-- waits indefinitely if something else holds a conflicting lock on items
ALTER TABLE items ADD COLUMN last_update timestamptz;

-- gives up after 2 seconds rather than queueing (and blocking others) behind long-running transactions
SET lock_timeout TO '2s';
ALTER TABLE items ADD COLUMN last_update timestamptz;
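
The setting lasts for the rest of the session; to scope it to a single migration, SET LOCAL inside a transaction is a common pattern (a sketch, not part of the original tip):

BEGIN;
SET LOCAL lock_timeout TO '2s';
ALTER TABLE items ADD COLUMN last_update timestamptz;
COMMIT;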
#locks

-- blocks queries for a long time
ALTER TABLE items ADD PRIMARY KEY (id);

-- takes a long time, but doesn’t block queries
CREATE UNIQUE INDEX CONCURRENTLY items_pk ON items (id);
-- blocks queries, but only very briefly
ALTER TABLE items ADD CONSTRAINT items_pk PRIMARY KEY USING INDEX items_pk;