postgresources – Telegram
#math

SELECT ROUND(123.456);
# => 123

SELECT ROUND(123.456, 2);
# => 123.46

SELECT ROUND(123.456, -2);
# => 100

SELECT ROUND(123.456, -1);
# => 120

# This also works in MySQL, Oracle and SQL Server, though not SQLite.
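
One thing to watch for: in Postgres the two-argument ROUND is defined for numeric, not double precision, so a float value needs a cast first, for example:

SELECT ROUND((123.456::double precision)::numeric, 2);
# => 123.46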
#test Create one million rows

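The towns table itself isn't defined in this tip; a minimal sketch that matches the insert below (the column types here are an assumption) could be:

create table towns (
code text,
article text,
name text,
department text
);
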
insert into towns (
code, article, name, department
)
select
left(md5(i::text), 10),
md5(random()::text),
md5(random()::text),
left(md5(random()::text), 4)
from generate_series(1, 1000000) s(i);
#analyze See how much space your tables (and indexes!) are taking up

SELECT
relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total,
pg_size_pretty(pg_relation_size(relid)) AS internal,
pg_size_pretty(pg_table_size(relid) - pg_relation_size(relid)) AS external,
pg_size_pretty(pg_indexes_size(relid)) AS indexes
FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC;
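
For a quick check on a single table, the same size functions can also be called with a table name directly (using the towns table from the earlier tip as an example):

SELECT pg_size_pretty(pg_total_relation_size('towns')) AS total,
pg_size_pretty(pg_indexes_size('towns')) AS indexes;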
#conf How to see the elapsed time for your queries in psql

\timing

If you really like this and want it to be permanent, put the command in a .psqlrc file in your home directory; psql runs it at the start of every session.
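
For example, a minimal ~/.psqlrc that just turns timing on for every session:

\timing on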
#analyze The IO statistics for each of your tables, including how many 'blocks' of data have been served from the cache (i.e. heap_blks_hit).

SELECT * FROM pg_statio_user_tables;

WITH y AS (
SELECT
sum(heap_blks_read) AS read,
sum(heap_blks_hit) AS hit
FROM pg_statio_user_tables
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;

Our new query adds up the relevant numbers: how many cache hits occurred, how often the cache *didn't* deliver (heap_blks_read is the number of blocks read from disk), and the ratio of hits to the total number of blocks read.

In this case, our cache is working effectively as the hit rate is *almost* 100%. But in a low memory environment or when a server is under particular strain, you may see lower numbers. Take note of these and be prepared to upgrade your database server.
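
The same calculation works for index blocks too, via pg_statio_user_indexes (idx_blks_hit and idx_blks_read), if you also want to see how well your indexes fit in the cache; this is a direct adaptation of the query above:

WITH y AS (
SELECT
sum(idx_blks_read) AS read,
sum(idx_blks_hit) AS hit
FROM pg_statio_user_indexes
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;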
#rand Selecting a 'sample' of a table's rows

First, create a table that contains 1,000 rows, the numbers 1 to 1000:

CREATE TABLE numbers (number int);
INSERT INTO numbers (number)
SELECT id FROM generate_series(1,1000) AS id;

Postgres 9.5 introduced a TABLESAMPLE clause that lets you sample tables in different ways (two ways by default, but more can be added via extensions). Let's try retrieving a random 0.5% of the rows from our table:

SELECT * FROM numbers TABLESAMPLE BERNOULLI (0.5);
number
--------
101
213
278
433

The BERNOULLI here selects rows from the table with a 0.5% chance each (so statistically we'd get 5 rows on average), but there's also a more efficient SYSTEM method that uses a cruder block-based technique (with the possible downside that it tends to return ranges/groups of rows within the table).
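
For comparison, the SYSTEM variant looks like this; because it picks whole blocks, the rows it returns tend to be clustered together:

SELECT * FROM numbers TABLESAMPLE SYSTEM (0.5);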

You can also use TABLESAMPLE with other SQL statements like UPDATE or DELETE if, say, you wanted to delete 30% of a table's rows for whatever reason.
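
One way to do that (a sketch against the numbers table above) is to drive the DELETE from a sampled subquery:

DELETE FROM numbers
WHERE number IN (SELECT number FROM numbers TABLESAMPLE BERNOULLI (30));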
#json

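These examples assume a table with a jsonb column named data (the containment and existence operators below are jsonb operators); a minimal setup could be:

CREATE TABLE json_test (data jsonb);
INSERT INTO json_test (data) VALUES ('{"a":1,"b":2}');
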
# @> tests containment: true when data contains the JSON on the right
SELECT * FROM json_test WHERE data @> '{"a":1}';

# <@ is the reverse: true when data is contained in the JSON on the right
SELECT * FROM json_test WHERE data <@ '{"a":1}';

# test for the existence of a key
SELECT * FROM json_test WHERE data ? 'a';

# test for objects that have any of a list of keys:
SELECT * FROM json_test WHERE data ?| array['a', 'b'];
#array In Postgres syntax, the part '{1,2,3}' is a plain string, :: is the Postgres typecast operator, and int[] is the type (an array of integers). The expression '{1,2,3}'::int[] reads as “typecast the string {1,2,3} into an array of integers”.

The string notation of curly brackets and comma-separated values is Postgres's array literal (input) syntax; the cast tells Postgres which element type to parse the values as.

test=# SELECT '{1,2,3}'::int[] ; -- an array of integers
int4
---------
{1,2,3}
(1 row)

test=# SELECT '{"hello","world"}'::text[] ; -- an array of strings
text
---------------
{hello,world}
(1 row)
#array

test=# SELECT array[1,2,3];
array
---------
{1,2,3}
(1 row)

test=# SELECT array['hello', 'world'];
array
---------------
{hello,world}
(1 row)

test=# SELECT array[1, 'world'];
ERROR: invalid input syntax for integer: "world"
LINE 1: SELECT array[1, 'world'];
#array Here is how you can have an array as a column type in a table:

test=# CREATE TABLE posts (
test(# title text NOT NULL PRIMARY KEY,
test(# tags text[] NOT NULL DEFAULT '{}'::text[]
test(# );
CREATE TABLE

test=# INSERT INTO posts (title, tags) VALUES
test-# ('Using PostgreSQL Arrays with Golang', '{"postgres","golang"}');
INSERT 0 1
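
Querying the array column then works with the usual array operators; containment, for example, returns the row we just inserted:

test=# SELECT title FROM posts WHERE tags @> '{golang}'::text[];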
#index #bloom

Let us create a 1,000,000 row 57 MB table with 5 random integer columns:

CREATE TABLE foo(id SERIAL, c1 INT, c2 INT, c3 INT, c4 INT, c5 INT,
it TIMESTAMP NOT NULL DEFAULT NOW());
INSERT INTO foo(c1, c2, c3, c4, c5)
SELECT random() * 1000000, random() * 1000000, random() * 1000000,
random() * 1000000, random() * 1000000
FROM generate_series(1, 1000000);

The following query takes 110.2 ms:

SELECT * FROM foo WHERE c1 = 171267 AND c4=65555;
-- Seq Scan on foo
-- Filter: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Filter: 999999

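The bloom access method ships as a contrib extension, so it has to be enabled once per database before the index can be created:

CREATE EXTENSION IF NOT EXISTS bloom;
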
After creating a 13 MB Bloom index on the five columns:

CREATE INDEX foo_c1c2c3c4c5
ON foo USING bloom(c1, c2, c3, c4, c5)
WITH (length=64, col1=3, col2=3, col3=3, col4=3, col5=3);

The same query takes 9.3 ms:

SELECT * FROM foo WHERE c1 = 171267 AND c4=65555;
-- Bitmap Heap Scan on foo
-- Recheck Cond: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Index Recheck: 232
-- Heap Blocks: exact=232
-- -> Bitmap Index Scan on foo_c1c2c3c4c5
-- Index Cond: ((c1 = 171267) AND (c4 = 65555))

The index provides a 10- to 20-fold speedup for these queries on my SSD laptop.
#date

date_trunc truncates timestamps to a given level of precision, such as second, hour, day, week, or year.

CREATE TABLE orders (created_at timestamp);

INSERT INTO orders VALUES
('2019-01-01 04:05:33'),
('2019-01-01 09:05:51'),
('2019-01-03 02:55:14'),
('2019-01-08 14:12:07');

SELECT date_trunc('day', created_at) AS day,
COUNT(created_at) FROM orders
GROUP BY day
ORDER BY day;

day                 | count
--------------------+-------
2019-01-01T00:00:00 |     2
2019-01-03T00:00:00 |     1
2019-01-08T00:00:00 |     1
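
The same pattern works at any of the other granularities; grouping the same orders by week, for example:

SELECT date_trunc('week', created_at) AS week,
COUNT(created_at) FROM orders
GROUP BY week
ORDER BY week;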