#math
SELECT ROUND(123.456);
# => 123
SELECT ROUND(123.456, 2);
# => 123.46
SELECT ROUND(123.456, -2);
# => 100
SELECT ROUND(123.456, -1);
# => 120
# This also works in MySQL, Oracle and SQL Server, though not SQLite.
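A related detail worth knowing (per the Postgres docs): for numeric values, ties are rounded away from zero, while for double precision the tie-breaking behaviour is platform dependent, most commonly round-half-to-even.
SELECT ROUND(2.5);
# => 3 (numeric: ties round away from zero)
SELECT ROUND(2.5::float8);
# => 2 on most platforms (double precision: typically round-half-to-even)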
#test Create one million rows
insert into towns (
code, article, name, department
)
select
left(md5(i::text), 10),
md5(random()::text),
md5(random()::text),
left(md5(random()::text), 4)
from generate_series(1, 1000000) s(i);
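The towns table itself isn't shown here; a minimal definition matching the insert might look like this (column names taken from the insert above, types assumed):
create table towns (
  code text,
  article text,
  name text,
  department text
);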
#analyze See how much space your tables (and indexes!) are taking up
SELECT
relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total,
pg_size_pretty(pg_relation_size(relid)) AS internal,
pg_size_pretty(pg_table_size(relid) - pg_relation_size(relid)) AS external,
pg_size_pretty(pg_indexes_size(relid)) AS indexes
FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC;
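For a one-off check of a single table, the same size functions can be called directly (the table name here is just an example):
SELECT pg_size_pretty(pg_total_relation_size('towns')) AS total,
       pg_size_pretty(pg_indexes_size('towns')) AS indexes;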
#conf How to see the elapsed time for your queries in psql
\timing
If you really like this and want it to be permanent, create a .psqlrc file containing \timing.
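A minimal ~/.psqlrc along those lines might contain nothing more than (contents assumed, adjust to taste):
\timing on
\pset null '(null)'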
#analyze The IO statistics for each of your tables, including how many 'blocks' of data have been served from the cache (i.e. heap_blks_hit).
SELECT * FROM pg_statio_user_tables;
Our new query adds together all of the relevant numbers to show how many cache hits occurred and how often the cache *didn't* deliver (heap_blks_read is the number of disk blocks read), and it calculates the ratio of hits to the total number of blocks read:
WITH y AS (
SELECT
sum(heap_blks_read) AS read,
sum(heap_blks_hit) AS hit
FROM pg_statio_user_tables
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;
In this case, our cache is working effectively as the hit rate is *almost* 100%. But in a low-memory environment or when a server is under particular strain, you may see lower numbers. Take note of these and be prepared to upgrade your database server.
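The same calculation works for index blocks via pg_statio_user_indexes, if you also want to know how well your indexes fit in the cache:
WITH y AS (
SELECT
sum(idx_blks_read) AS read,
sum(idx_blks_hit) AS hit
FROM pg_statio_user_indexes
) SELECT read, hit, hit / (hit+read)
AS ratio FROM y;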
#rand Selecting a 'sample' of a table's rows
Postgres 9.5 introduced a new TABLESAMPLE clause that lets you sample tables in different ways (2 ways by default, but more can be added via extensions). First, create a table that contains 1000 rows — the numbers 1 to 1000:
CREATE TABLE numbers (number int);
INSERT INTO numbers (number)
SELECT id FROM generate_series(1,1000) AS id;
Let's give it a go at retrieving a random 0.5% of the rows from our table:
SELECT * FROM numbers TABLESAMPLE BERNOULLI (0.5);
number
--------
101
213
278
433
The BERNOULLI method here selects each row with a 0.5% chance (so statistically we'd get 5 rows on average), but there's also a more efficient SYSTEM method that uses a cruder block-based technique (with the possible downside that it tends to return ranges/groups of rows within the table).
You can also use TABLESAMPLE with other SQL statements like UPDATE or DELETE if, say, you wanted to delete 30% of a table's rows for whatever reason.
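TABLESAMPLE itself only appears in a FROM clause, so for a DELETE the sample goes in a subquery; a sketch of the 30% example using ctid:
DELETE FROM numbers
WHERE ctid IN (SELECT ctid FROM numbers TABLESAMPLE BERNOULLI (30));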
#json
# containment: “contains” (@>) and “is contained by” (<@)
SELECT * FROM json_test WHERE data @> '{"a":1}';
SELECT * FROM json_test WHERE data <@ '{"a":1}';
# test for the existence of a key
SELECT * FROM json_test WHERE data ? 'a';
# test for objects that have any of a list of keys:
SELECT * FROM json_test WHERE data ?| array['a', 'b'];
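The json_test table isn't defined above; these operators need jsonb, so a minimal setup to play with might be (table name and data assumed):
CREATE TABLE json_test (data jsonb);
INSERT INTO json_test (data) VALUES
('{"a": 1}'),
('{"a": 1, "b": 2}'),
('{"c": 3}');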
#array In Postgres syntax, the part '{1,2,3}' is a plain string, :: is the Postgres typecast operator, and int[] is the type (an array of integers). The expression '{1,2,3}'::int[] reads as “typecast the string {1,2,3} into an array of integers”. The string notation of curly brackets and comma-separated values is an array value constructor recognized by Postgres.
test=# SELECT '{1,2,3}'::int[] ; -- an array of integers
int4
---------
{1,2,3}
(1 row)
test=# SELECT '{"hello","world"}'::text[] ; -- an array of strings
text
---------------
{hello,world}
(1 row)
#array
test=# SELECT array[1,2,3];
array
---------
{1,2,3}
(1 row)
test=# SELECT array['hello', 'world'];
array
---------------
{hello,world}
(1 row)
test=# SELECT array[1, 'world'];
ERROR: invalid input syntax for integer: "world"
LINE 1: SELECT array[1, 'world'];
#array Here is how you can have an array as a column type in a table:
test=# CREATE TABLE posts (
test(#   title text NOT NULL PRIMARY KEY,
test(#   tags text[] NOT NULL DEFAULT '{}'::text[]
test(# );
CREATE TABLE
test=# INSERT INTO posts (title, tags) VALUES
test-# ('Using PostgreSQL Arrays with Golang', '{"postgres","golang"}');
INSERT 0 1
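With the tags in place, array containment and ANY make it easy to query by tag; a couple of assumed example queries against the posts table above:
SELECT title FROM posts WHERE tags @> '{postgres}';   -- posts having the 'postgres' tag
SELECT title FROM posts WHERE 'golang' = ANY (tags);  -- equivalent spelling using ANY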
#index #bloom
Let us create a 1,000,000 row 57 MB table with 5 random integer columns:
CREATE TABLE foo(id SERIAL, c1 INT, c2 INT, c3 INT, c4 INT, c5 INT,
it TIMESTAMP NOT NULL DEFAULT NOW());
INSERT INTO foo(c1, c2, c3, c4, c5)
SELECT random() * 1000000, random() * 1000000, random() * 1000000,
random() * 1000000, random() * 1000000
FROM generate_series(1, 1000000);
The following query takes 110.2 ms:
SELECT * FROM foo WHERE c1 = 171267 AND c4 = 65555;
-- Seq Scan on foo
-- Filter: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Filter: 999999
After creating a 13 MB Bloom index on the five columns (the bloom access method ships as an extension, so run CREATE EXTENSION bloom; first):
CREATE INDEX foo_c1c2c3c4c5
ON foo USING bloom(c1, c2, c3, c4, c5)
WITH (length=64, col1=3, col2=3, col3=3, col4=3, col5=3);
the same query takes 9.3 ms:
SELECT * FROM foo WHERE c1 = 171267 AND c4 = 65555;
-- Bitmap Heap Scan on foo
-- Recheck Cond: ((c1 = 171267) AND (c4 = 65555))
-- Rows Removed by Index Recheck: 232
-- Heap Blocks: exact=232
-- -> Bitmap Index Scan on foo_c1c2c3c4c5
-- Index Cond: ((c1 = 171267) AND (c4 = 65555))
The index provides a 10x to 20x speedup for these queries on my SSD laptop.
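One way to reproduce the plans and timings yourself is EXPLAIN (ANALYZE, BUFFERS); the exact numbers will of course differ per machine:
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM foo WHERE c1 = 171267 AND c4 = 65555;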
#date
date_trunc truncates timestamps to a certain level of precision, such as a specific hour, second, day, week, year, or so on.
CREATE TABLE orders (created_at timestamp);
INSERT INTO orders VALUES
('2019-01-01 04:05:33'),
('2019-01-01 09:05:51'),
('2019-01-03 02:55:14'),
('2019-01-08 14:12:07');
SELECT date_trunc('day', created_at) AS day,
COUNT(created_at) FROM orders
GROUP BY day
ORDER BY day;
         day         | count
---------------------+-------
 2019-01-01 00:00:00 |     2
 2019-01-03 00:00:00 |     1
 2019-01-08 00:00:00 |     1
(3 rows)
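The same pattern works for coarser buckets, e.g. grouping the same orders by month:
SELECT date_trunc('month', created_at) AS month,
COUNT(created_at) FROM orders
GROUP BY month
ORDER BY month;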
#generate
CREATE TABLE electrothingy
AS
SELECT
x::int AS id,
(x::int % 20000)::int AS locid, -- fake location ids in the range of 1-20000
now() AS tsin, -- static timestamp
97.5::numeric(5,2) AS temp, -- static temp
x::int AS usage -- usage: the same as id; not sure what we want here.
FROM generate_series(1,1728000000) -- for 1.7 billion rows
AS gs(x);
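A quick sanity check after a run like that, reusing the size functions from the #analyze tip above:
SELECT count(*) FROM electrothingy;
SELECT pg_size_pretty(pg_total_relation_size('electrothingy'));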
#index
Combined indexes vs Separate indexes
A “composite index”, also known as a “concatenated index”, is an index on multiple columns in a table. Which is more beneficial: using separate indexes or one composite index?
https://www.cybertec-postgresql.com/en/combined-indexes-vs-separate-indexes-in-postgresql/
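As a minimal illustration of the trade-off (table and column names assumed, not taken from the linked article):
-- one combined index: ideal for WHERE a = ... AND b = ..., still usable for WHERE a = ... alone,
-- but not for WHERE b = ... on its own
CREATE INDEX ON measurements (a, b);
-- two separate indexes: each column can be searched on its own, and Postgres can combine them
-- with a bitmap AND when both are filtered, though usually less efficiently than the combined index
CREATE INDEX ON measurements (a);
CREATE INDEX ON measurements (b);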
#analyze pgstattuple is a built-in Postgres extension that provides a variety of functions for looking at statistics of various objects within your database. One use is for checking how many dead rows (often caused when data is deleted from a table) there are or how much free space there is within a table.
Let's create a table with 1000 rows of data and see what info pgstattuple can give us:
CREATE TABLE numbers (id int);
INSERT INTO numbers SELECT * FROM generate_series(1,1000);
CREATE EXTENSION pgstattuple;
SELECT * FROM public.pgstattuple('numbers');
DELETE FROM numbers WHERE id < 500;
SELECT * FROM public.pgstattuple('numbers');
Now we get a dead_tuple_count of 499 — the number of rows we deleted! But the overall table_len is the same... because of those dead rows. This can be resolved by running: VACUUM numbers;
Re-running pgstattuple at this stage will show no dead rows, but the free_space will jump up... with table_len remaining the same! The reason for this is that VACUUM will clean up dead rows and make that space available for reuse by future rows, but it doesn't shrink the table file itself; only VACUUM FULL returns the space to the operating system (at the cost of an exclusive lock on the table).
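Spelled out, the follow-up from the text above is simply:
VACUUM numbers;
SELECT * FROM public.pgstattuple('numbers');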
#locks
-- blocks all writes
CREATE INDEX items_value_idx ON items USING GIN (value jsonb_path_ops);
-- only blocks other DDL
CREATE INDEX CONCURRENTLY items_value_idx ON items USING GIN (value jsonb_path_ops);
#locks
-- risky on a busy table: the ALTER waits for an exclusive lock and, while queued, blocks all other queries on the table
ALTER TABLE items ADD COLUMN last_update timestamptz;
-- safer: give up after 2 seconds instead of queueing indefinitely, then retry later
SET lock_timeout TO '2s';
ALTER TABLE items ADD COLUMN last_update timestamptz;
#locks
-- blocks queries for a long time
ALTER TABLE items ADD PRIMARY KEY (id);
-- takes a long time, but doesn’t block queries
CREATE UNIQUE INDEX CONCURRENTLY items_pk ON items (id);
-- blocks queries, but only very briefly
ALTER TABLE items ADD CONSTRAINT items_pk PRIMARY KEY USING INDEX items_pk;
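If a migration like the ones above seems stuck, you can check whether it (or anything else) is waiting on a lock using the standard activity view:
SELECT pid, wait_event_type, wait_event, state, query
FROM pg_stat_activity
WHERE wait_event_type = 'Lock';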