#array
test=# SELECT array[1,2,3];
array
---------
{1,2,3}
(1 row)
test=# SELECT array['hello', 'world'];
array
---------------
{hello,world}
(1 row)
test=# SELECT array[1, 'world'];
ERROR: invalid input syntax for integer: "world"
LINE 1: SELECT array[1, 'world'];
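All elements of an array must share one type; with an explicit cast the mixed example above works (a quick sketch, not from the original session):
test=# SELECT array[1::text, 'world'];
array
-----------
{1,world}
(1 row)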
#array
Here is how you can have an array as a column type in a table:
test=# CREATE TABLE posts (
test(# title text NOT NULL PRIMARY KEY,
test(# tags text[] NOT NULL DEFAULT '{}'::text[]
test(# );
CREATE TABLE
test=# INSERT INTO posts (title, tags) VALUES
test-# ('Using PostgreSQL Arrays with Golang', '{"postgres","golang"}');
INSERT 0 1
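To filter the posts table above by tag you can use the array containment operator or ANY (a quick sketch, not part of the original notes):
SELECT title FROM posts WHERE tags @> '{golang}';
SELECT title FROM posts WHERE 'postgres' = ANY (tags);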
#index #bloom
Let us create a 1,000,000 row, 57 MB table with 5 random integer columns:
CREATE EXTENSION bloom;  -- the bloom access method ships as an extension
CREATE TABLE foo(id SERIAL, c1 INT, c2 INT, c3 INT, c4 INT, c5 INT,
                 it TIMESTAMP NOT NULL DEFAULT NOW());
INSERT INTO foo(c1, c2, c3, c4, c5)
SELECT random() * 1000000, random() * 1000000, random() * 1000000,
       random() * 1000000, random() * 1000000
FROM generate_series(1, 1000000);
The following query takes 110.2 ms:
SELECT * FROM foo WHERE c1 = 171267 AND c4 = 65555;
-- Seq Scan on foo
--   Filter: ((c1 = 171267) AND (c4 = 65555))
--   Rows Removed by Filter: 999999
After creating a 13 MB Bloom index on the five columns:
CREATE INDEX foo_c1c2c3c4c5
ON foo USING bloom(c1, c2, c3, c4, c5)
WITH (length=64, col1=3, col2=3, col3=3, col4=3, col5=3);
the same query takes 9.3 ms:
SELECT * FROM foo WHERE c1 = 171267 AND c4 = 65555;
-- Bitmap Heap Scan on foo
--   Recheck Cond: ((c1 = 171267) AND (c4 = 65555))
--   Rows Removed by Index Recheck: 232
--   Heap Blocks: exact=232
--   -> Bitmap Index Scan on foo_c1c2c3c4c5
--        Index Cond: ((c1 = 171267) AND (c4 = 65555))
The index gives a 10x to 20x speedup for these queries on my SSD laptop.
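To check the table and index sizes quoted above, you can ask PostgreSQL directly (a small aside, not part of the original benchmark):
SELECT pg_size_pretty(pg_relation_size('foo'));             -- table size (~57 MB)
SELECT pg_size_pretty(pg_relation_size('foo_c1c2c3c4c5'));  -- bloom index size (~13 MB)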
#date
date_trunc truncates a timestamp to a given level of precision, such as hour, day, week, or year.
CREATE TABLE orders (created_at timestamp);
INSERT INTO orders VALUES
('2019-01-01 04:05:33'),
('2019-01-01 09:05:51'),
('2019-01-03 02:55:14'),
('2019-01-08 14:12:07');
SELECT date_trunc('day', created_at) AS day,
COUNT(created_at) FROM orders
GROUP BY day
ORDER BY day;
         day         | count
---------------------+-------
 2019-01-01 00:00:00 |     2
 2019-01-03 00:00:00 |     1
 2019-01-08 00:00:00 |     1
(3 rows)
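The same grouping works at any other precision; for example, by week (a sketch using the orders table above):
SELECT date_trunc('week', created_at) AS week,
       COUNT(created_at) FROM orders
GROUP BY week
ORDER BY week;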
#generate
CREATE TABLE electrothingy
AS
SELECT
x::int AS id,
(x::int % 20000)::int AS locid, -- fake location ids in the range 0-19999
now() AS tsin, -- static timestamp
97.5::numeric(5,2) AS temp, -- static temp
x::int AS usage -- usage is the same as id; not sure what we want here
FROM generate_series(1,1728000000) -- for 1.7 billion rows
AS gs(x);
#index
Combined indexes vs Separate indexes
A “composite index”, also known as a “concatenated index”, is an index on multiple columns of a table. Which works better: separate indexes on each column, or one combined index?
https://www.cybertec-postgresql.com/en/combined-indexes-vs-separate-indexes-in-postgresql/
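A minimal sketch of the two alternatives the article compares (table and index names here are made up):
CREATE TABLE t (a int, b int);
-- combined: one index on (a, b); great for WHERE a = ... AND b = ...,
-- and still usable for WHERE a = ... alone
CREATE INDEX t_a_b_idx ON t (a, b);
-- separate: one index per column; the planner can bitmap-AND them
CREATE INDEX t_a_idx ON t (a);
CREATE INDEX t_b_idx ON t (b);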
pgstattuple is a built-in Postgres extension that provides a variety of functions for looking at statistics of various objects within your database. One use is for checking how many dead rows (often caused when data is deleted from a table) there are or how much free space there is within a table.
Let's create a table with 1000 rows of data and see what info pgstattuple can give us:
CREATE TABLE numbers (id int);
INSERT INTO numbers SELECT * FROM generate_series(1,1000);
CREATE EXTENSION pgstattuple;
SELECT * FROM public.pgstattuple('numbers');
DELETE FROM numbers WHERE id < 500;
SELECT * FROM public.pgstattuple('numbers');
Now we get a dead_tuple_count of 499, the number of rows we deleted! But the overall table_len is the same, because of those dead rows. This can be resolved by running VACUUM numbers;
Re-running pgstattuple at this stage will show no dead rows, but free_space will jump up, with table_len remaining the same! The reason is that VACUUM cleans up dead rows and makes that space available for reuse, but it does not shrink the table on disk (VACUUM FULL would).
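The follow-up steps described above, spelled out:
VACUUM numbers;
SELECT * FROM public.pgstattuple('numbers');  -- dead_tuple_count drops to 0, free_space grows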
#locks
-- blocks all writes
CREATE INDEX items_value_idx ON items USING GIN (value jsonb_path_ops);
-- only blocks other DDL
CREATE INDEX CONCURRENTLY items_value_idx ON items USING GIN (value jsonb_path_ops);
#locks
ALTER TABLE items ADD COLUMN last_update timestamptz;  -- may wait indefinitely for a lock, blocking queries queued behind it
SET lock_timeout TO '2s';
ALTER TABLE items ADD COLUMN last_update timestamptz;  -- gives up after 2 seconds if it cannot get the lock
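A variant sketch: scope the timeout to a single transaction with SET LOCAL so it does not linger on the session:
BEGIN;
SET LOCAL lock_timeout TO '2s';  -- applies only to this transaction
ALTER TABLE items ADD COLUMN last_update timestamptz;
COMMIT;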
#locks
-- blocks queries for a long time
ALTER TABLE items ADD PRIMARY KEY (id);
-- takes a long time, but doesn’t block queries
CREATE UNIQUE INDEX CONCURRENTLY items_pk ON items (id);
-- blocks queries, but only very briefly
ALTER TABLE items ADD CONSTRAINT items_pk PRIMARY KEY USING INDEX items_pk;
#locks
Add a new column (safe)
Drop a column (safe)
Add an index concurrently (safe)
Drop a constraint (safe)
Add a default value to an existing column (safe)
Add an index (unsafe)
Add a column with a default (unsafe if PostgreSQL < 11; see the sketch after this list)
Add a column that is non-nullable (unsafe if PostgreSQL < 11)
Add a column with a unique constraint (unsafe)
Change the type of a column (unsafe)
VACUUM FULL (unsafe)
ALTER TABLE SET TABLESPACE (unsafe)
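For the "add a column with a default" item, a hedged sketch of the usual safe sequence on PostgreSQL < 11 (column name and value are made up; on 11+ a single ALTER ... ADD COLUMN ... DEFAULT no longer rewrites the table):
ALTER TABLE items ADD COLUMN status text;                  -- safe: no table rewrite
ALTER TABLE items ALTER COLUMN status SET DEFAULT 'new';   -- safe: only affects rows inserted afterwards
UPDATE items SET status = 'new' WHERE status IS NULL;      -- backfill; batch this on large tables
ALTER TABLE items ALTER COLUMN status SET NOT NULL;        -- optional: full table scan under an exclusive lock, but no rewrite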
#mvcc
https://habr.com/ru/company/postgrespro/blog/442804/
https://habr.com/ru/company/postgrespro/blog/444536/
https://habr.com/ru/company/postgrespro/blog/445820/
https://habr.com/ru/company/postgrespro/blog/446652/
https://habr.com/ru/company/postgrespro/blog/449704/
https://habr.com/ru/company/postgrespro/blog/452320/
https://habr.com/ru/company/postgrespro/blog/452762/
#query
At the most basic level, DISTINCT is a keyword you can use in SELECT queries to eliminate duplicate rows from the result. For example:
SELECT DISTINCT city FROM employees;
This simple example is basically a nicer-looking equivalent to:
SELECT city FROM employees GROUP BY city;
#json
row_to_json converts a row to JSON; passing true as the second argument, row_to_json(q1, true), adds line feeds to the output.
CREATE TABLE people
(name text, age int, interests text[]);
INSERT INTO people (name, age, interests)
VALUES
('Jon', 12, ARRAY ['Golf', 'Food']),
('Jane', 45, ARRAY ['Art']);
SELECT row_to_json(ROW(name, age, interests)) FROM people;
row_to_json
-----------------------------------
{"f1":"Jon","f2":12,"f3":["Golf","Food"]}
{"f1":"Jane","f2":45,"f3":["Art"]}
SELECT row_to_json(q1) FROM
(SELECT * FROM people LIMIT 1) q1;
row_to_json
-----------------------------------
{"name":"Jon","age":12,"interests":["Golf","Food"]}
#query
Don't use BETWEEN (especially with timestamps)
SELECT * FROM blah WHERE timestampcol BETWEEN '2018-06-01' AND '2018-06-08';
This will include results where the timestamp is exactly 2018-06-08 00:00:00.000000, but not timestamps later in that same day
SELECT * FROM blah WHERE timestampcol >= '2018-06-01' AND timestampcol < '2018-06-08';
BETWEEN is safe for discrete quantities like integers or dates, as long as you remember that both ends of the range are included in the result. But it's a bad habit to get into.