
Erika for Tinybird · Originally published at tinybird.co

ClickHouse Fundamentals: Part 2

Change table TTLs
You can modify the TTL of a table in ClickHouse by using ALTER TABLE...MODIFY TTL. For example:

ALTER TABLE database.table MODIFY TTL event_date + INTERVAL 30 DAY;
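You can confirm that the new TTL is in place by inspecting the table definition; the TTL clause appears in the SHOW CREATE TABLE output:

SHOW CREATE TABLE database.table;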

However, ClickHouse will rewrite all table partitions, including those not impacted by the TTL change. This can be a very expensive operation, especially for large tables.

To avoid impacting the performance of your database, you can instead set materialize_ttl_after_modify to 0 and clean up old partitions manually.

This avoids the huge performance impact of rewriting every partition, but it does require some additional manual effort.

For example:

SET materialize_ttl_after_modify = 0;
ALTER TABLE database.table MODIFY TTL event_date + INTERVAL 30 DAY;
ALTER TABLE database.table DROP PARTITION 202205;
ALTER TABLE database.table DROP PARTITION 202206;
ALTER TABLE database.table DROP PARTITION 202207;
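If you're not sure which partitions are safe to drop, you can list them along with their row counts and size from the system.parts table. This is a minimal sketch; replace 'database' and 'table' with your own names. The partition values it returns are the ones you pass to DROP PARTITION.

SELECT
    partition,
    sum(rows) AS total_rows,
    formatReadableSize(sum(bytes_on_disk)) AS size_on_disk
FROM system.parts
WHERE database = 'database' AND table = 'table' AND active -- only active (current) parts
GROUP BY partition
ORDER BY partition;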

Analyze local files with clickhouse-local
clickhouse-local is like running a temporary ClickHouse server that only lasts for your session. It's great for quickly experimenting with data in local files without needing to set up a full ClickHouse deployment; depending on how ClickHouse was installed, you start it with clickhouse-local or clickhouse local.

You can use clickhouse-local to analyze files of structured data directly from the local file system.

SELECT count() FROM file('final.ndjson');

SELECT count()
FROM file('final.ndjson')

Query id: a0a1f4b5-40cb-4125-b68b-4ed978c41576

┌─count()─┐
│  100000 │
└─────────┘

1 row in set. Elapsed: 0.659 sec. Processed 55.38 thousand rows, 96.97 MB (84.04 thousand rows/s., 147.16 MB/s.)


SELECT countDistinct(public_ip) FROM file('final.ndjson');

SELECT countDistinct(public_ip)
FROM file('final.ndjson')

Query id: 21df7ca5-e3bf-4010-b2a0-bf8b854502d2

┌─uniqExact(public_ip)─┐
│                   71 │
└──────────────────────┘

1 row in set. Elapsed: 0.225 sec. Processed 77.53 thousand rows, 96.45 MB (345.22 thousand rows/s., 429.46 MB/s.)
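On recent ClickHouse versions you can also ask for the schema that will be inferred from the file, which helps confirm column names and types before writing longer queries (same example file as above):

DESCRIBE file('final.ndjson');

-- or take a quick peek at a few rows
SELECT * FROM file('final.ndjson') LIMIT 5;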

You can create a table from the local file if you want to run more than one analysis on the data. The table is destroyed when your clickhouse-local session ends.

CREATE TABLE auxiliar Engine=MergeTree() ORDER BY tuple() AS SELECT * FROM file('final.ndjson');

CREATE TABLE auxiliar
ENGINE = MergeTree
ORDER BY tuple() AS
SELECT *
FROM file('final.ndjson')

Query id: a1732be5-a912-41a5-bf8e-e524db8f12f4

Ok.

0 rows in set. Elapsed: 0.486 sec. Processed 100.00 thousand rows, 161.88 MB (205.73 thousand rows/s., 333.03 MB/s.)


SHOW CREATE TABLE auxiliar;

SHOW CREATE TABLE auxiliar

Query id: dffbcd4b-2c08-4d07-916c-b8e1b668c202


│ CREATE TABLE _local.auxiliar
(
    `timestamp_iso8601` Nullable(DateTime64(9)),
    `host` Nullable(String),
    `public_ip` Nullable(String),
    `request_method` Nullable(String),
    `request_path` Nullable(String),
    `status` Nullable(Int64),
    `body_bytes_sent` Nullable(Int64),
    `request_length` Nullable(Int64),
    `first_byte` Nullable(Float64),
    `request_time` Nullable(Float64),
    `lambda_name` Nullable(String),
    `lambda_region` Nullable(String),
    `path_type` Nullable(String),
    `hit_level` Nullable(String),
    `hit_state` Nullable(String),
    `error_details` Nullable(String),
    `owner_id` Nullable(String),
    `project_id` Nullable(String),
    `target_path` Nullable(String),
    `deployment_plan` Nullable(String),
    `lambda_duration` Nullable(Float64),
    `lambda_billed_duration` Nullable(Int64),
    `lambda_memory_size` Nullable(Int64),
    `http_user_agent` Nullable(String),
    `full_vercel_id` Nullable(String),
    `dc` Nullable(String),
    `public_ip_country` Nullable(String),
    `public_ip_city` Nullable(String),
    `asn_id` Nullable(String),
    `asn_name` Nullable(String)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │


1 row in set. Elapsed: 0.001 sec.

SELECT count(), status - status % 100 AS status_range FROM auxiliar GROUP BY status_range;

SELECT
    count(),
    status - (status % 100) AS status_range
FROM auxiliar
GROUP BY status_range

Query id: 2685e0d4-827a-4306-8598-5d6e589dbd15

┌─count()─┬─status_range─┐
│   74000 │          200 │
│    5000 │          400 │
│   21000 │          300 │
└─────────┴──────────────┘

3 rows in set. Elapsed: 0.015 sec.
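The same bucketing can also be written with intDiv, which reads as "round down to the nearest hundred" and is equivalent to the modulo expression above for non-negative status codes:

SELECT
    count(),
    intDiv(status, 100) * 100 AS status_range
FROM auxiliar
GROUP BY status_range;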

Add a default value for new columns
When you add a new column to a table, ClickHouse fills existing rows with the default value for the column's type (0 for numbers, an empty string for strings, the Unix epoch for dates and datetimes):

CREATE TABLE local
ENGINE = MergeTree
ORDER BY number AS
SELECT *
FROM numbers(1000000);

ALTER TABLE local
    ADD COLUMN IF NOT EXISTS `date` DateTime;

OPTIMIZE TABLE local FINAL; -- To speed up the mutation / lazy way to know it has finished

SELECT *
FROM local
LIMIT 10

Query id: b5fedb97-a1c8-475f-a674-0b1658c8e889

┌─number─┬────────────────date─┐
│      0 │ 1970-01-01 01:00:00 │
│      1 │ 1970-01-01 01:00:00 │
│      2 │ 1970-01-01 01:00:00 │
│      3 │ 1970-01-01 01:00:00 │
│      4 │ 1970-01-01 01:00:00 │
│      5 │ 1970-01-01 01:00:00 │
│      6 │ 1970-01-01 01:00:00 │
│      7 │ 1970-01-01 01:00:00 │
│      8 │ 1970-01-01 01:00:00 │
│      9 │ 1970-01-01 01:00:00 │
└────────┴─────────────────────┘

To change the default value for old rows, you need to declare the default in the column definition:

ALTER TABLE local
    ADD COLUMN IF NOT EXISTS `new_date` DateTime DEFAULT now();

OPTIMIZE TABLE local FINAL;

SELECT *
FROM local
LIMIT 10

Query id: b5ff3afd-78f7-4ea3-8d43-adc7fe14f0a0

┌─number─┬────────────────date─┬────────────new_date─┐
│      0 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      1 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      2 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      3 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      4 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      5 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      6 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      7 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      8 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│      9 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
└────────┴─────────────────────┴─────────────────────┘

10 rows in set. Elapsed: 0.002 sec.

Note that this also means new rows will get the default value if the column isn't specified on insertion.

ALTER TABLE local
    MODIFY COLUMN `new_date` DateTime DEFAULT yesterday();

INSERT INTO local(number) VALUES (999999999);

SELECT *
FROM local
WHERE number = 999999999

Query id: 02527ad6-4644-42ff-8755-8869a9df30fa

┌────number─┬────────────────date─┬────────────new_date─┐
│ 999999999 │ 1970-01-01 01:00:00 │ 2022-09-22 00:00:00 │
└───────────┴─────────────────────┴─────────────────────┘
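To double-check which default expression each column ended up with, you can query system.columns. This is a quick sketch; currentDatabase() assumes the table lives in the database you are connected to.

SELECT
    name,
    type,
    default_kind,        -- e.g. 'DEFAULT' for columns with an explicit default
    default_expression
FROM system.columns
WHERE database = currentDatabase() AND table = 'local';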
