Use your database! (part 3) - Creating a revision system

#relational #database #trigger #postgres

If ever you used Hibernate, you might have also used Hibernate Envers, an auditing and versioning framework. It basically stores the changes made to records on the database and provides an API to manage the revisions.

But why not do the same using only the database? We can build our own revision system using database functions and triggers.

First, let's create two tables (I will be using Postgres 10):

CREATE TABLE tasklist (
    id INTEGER PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
    name VARCHAR(100) NOT NULL
);

CREATE TABLE task (
    tasklist_id INTEGER NOT NULL,
    task_number INTEGER NOT NULL,
    name VARCHAR(100) NOT NULL,
    done BOOLEAN DEFAULT FALSE,

    PRIMARY KEY (tasklist_id, task_number),
    FOREIGN KEY (tasklist_id) REFERENCES tasklist (id)
);

CREATE OR REPLACE FUNCTION generate_task_number() RETURNS TRIGGER AS $$
BEGIN
    SELECT coalesce(max(task_number), 0) + 1 INTO NEW.task_number
    FROM task
    WHERE task.tasklist_id = NEW.tasklist_id;

    RETURN NEW;
END
$$ LANGUAGE plpgsql;

CREATE TRIGGER generate_task_number
    BEFORE INSERT ON task
    FOR EACH ROW
    EXECUTE PROCEDURE generate_task_number();

Here we have tasklist and task tables. The tasklist primary key is GENERATED BY DEFAULT AS IDENTITY, this is new is Postgres 10 and works the same as using a Serial column.

The primary key on task is a composite key: it is composed by the primary key of the tasklist the task belongs to and a sequential number dependent on the tasklist id, so we used a trigger to generate the task number.

To store the revisions, we need a revision table for each table to be versioned. The revision tables must have a revision number, a operation code (Insert, Update or Delete) and a timestamp of its creation. The primary key of the revision tables will be the revision number combined with the primary key of the original table.

To avoid repeating that columns for all revision tables we need to create, we can create a temporary table and tell the other tables to copy all the columns from it:

-- Base for revision table to avoid repetition
CREATE TEMPORARY TABLE IF NOT EXISTS base_rev (
    rev_number INTEGER NOT NULL,
    rev_operation CHAR(1) NOT NULL CHECK (rev_operation IN ('I', 'U', 'D')),
    rev_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

Now we can create the other revision tables as follows:

-- Revision table for tasklist
CREATE TABLE rev_tasklist (
    LIKE base_rev INCLUDING CONSTRAINTS,
    LIKE tasklist,

    PRIMARY KEY (rev_number, id)
);

-- Revision table for task
CREATE TABLE rev_task (
    LIKE base_rev INCLUDING CONSTRAINTS,
    LIKE task,

    PRIMARY KEY (rev_number, tasklist_id, task_number)
);

The LIKE clause in the CREATE TABLE statement will copy the columns from a table to the one being created. You can know more in the CREATE TABLE documentation.

With all the table in place, we can create a trigger function that will insert the revisions:

CREATE OR REPLACE FUNCTION insert_revision() RETURNS TRIGGER AS $$
DECLARE
    -- operation that triggered this revision, (I)nsert, (U)update, (D)elete
    _op CHAR(1);
    -- current record
    _record RECORD;
    -- current revision number
    _rev_number INTEGER;
    -- revision table name ('rev_' + table name)
    _rev_table VARCHAR := 'rev_' || TG_TABLE_NAME;
BEGIN
    -- Which operation are we dealing with?
    IF TG_OP = 'INSERT' THEN
        _op := 'I';
        _record := NEW;
    ELSIF TG_OP = 'UPDATE' THEN
        _op := 'U';
        _record := NEW;
    ELSE
        _op := 'D';
        _record := OLD;
    END IF;

    -- Retrieve next revision number
    EXECUTE format('SELECT coalesce(max(rev_number), 0) + 1 FROM %s rev_table WHERE rev_table.id = $1', _rev_table)
        INTO _rev_number
        USING _record.id;

    -- Insert revision
    EXECUTE format('INSERT INTO %s VALUES ($1, $2, $3, $4.*)', _rev_table)
        USING _rev_number, _op, now(), _record;

    RETURN _record;
END;
$$ LANGUAGE plpgsql;

The function above will insert a revision for a table with the only requirement that such table have a primary key called id.

We used EXECUTE because we needed to dynamically execute a SQL statement, since the function is intended to work on any table that fulfill the primary key requirement.

Now we can create the trigger for the tasklist table:

CREATE TRIGGER tasklist_revision AFTER INSERT OR UPDATE OR DELETE ON tasklist
    FOR EACH ROW EXECUTE PROCEDURE insert_revision();

This will work for tasklist, but task has a composite primary key, we need to somehow tell the trigger function about it. We can do this by passing parameters to the trigger and changing the trigger function to use them and, if no parameter is passed, assume a single primary key column id:

CREATE OR REPLACE FUNCTION insert_revision() RETURNS TRIGGER AS $$
DECLARE
    -- operation that triggered this revision, (I)nsert, (U)update, (D)elete
    _op CHAR(1);
    -- current record
    _record RECORD;
    -- current revision number
    _rev_number INTEGER;
    -- revision table name ('rev_' + table name)
    _rev_table VARCHAR := 'rev_' || TG_TABLE_NAME;
    -- where clause for composite primary key
    _where VARCHAR := '';
    -- variable used to iterate over the TG_ARGV
    _pk VARCHAR;
BEGIN
    -- Which operation are we dealing with?
    IF TG_OP = 'INSERT' THEN
        _op := 'I';
        _record := NEW;
    ELSIF TG_OP = 'UPDATE' THEN
        _op := 'U';
        _record := NEW;
    ELSE
        _op := 'D';
        _record := OLD;
    END IF;

    -- no parameters, assume primary key = id
    IF TG_NARGS = 0 THEN
        _where := '_rev_table.id = $1.id';
    -- process primary key from TG_ARGV
    ELSE
        _where := format('_rev_table.%1$s = $1.%1$s', TG_ARGV[0]);
        FOREACH _pk IN ARRAY TG_ARGV[1:] LOOP
            _where := _where || format(' AND _rev_table.%1$s = $1.%1$s', _pk);
        END LOOP;
    END IF;

    -- Retrieve next revision number
    EXECUTE format('SELECT coalesce(max(rev_number), 0) FROM %s _rev_table WHERE %s', _rev_table, _where)
        INTO _rev_number
        USING _record;

    -- Insert revision
    EXECUTE format('INSERT INTO %s VALUES ($1, $2, $3, $4.*)', _rev_table)
        USING _rev_number + 1, _op, now(), _record;

    RETURN _record;
END;
$$ LANGUAGE plpgsql;

The difference between the original and the updated functions is this:

-- no parameters, assume primary key = id
IF TG_NARGS = 0 THEN
    _where := '_rev_table.id = $1.id';
-- process primary key from TG_ARGV
ELSE
    _where := format('_rev_table.%1$s = $1.%1$s', TG_ARGV[0]);
    FOREACH _pk IN ARRAY TG_ARGV[1:] LOOP
        _where := _where || format(' AND _rev_table.%1$s = $1.%1$s', _pk);
    END LOOP;
END IF;

This code will check if there are any parameters passed to the trigger and use them as the columns composing the primary key (or if the primary key is not id).

And finally the trigger on task:

CREATE TRIGGER task_revision AFTER INSERT OR UPDATE OR DELETE ON task
    FOR EACH ROW EXECUTE PROCEDURE insert_revision('tasklist_id', 'task_number');

As we create the trigger, we pass 'tasklist_id' and 'task_number' as its parameters, which are the columns that compose the primary key of the task table.

In case you are wondering where these TG_NARGS, TG_ARGV and TG_TABLE_NAME come from, those are variables automatically created by Postgres when the trigger function executes.

If you are planning to use this code in production, feel free to come back here and comment about your experience (and remember to test everything before deploying).

Finally, the complete example is shown bellow:

CREATE TABLE tasklist (
    id INTEGER PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
    name VARCHAR(100) NOT NULL
);

CREATE TABLE task (
    tasklist_id INTEGER NOT NULL,
    task_number INTEGER NOT NULL,
    name VARCHAR(100) NOT NULL,
    done BOOLEAN DEFAULT FALSE,

    PRIMARY KEY (tasklist_id, task_number),
    FOREIGN KEY (tasklist_id) REFERENCES tasklist (id)
);

CREATE OR REPLACE FUNCTION generate_task_number() RETURNS TRIGGER AS $$
BEGIN
    SELECT coalesce(max(task_number), 0) + 1 INTO NEW.task_number
    FROM task
    WHERE task.tasklist_id = NEW.tasklist_id;

    RETURN NEW;
END
$$ LANGUAGE plpgsql;

CREATE TRIGGER generate_task_number
    BEFORE INSERT ON task
    FOR EACH ROW
    EXECUTE PROCEDURE generate_task_number();

-- Base for revision table to avoid repetition
CREATE TEMPORARY TABLE IF NOT EXISTS base_rev (
    rev_number INTEGER NOT NULL,
    rev_operation CHAR(1) NOT NULL CHECK (rev_operation IN ('I', 'U', 'D')),
    rev_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Revision table for tasklist
CREATE TABLE rev_tasklist (
    LIKE base_rev INCLUDING CONSTRAINTS,
    LIKE tasklist,

    PRIMARY KEY (rev_number, id)
);

-- Revision table for task
CREATE TABLE rev_task (
    LIKE base_rev INCLUDING CONSTRAINTS,
    LIKE task,

    PRIMARY KEY (rev_number, tasklist_id, task_number)
);

CREATE OR REPLACE FUNCTION insert_revision() RETURNS TRIGGER AS $$
DECLARE
    -- operation that triggered this revision, (I)nsert, (U)update, (D)elete
    _op CHAR(1);
    -- current record
    _record RECORD;
    -- current revision number
    _rev_number INTEGER;
    -- revision table name ('rev_' + table name)
    _rev_table VARCHAR := 'rev_' || TG_TABLE_NAME;
    -- where clause for composite primary key
    _where VARCHAR := '';
    -- variable used to iterate over the TG_ARGV
    _pk VARCHAR;
BEGIN
    -- Which operation are we dealing with?
    IF TG_OP = 'INSERT' THEN
        _op := 'I';
        _record := NEW;
    ELSIF TG_OP = 'UPDATE' THEN
        _op := 'U';
        _record := NEW;
    ELSE
        _op := 'D';
        _record := OLD;
    END IF;

    -- no parameters, assume primary key = id
    IF TG_NARGS = 0 THEN
        _where := '_rev_table.id = $1.id';
    -- process primary key from TG_ARGV
    ELSE
        _where := format('_rev_table.%1$s = $1.%1$s', TG_ARGV[0]);
        FOREACH _pk IN ARRAY TG_ARGV[1:] LOOP
            _where := _where || format(' AND _rev_table.%1$s = $1.%1$s', _pk);
        END LOOP;
    END IF;

    -- Retrieve next revision number
    EXECUTE format('SELECT coalesce(max(rev_number), 0) FROM %s _rev_table WHERE %s', _rev_table, _where)
        INTO _rev_number
        USING _record;

    -- Insert revision
    EXECUTE format('INSERT INTO %s VALUES ($1, $2, $3, $4.*)', _rev_table)
        USING _rev_number + 1, _op, now(), _record;

    RETURN _record;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER tasklist_revision AFTER INSERT OR UPDATE OR DELETE ON tasklist
    FOR EACH ROW EXECUTE PROCEDURE insert_revision();

CREATE TRIGGER task_revision AFTER INSERT OR UPDATE OR DELETE ON task
    FOR EACH ROW EXECUTE PROCEDURE insert_revision('tasklist_id', 'task_number');

DEV Community

Use your database! (part 3) - Creating a revision system

Top comments (0)