Officially out now: The TypeDB 3.0 Roadmap

Lesson 8.3: Aggregations

Aggregating over results

We have seen how we can use subqueries to group results. In this lesson, we will see how we can use them to aggregate over results. Let’s consider this query from Lesson 8.2, which lists all books and any promotions they are in.

match
$book isa book;
fetch
$book: title;
promotions: {
    match
    $promotion isa promotion;
    ($book, $promotion) isa promotion-inclusion;
    fetch
    $promotion: name;
};

Suppose that we only want to count the promotions each book is in rather than listing them all out by name. To do so, we can use an aggregation, as in the following query.

match
$book isa book;
fetch
$book: title;
promotions: {
    match
    $promotion isa promotion;
    ($book, $promotion) isa promotion-inclusion;
    get;
    count;
};

An aggregation is done by using a Get query as a subquery. We will explore Get queries fully in Lesson 11.1, where they will be used for stateful object retrieval. For now, we will only consider the aspects needed for aggregations when used as subqueries.

A Get query consists of a match clause followed by a get clause. In this case, the get clause is empty, denoted by the immediately following ; delimiter. We have then appended a modifier to the query, in this case the count modifier. This causes the subquery to return the number of results matched by the match clause, rather than the results themselves. Modifiers are appended to queries to modify the results returned in some manner. The count modifier is one of the aggregation modifiers, which can only be appended to Get queries. We will explore the aggregation modifiers in this lesson, and learn about some others in Lesson 8.4.

If we run studio run this query, then we can see that the promotions field of each result contains an object representing a value. This field is in the same format as those of the value variables we learned to retrieve in Lesson 7.5.

Exercise

Write a Fetch query to retrieve a list of publishers. For each publisher, retrieve their name and the number of books they have published.

You may find it useful to refer to the bookstore’s schema.

Schema
define

book sub entity,
    abstract,
    owns isbn-13 @key,
    owns isbn-10 @unique,
    owns title,
    owns page-count,
    owns genre,
    owns price,
    plays contribution:work,
    plays publishing:published,
    plays promotion-inclusion:item,
    plays order-line:item,
    plays rating:rated,
    plays recommendation:recommended;

hardback sub book,
    owns stock;

paperback sub book,
    owns stock;

ebook sub book;

contributor sub entity,
    owns name,
    plays contribution:contributor,
    plays authoring:author,
    plays editing:editor,
    plays illustrating:illustrator;

company sub entity,
    abstract,
    owns name;

publisher sub company,
    plays publishing:publisher;

courier sub company,
    plays delivery:deliverer;

publication sub entity,
    owns year,
    plays publishing:publication,
    plays locating:located;

user sub entity,
    owns id @key,
    owns name,
    owns birth-date,
    plays action-execution:executor,
    plays locating:located,
    plays recommendation:recipient;

order sub entity,
    owns id @key,
    owns status,
    plays order-line:order,
    plays action-execution:action,
    plays delivery:delivered;

promotion sub entity,
    owns code @key,
    owns name,
    owns start-timestamp,
    owns end-timestamp,
    plays promotion-inclusion:promotion;

review sub entity,
    owns id @key,
    owns score,
    owns verified,
    plays rating:review,
    plays action-execution:action;

login sub entity,
    owns success,
    plays action-execution:action;

address sub entity,
    owns street,
    plays delivery:destination,
    plays locating:located;

place sub entity,
    abstract,
    owns name,
    plays locating:located,
    plays locating:location;

city sub place;

state sub place;

country sub place;

contribution sub relation,
    relates contributor,
    relates work;

authoring sub contribution,
    relates author as contributor;

editing sub contribution,
    relates editor as contributor;

illustrating sub contribution,
    relates illustrator as contributor;

publishing sub relation,
    relates publisher,
    relates published,
    relates publication;

promotion-inclusion sub relation,
    relates promotion,
    relates item,
    owns discount;

order-line sub relation,
    relates order,
    relates item,
    owns quantity,
    owns price;

rating sub relation,
    relates review,
    relates rated;

action-execution sub relation,
    relates action,
    relates executor,
    owns timestamp;

delivery sub relation,
    relates deliverer,
    relates delivered,
    relates destination;

locating sub relation,
    relates located,
    relates location;

recommendation sub relation,
    relates recommended,
    relates recipient;

isbn sub attribute, abstract, value string;
isbn-13 sub isbn;
isbn-10 sub isbn;
title sub attribute, value string;
page-count sub attribute, value long;
genre sub attribute, value string;
stock sub attribute, value long;
price sub attribute, value double;
discount sub attribute, value double;
id sub attribute, value string;
code sub attribute, value string;
name sub attribute, value string;
birth-date sub attribute, value datetime;
street sub attribute, value string;
year sub attribute, value long;
quantity sub attribute, value long;
score sub attribute, value long;
verified sub attribute, value boolean;
timestamp sub attribute, value datetime;
start-timestamp sub attribute, value datetime;
end-timestamp sub attribute, value datetime;
status sub attribute, value string, regex "^(paid|dispatched|delivered|returned|canceled)$";
success sub attribute, value boolean;

rule review-verified-by-purchase:
    when {
        ($review, $product) isa rating;
        ($order, $product) isa order-line;
        ($user, $review) isa action-execution, has timestamp $review-time;
        ($user, $order) isa action-execution, has timestamp $order-time;
        $review-time > $order-time;
    } then {
        $review has verified true;
    };

rule review-unverified:
    when {
        $review isa review;
        not { $review has verified true; };
    } then {
        $review has verified false;
    };

rule book-recommendation-by-genre:
    when {
        $user isa user;
        $liked-book isa book;
        {
            ($user, $order) isa action-execution;
            ($order, $liked-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $liked-book) isa rating;
            $review has score >= 7;
        };
        $new-book isa book;
        not { {
            ($user, $order) isa action-execution;
            ($order, $new-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $new-book) isa rating;
        }; };
        $liked-book has genre $shared-genre;
        $new-book has genre $shared-genre;
        not { {
            $shared-genre == "fiction";
        } or {
            $shared-genre == "nonfiction";
        }; };
    } then {
        (recommended: $new-book, recipient: $user) isa recommendation;
    };

rule book-recommendation-by-author:
    when {
        $user isa user;
        $liked-book isa book;
        {
            ($user, $order) isa action-execution;
            ($order, $liked-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $liked-book) isa rating;
            $review has score >= 7;
        };
        $new-book isa book;
        not { {
            ($user, $order) isa action-execution;
            ($order, $new-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $new-book) isa rating;
        }; };
        ($liked-book, $shared-author) isa authoring;
        ($new-book, $shared-author) isa authoring;
    } then {
        (recommended: $new-book, recipient: $user) isa recommendation;
    };

rule order-line-total-retail-price:
    when {
        ($order) isa action-execution, has timestamp $order-time;
        $line ($order, $item) isa order-line;
        not {
            ($promotion, $item) isa promotion-inclusion;
            $promotion has start-timestamp <= $order-time,
                has end-timestamp >= $order-time;
        };
        $item has price $retail-price;
        $line has quantity $quantity;
        ?line-total = $quantity * $retail-price;
    } then {
        $line has price ?line-total;
    };

rule order-line-total-discounted-price:
    when {
        ($order) isa action-execution, has timestamp $order-time;
        $line ($order, $item) isa order-line;
        ($best-promotion, $item) isa promotion-inclusion, has discount $best-discount;
        $best-promotion has start-timestamp <= $order-time,
            has end-timestamp >= $order-time;
        not {
            ($other-promotion, $item) isa promotion-inclusion, has discount > $best-discount;
            $other-promotion has start-timestamp <= $order-time,
                has end-timestamp >= $order-time;
        };
        $item has price $retail-price;
        ?discounted-price = round(100 * $retail-price * (1 - $best-discount)) / 100;
        $line has quantity $quantity;
        ?line-total = $quantity * ?discounted-price;
    } then {
        $line has price ?line-total;
    };

rule transitive-location:
    when {
        (location: $parent-place, located: $child-place) isa locating;
        (location: $child-place, located: $x) isa locating;
    } then {
        (location: $parent-place, located: $x) isa locating;
    };
Sample solution
match
$publisher isa publisher;
fetch
$publisher: name;
books: {
    match
    $book isa book;
    ($book, $publisher) isa publishing;
    get;
    count;
};

Aggregation modifiers

We have seen an example of the count modifier, and now we will explore examples of some of the others. The table below lists the aggregation modifiers currently available in TypeQL.

Aggregation modifier Name Takes argument

count

Count

No

sum

Sum

Yes

max

Maximum

Yes

min

Minimum

Yes

mean

Mean

Yes

median

Median

Yes

std

Standard deviation

Yes

The std modifier returns the sample standard deviation as obtained by Bessel’s correction.

Unlike the count modifier, the other aggregation modifiers all take a variable as an argument. Let’s see an example. In the following query, we retrieve the average review score for each book.

match
$book isa book;
fetch
$book: title;
average-score: {
    match
    $review isa review, has score $score;
    ($review, $book) isa rating;
    get;
    mean $score;
};

Whereas the count modifier returns the number of results matched regardless of their content, the other aggregation modifiers require a specified numeric value to operate over. Here, we have supplied the $score attribute as the argument to the mean modifiers, indicating that we want the average of its values. The argument can be either a concept variable representing an attribute, or a value variable, as we learned about in Lesson 7.5. In the next example, we use the sum modifier over the value variable ?line-total to get the grand total for each order in the database.

match
$order isa order, has id $id;
fetch
$id;
order-total: {
    match
    $book isa book, has price $retail-price;
    ($order, $book) isa order-line,
        has quantity $quantity;
    ?line-total = $quantity * $retail-price;
    get;
    sum ?line-total;
};

Filtering unique values

By default, aggregation modifiers in a Get query operate over the matches returned by the match clause. It is important to understand the affect this has on the aggregations returned. Consider the following query, which is intended to retrieve the number of users that have ordered each book.

match
$book isa book, has isbn-13 $isbn;
fetch
$isbn;
buyers: {
    match
    $user isa user;
    $order isa order;
    ($user, $order) isa action-execution;
    ($book, $order) isa order-line;
    get;
    count;
};

Let’s examine one of the results, corresponding to the book Great Discoveries in Medicine. The results tell us it has had six buyers.

{
    "buyers": { "value": 6, "value_type": "long" },
    "isbn": { "value": "9780500291221", "value_type": "string", "type": { "label": "isbn-13", "root": "attribute" } }
}

Now let’s use the following query to retrieve the IDs of orders containing this book and the IDs of the users that placed those orders.

match
$book isa book, has isbn-13 "9780500291221";
$order isa order, has id $order-id;
$user isa user, has id $user-id;
($user, $order) isa action-execution;
($book, $order) isa order-line;
fetch
$order-id;
$user-id;
{
    "order-id": { "value": "o0008", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0003", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}
{
    "order-id": { "value": "o0001", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0001", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}
{
    "order-id": { "value": "o0031", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0001", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}
{
    "order-id": { "value": "o0035", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0007", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}
{
    "order-id": { "value": "o0034", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0007", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}
{
    "order-id": { "value": "o0024", "value_type": "string", "type": { "label": "id", "root": "attribute" } },
    "user-id": { "value": "u0010", "value_type": "string", "type": { "label": "id", "root": "attribute" } }
}

We get six results back. However, if we examine them, we see that user "u0001" placed both orders "o0001" and "o0031", and that user "u0007" placed both orders "o0034" and "o0035". So while there are six orders of this book, only four users have ordered it. To understand why the aggregation gave the wrong value, we must examine the pattern in the match clause of the subquery.

$user isa user;
$order isa order;
($user, $order) isa action-execution;
($book, $order) isa order-line;

If the book in question has been ordered by the same user multiple times, then this pattern will match the user once per order because it contains the $order variable, which differs between the matches. We saw in Lesson 8.1 that Fetch queries return one result per unique combination of variables retrieved. The same applies to the Get queries we use for aggregations, except that one result is instead returned per unique combination of variables matched.

If we want to retrieve only the unique set of users, we can filter the matches before aggregating by modifying the get query. In the following query, we add a filter to return only the unique set of users matched in the results, by adding the filtering variable $user to the get clause.

match
$book isa book, has isbn-13 $isbn;
fetch
$isbn;
buyers: {
    match
    $user isa user;
    $order isa order;
    ($user, $order) isa action-execution;
    ($book, $order) isa order-line;
    get $user;
    count;
};

If we run this new query, we see that the correct number of buyers is now returned for this book.

{
    "buyers": { "value": 4, "value_type": "long" },
    "isbn": { "value": "9780500291221", "value_type": "string", "type": { "label": "isbn-13", "root": "attribute" } }
}

In order for a variable to be used as the argument of an aggregation modifier, either the get clause must be empty (unfiltered), or the variable must be included in it.

To filter on multiple variables, aggregating over the set of results in which each tuple of them is unique, we can separate them by commas in the get clause, as follows.

get $user, $order;

Filtering all variables in the match clause is the same as not filtering at all, and will produce the same results as an empty get clause!

Exercise

Write a Fetch query to retrieve a list of users. For each user, retrieve:

  • Their user ID.

  • The median retail price of books they have ordered.

  • The retail price of the cheapest book they have ordered.

  • The retail price of the most expensive book they have ordered.

Ensure that each book only counts once towards the median, even if they have ordered it multiple times.

You may find it useful to refer to the bookstore’s schema.

Schema
define

book sub entity,
    abstract,
    owns isbn-13 @key,
    owns isbn-10 @unique,
    owns title,
    owns page-count,
    owns genre,
    owns price,
    plays contribution:work,
    plays publishing:published,
    plays promotion-inclusion:item,
    plays order-line:item,
    plays rating:rated,
    plays recommendation:recommended;

hardback sub book,
    owns stock;

paperback sub book,
    owns stock;

ebook sub book;

contributor sub entity,
    owns name,
    plays contribution:contributor,
    plays authoring:author,
    plays editing:editor,
    plays illustrating:illustrator;

company sub entity,
    abstract,
    owns name;

publisher sub company,
    plays publishing:publisher;

courier sub company,
    plays delivery:deliverer;

publication sub entity,
    owns year,
    plays publishing:publication,
    plays locating:located;

user sub entity,
    owns id @key,
    owns name,
    owns birth-date,
    plays action-execution:executor,
    plays locating:located,
    plays recommendation:recipient;

order sub entity,
    owns id @key,
    owns status,
    plays order-line:order,
    plays action-execution:action,
    plays delivery:delivered;

promotion sub entity,
    owns code @key,
    owns name,
    owns start-timestamp,
    owns end-timestamp,
    plays promotion-inclusion:promotion;

review sub entity,
    owns id @key,
    owns score,
    owns verified,
    plays rating:review,
    plays action-execution:action;

login sub entity,
    owns success,
    plays action-execution:action;

address sub entity,
    owns street,
    plays delivery:destination,
    plays locating:located;

place sub entity,
    abstract,
    owns name,
    plays locating:located,
    plays locating:location;

city sub place;

state sub place;

country sub place;

contribution sub relation,
    relates contributor,
    relates work;

authoring sub contribution,
    relates author as contributor;

editing sub contribution,
    relates editor as contributor;

illustrating sub contribution,
    relates illustrator as contributor;

publishing sub relation,
    relates publisher,
    relates published,
    relates publication;

promotion-inclusion sub relation,
    relates promotion,
    relates item,
    owns discount;

order-line sub relation,
    relates order,
    relates item,
    owns quantity,
    owns price;

rating sub relation,
    relates review,
    relates rated;

action-execution sub relation,
    relates action,
    relates executor,
    owns timestamp;

delivery sub relation,
    relates deliverer,
    relates delivered,
    relates destination;

locating sub relation,
    relates located,
    relates location;

recommendation sub relation,
    relates recommended,
    relates recipient;

isbn sub attribute, abstract, value string;
isbn-13 sub isbn;
isbn-10 sub isbn;
title sub attribute, value string;
page-count sub attribute, value long;
genre sub attribute, value string;
stock sub attribute, value long;
price sub attribute, value double;
discount sub attribute, value double;
id sub attribute, value string;
code sub attribute, value string;
name sub attribute, value string;
birth-date sub attribute, value datetime;
street sub attribute, value string;
year sub attribute, value long;
quantity sub attribute, value long;
score sub attribute, value long;
verified sub attribute, value boolean;
timestamp sub attribute, value datetime;
start-timestamp sub attribute, value datetime;
end-timestamp sub attribute, value datetime;
status sub attribute, value string, regex "^(paid|dispatched|delivered|returned|canceled)$";
success sub attribute, value boolean;

rule review-verified-by-purchase:
    when {
        ($review, $product) isa rating;
        ($order, $product) isa order-line;
        ($user, $review) isa action-execution, has timestamp $review-time;
        ($user, $order) isa action-execution, has timestamp $order-time;
        $review-time > $order-time;
    } then {
        $review has verified true;
    };

rule review-unverified:
    when {
        $review isa review;
        not { $review has verified true; };
    } then {
        $review has verified false;
    };

rule book-recommendation-by-genre:
    when {
        $user isa user;
        $liked-book isa book;
        {
            ($user, $order) isa action-execution;
            ($order, $liked-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $liked-book) isa rating;
            $review has score >= 7;
        };
        $new-book isa book;
        not { {
            ($user, $order) isa action-execution;
            ($order, $new-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $new-book) isa rating;
        }; };
        $liked-book has genre $shared-genre;
        $new-book has genre $shared-genre;
        not { {
            $shared-genre == "fiction";
        } or {
            $shared-genre == "nonfiction";
        }; };
    } then {
        (recommended: $new-book, recipient: $user) isa recommendation;
    };

rule book-recommendation-by-author:
    when {
        $user isa user;
        $liked-book isa book;
        {
            ($user, $order) isa action-execution;
            ($order, $liked-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $liked-book) isa rating;
            $review has score >= 7;
        };
        $new-book isa book;
        not { {
            ($user, $order) isa action-execution;
            ($order, $new-book) isa order-line;
        } or {
            ($user, $review) isa action-execution;
            ($review, $new-book) isa rating;
        }; };
        ($liked-book, $shared-author) isa authoring;
        ($new-book, $shared-author) isa authoring;
    } then {
        (recommended: $new-book, recipient: $user) isa recommendation;
    };

rule order-line-total-retail-price:
    when {
        ($order) isa action-execution, has timestamp $order-time;
        $line ($order, $item) isa order-line;
        not {
            ($promotion, $item) isa promotion-inclusion;
            $promotion has start-timestamp <= $order-time,
                has end-timestamp >= $order-time;
        };
        $item has price $retail-price;
        $line has quantity $quantity;
        ?line-total = $quantity * $retail-price;
    } then {
        $line has price ?line-total;
    };

rule order-line-total-discounted-price:
    when {
        ($order) isa action-execution, has timestamp $order-time;
        $line ($order, $item) isa order-line;
        ($best-promotion, $item) isa promotion-inclusion, has discount $best-discount;
        $best-promotion has start-timestamp <= $order-time,
            has end-timestamp >= $order-time;
        not {
            ($other-promotion, $item) isa promotion-inclusion, has discount > $best-discount;
            $other-promotion has start-timestamp <= $order-time,
                has end-timestamp >= $order-time;
        };
        $item has price $retail-price;
        ?discounted-price = round(100 * $retail-price * (1 - $best-discount)) / 100;
        $line has quantity $quantity;
        ?line-total = $quantity * ?discounted-price;
    } then {
        $line has price ?line-total;
    };

rule transitive-location:
    when {
        (location: $parent-place, located: $child-place) isa locating;
        (location: $child-place, located: $x) isa locating;
    } then {
        (location: $parent-place, located: $x) isa locating;
    };
Sample solution
match
$user isa user, has id $id;
fetch
$id;
median-price: {
    match
    $book isa book, has price $price;
    $order isa order;
    ($book, $order) isa order-line;
    ($user, $order) isa action-execution;
    get $book, $price; median $price;
};
min-price: {
    match
    $book isa book, has price $price;
    $order isa order;
    ($book, $order) isa order-line;
    ($user, $order) isa action-execution;
    get; min $price;
};
max-price: {
    match
    $book isa book, has price $price;
    $order isa order;
    ($book, $order) isa order-line;
    ($user, $order) isa action-execution;
    get; max $price;
};