Skip to content

Regional Sales Analytics

Combining graph traversal with columnar aggregation across multiple regions and product categories.

import os
import shutil
import tempfile

import uni_db
db_path = os.path.join(tempfile.gettempdir(), "sales_db")
if os.path.exists(db_path):
    shutil.rmtree(db_path)
db = uni_db.Uni.open(db_path)
session = db.session()
print(f"Opened database at {db_path}")
Opened database at /tmp/sales_db

1. Schema

Regions, Categories, and Orders with two edge types.

(
    db.schema()
    .label("Region")
    .property("name", "string")
    .done()
    .label("Category")
    .property("name", "string")
    .done()
    .label("Order")
    .property("amount", "float64")
    .done()
    .edge_type("SHIPPED_TO", ["Order"], ["Region"])
    .done()
    .edge_type("IN_CATEGORY", ["Order"], ["Category"])
    .done()
    .apply()
)

print("Schema created")
Schema created

2. Ingest Data

4 regions, 3 categories, ~38 orders distributed non-uniformly.

tx = session.tx()
with tx.bulk_writer().build() as bw:
    # Regions
    region_vids = bw.insert_vertices(
        "Region",
        [
            {"name": "North"},
            {"name": "South"},
            {"name": "East"},
            {"name": "West"},
        ],
    )
    north, south, east, west = region_vids

    # Categories
    cat_vids = bw.insert_vertices(
        "Category",
        [
            {"name": "Electronics"},
            {"name": "Apparel"},
            {"name": "Home & Garden"},
        ],
    )
    electronics, apparel, home_garden = cat_vids

    # Orders: (amount, region_vid, category_vid)
    orders_data = [
        # East: high-value electronics
        (1200.0, east, electronics),
        (980.0, east, electronics),
        (450.0, east, apparel),
        (120.0, east, apparel),
        (85.0, east, apparel),
        (60.0, east, home_garden),
        # West: strong Home & Garden
        (890.0, west, home_garden),
        (620.0, west, home_garden),
        (450.0, west, home_garden),
        (340.0, west, home_garden),
        (500.0, west, electronics),
        (210.0, west, apparel),
        (180.0, west, apparel),
        # South: most electronics orders
        (750.0, south, electronics),
        (680.0, south, electronics),
        (590.0, south, electronics),
        (520.0, south, electronics),
        (480.0, south, electronics),
        (300.0, south, apparel),
        (250.0, south, home_garden),
        # North: balanced mix
        (400.0, north, electronics),
        (350.0, north, electronics),
        (280.0, north, apparel),
        (260.0, north, apparel),
        (240.0, north, apparel),
        (320.0, north, home_garden),
        (290.0, north, home_garden),
    ]

    order_props = [{"amount": amt} for amt, _, _ in orders_data]
    order_vids = bw.insert_vertices("Order", order_props)

    shipped_edges = [(ov, rv, {}) for ov, (_, rv, _) in zip(order_vids, orders_data)]
    category_edges = [(ov, cv, {}) for ov, (_, _, cv) in zip(order_vids, orders_data)]

    bw.insert_edges("SHIPPED_TO", shipped_edges)
    bw.insert_edges("IN_CATEGORY", category_edges)
    bw.commit()
tx.commit()
print("Data ingested")
Data ingested

3. Revenue by Region

Total revenue and order count per region, ordered by revenue.

query_region = """
    MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
    RETURN r.name AS region, COUNT(o) AS order_count, SUM(o.amount) AS total_revenue
    ORDER BY total_revenue DESC
"""
results = session.query(query_region)
print("Revenue by region:")
for r in results:
    print(
        f"  {r['region']:10s}: {r['order_count']:3d} orders, ${r['total_revenue']:8.2f}"
    )
assert len(results) == 4, f"Expected 4 regions, got {len(results)}"
Revenue by region:
  South     :   7 orders, $ 3570.00
  West      :   7 orders, $ 3190.00
  East      :   6 orders, $ 2895.00
  North     :   7 orders, $ 2140.00

4. Region × Category Breakdown

12-row breakdown showing revenue for every region/category pair.

query_breakdown = """
    MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
    RETURN r.name AS region, c.name AS category,
           COUNT(o) AS orders, SUM(o.amount) AS revenue
    ORDER BY region, revenue DESC
"""
results = session.query(query_breakdown)
print("Region x Category breakdown:")
current_region = None
for r in results:
    if r["region"] != current_region:
        current_region = r["region"]
        print(f"  {current_region}:")
    print(f"    {r['category']:15s}: {r['orders']} orders, ${r['revenue']:.2f}")
assert len(results) == 12, (
    f"Expected 12 rows (4 regions x 3 categories), got {len(results)}"
)
Region x Category breakdown:
  East:
    Electronics    : 2 orders, $2180.00
    Apparel        : 3 orders, $655.00
    Home & Garden  : 1 orders, $60.00
  North:
    Apparel        : 3 orders, $780.00
    Electronics    : 2 orders, $750.00
    Home & Garden  : 2 orders, $610.00
  South:
    Electronics    : 5 orders, $3020.00
    Apparel        : 1 orders, $300.00
    Home & Garden  : 1 orders, $250.00
  West:
    Home & Garden  : 4 orders, $2300.00
    Electronics    : 1 orders, $500.00
    Apparel        : 2 orders, $390.00

5. Top Orders per Region

Highest-value orders in each region.

from collections import defaultdict

# Query top 2 orders per region by fetching all and processing
query_top = """
    MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
    RETURN r.name AS region, o.amount AS amount
    ORDER BY region, amount DESC
"""
all_orders = session.query(query_top)

# Group by region, take top 2
region_orders = defaultdict(list)
for row in all_orders:
    region_orders[row["region"]].append(row["amount"])

print("Top 2 orders per region:")
for region in sorted(region_orders):
    top2 = region_orders[region][:2]
    print(f"  {region}: {[f'${v:.0f}' for v in top2]}")

assert region_orders["East"][0] == 1200.0, "East should lead with $1200"
Top 2 orders per region:
  East: ['$1200', '$980']
  North: ['$400', '$350']
  South: ['$750', '$680']
  West: ['$890', '$620']

6. Best Category per Region

The highest-revenue category for each region.

query_best_cat = """
    MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
    RETURN r.name AS region, c.name AS category, SUM(o.amount) AS revenue
    ORDER BY region, revenue DESC
"""
results = session.query(query_best_cat)

# Take first (highest revenue) category per region
best = {}
for r in results:
    if r["region"] not in best:
        best[r["region"]] = (r["category"], r["revenue"])

print("Best category per region:")
for region, (cat, rev) in sorted(best.items()):
    print(f"  {region:10s}: {cat} (${rev:.2f})")

# Each region should have a different best category
best_cats = [cat for cat, _ in best.values()]
assert len(set(best_cats)) > 1, f"Expected variance across regions, got {best_cats}"
Best category per region:
  East      : Electronics ($2180.00)
  North     : Apparel ($780.00)
  South     : Electronics ($3020.00)
  West      : Home & Garden ($2300.00)