Regional Sales Analytics¶
Combining graph traversal with columnar aggregation across multiple regions and product categories.
In [1]:
Copied!
import os
import shutil
import tempfile
import uni_db
import os
import shutil
import tempfile
import uni_db
In [2]:
Copied!
db_path = os.path.join(tempfile.gettempdir(), "sales_db")
if os.path.exists(db_path):
shutil.rmtree(db_path)
db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")
db_path = os.path.join(tempfile.gettempdir(), "sales_db")
if os.path.exists(db_path):
shutil.rmtree(db_path)
db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")
Opened database at /tmp/sales_db
1. Schema¶
Regions, Categories, and Orders with two edge types.
In [3]:
Copied!
(
db.schema()
.label("Region")
.property("name", "string")
.done()
.label("Category")
.property("name", "string")
.done()
.label("Order")
.property("amount", "float64")
.done()
.edge_type("SHIPPED_TO", ["Order"], ["Region"])
.done()
.edge_type("IN_CATEGORY", ["Order"], ["Category"])
.done()
.apply()
)
print("Schema created")
(
db.schema()
.label("Region")
.property("name", "string")
.done()
.label("Category")
.property("name", "string")
.done()
.label("Order")
.property("amount", "float64")
.done()
.edge_type("SHIPPED_TO", ["Order"], ["Region"])
.done()
.edge_type("IN_CATEGORY", ["Order"], ["Category"])
.done()
.apply()
)
print("Schema created")
Schema created
2. Ingest Data¶
4 regions, 3 categories, ~38 orders distributed non-uniformly.
In [4]:
Copied!
# Regions
region_vids = db.bulk_insert_vertices(
"Region",
[
{"name": "North"},
{"name": "South"},
{"name": "East"},
{"name": "West"},
],
)
north, south, east, west = region_vids
# Categories
cat_vids = db.bulk_insert_vertices(
"Category",
[
{"name": "Electronics"},
{"name": "Apparel"},
{"name": "Home & Garden"},
],
)
electronics, apparel, home_garden = cat_vids
# Orders: (amount, region_vid, category_vid)
orders_data = [
# East: high-value electronics
(1200.0, east, electronics),
(980.0, east, electronics),
(450.0, east, apparel),
(120.0, east, apparel),
(85.0, east, apparel),
(60.0, east, home_garden),
# West: strong Home & Garden
(890.0, west, home_garden),
(620.0, west, home_garden),
(450.0, west, home_garden),
(340.0, west, home_garden),
(500.0, west, electronics),
(210.0, west, apparel),
(180.0, west, apparel),
# South: most electronics orders
(750.0, south, electronics),
(680.0, south, electronics),
(590.0, south, electronics),
(520.0, south, electronics),
(480.0, south, electronics),
(300.0, south, apparel),
(250.0, south, home_garden),
# North: balanced mix
(400.0, north, electronics),
(350.0, north, electronics),
(280.0, north, apparel),
(260.0, north, apparel),
(240.0, north, apparel),
(320.0, north, home_garden),
(290.0, north, home_garden),
]
order_props = [{"amount": amt} for amt, _, _ in orders_data]
order_vids = db.bulk_insert_vertices("Order", order_props)
shipped_edges = [(ov, rv, {}) for ov, (_, rv, _) in zip(order_vids, orders_data)]
category_edges = [(ov, cv, {}) for ov, (_, _, cv) in zip(order_vids, orders_data)]
db.bulk_insert_edges("SHIPPED_TO", shipped_edges)
db.bulk_insert_edges("IN_CATEGORY", category_edges)
db.flush()
print("Data ingested")
# Regions
region_vids = db.bulk_insert_vertices(
"Region",
[
{"name": "North"},
{"name": "South"},
{"name": "East"},
{"name": "West"},
],
)
north, south, east, west = region_vids
# Categories
cat_vids = db.bulk_insert_vertices(
"Category",
[
{"name": "Electronics"},
{"name": "Apparel"},
{"name": "Home & Garden"},
],
)
electronics, apparel, home_garden = cat_vids
# Orders: (amount, region_vid, category_vid)
orders_data = [
# East: high-value electronics
(1200.0, east, electronics),
(980.0, east, electronics),
(450.0, east, apparel),
(120.0, east, apparel),
(85.0, east, apparel),
(60.0, east, home_garden),
# West: strong Home & Garden
(890.0, west, home_garden),
(620.0, west, home_garden),
(450.0, west, home_garden),
(340.0, west, home_garden),
(500.0, west, electronics),
(210.0, west, apparel),
(180.0, west, apparel),
# South: most electronics orders
(750.0, south, electronics),
(680.0, south, electronics),
(590.0, south, electronics),
(520.0, south, electronics),
(480.0, south, electronics),
(300.0, south, apparel),
(250.0, south, home_garden),
# North: balanced mix
(400.0, north, electronics),
(350.0, north, electronics),
(280.0, north, apparel),
(260.0, north, apparel),
(240.0, north, apparel),
(320.0, north, home_garden),
(290.0, north, home_garden),
]
order_props = [{"amount": amt} for amt, _, _ in orders_data]
order_vids = db.bulk_insert_vertices("Order", order_props)
shipped_edges = [(ov, rv, {}) for ov, (_, rv, _) in zip(order_vids, orders_data)]
category_edges = [(ov, cv, {}) for ov, (_, _, cv) in zip(order_vids, orders_data)]
db.bulk_insert_edges("SHIPPED_TO", shipped_edges)
db.bulk_insert_edges("IN_CATEGORY", category_edges)
db.flush()
print("Data ingested")
Data ingested
3. Revenue by Region¶
Total revenue and order count per region, ordered by revenue.
In [5]:
Copied!
query_region = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, COUNT(o) AS order_count, SUM(o.amount) AS total_revenue
ORDER BY total_revenue DESC
"""
results = db.query(query_region)
print("Revenue by region:")
for r in results:
print(
f" {r['region']:10s}: {r['order_count']:3d} orders, ${r['total_revenue']:8.2f}"
)
assert len(results) == 4, f"Expected 4 regions, got {len(results)}"
query_region = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, COUNT(o) AS order_count, SUM(o.amount) AS total_revenue
ORDER BY total_revenue DESC
"""
results = db.query(query_region)
print("Revenue by region:")
for r in results:
print(
f" {r['region']:10s}: {r['order_count']:3d} orders, ${r['total_revenue']:8.2f}"
)
assert len(results) == 4, f"Expected 4 regions, got {len(results)}"
Revenue by region: South : 7 orders, $ 3570.00 West : 7 orders, $ 3190.00 East : 6 orders, $ 2895.00 North : 7 orders, $ 2140.00
4. Region × Category Breakdown¶
12-row breakdown showing revenue for every region/category pair.
In [6]:
Copied!
query_breakdown = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category,
COUNT(o) AS orders, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = db.query(query_breakdown)
print("Region x Category breakdown:")
current_region = None
for r in results:
if r["region"] != current_region:
current_region = r["region"]
print(f" {current_region}:")
print(f" {r['category']:15s}: {r['orders']} orders, ${r['revenue']:.2f}")
assert len(results) == 12, (
f"Expected 12 rows (4 regions x 3 categories), got {len(results)}"
)
query_breakdown = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category,
COUNT(o) AS orders, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = db.query(query_breakdown)
print("Region x Category breakdown:")
current_region = None
for r in results:
if r["region"] != current_region:
current_region = r["region"]
print(f" {current_region}:")
print(f" {r['category']:15s}: {r['orders']} orders, ${r['revenue']:.2f}")
assert len(results) == 12, (
f"Expected 12 rows (4 regions x 3 categories), got {len(results)}"
)
Region x Category breakdown:
East:
Electronics : 2 orders, $2180.00
Apparel : 3 orders, $655.00
Home & Garden : 1 orders, $60.00
North:
Apparel : 3 orders, $780.00
Electronics : 2 orders, $750.00
Home & Garden : 2 orders, $610.00
South:
Electronics : 5 orders, $3020.00
Apparel : 1 orders, $300.00
Home & Garden : 1 orders, $250.00
West:
Home & Garden : 4 orders, $2300.00
Electronics : 1 orders, $500.00
Apparel : 2 orders, $390.00
5. Top Orders per Region¶
Highest-value orders in each region.
In [7]:
Copied!
from collections import defaultdict
# Query top 2 orders per region by fetching all and processing
query_top = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, o.amount AS amount
ORDER BY region, amount DESC
"""
all_orders = db.query(query_top)
# Group by region, take top 2
region_orders = defaultdict(list)
for row in all_orders:
region_orders[row["region"]].append(row["amount"])
print("Top 2 orders per region:")
for region in sorted(region_orders):
top2 = region_orders[region][:2]
print(f" {region}: {[f'${v:.0f}' for v in top2]}")
assert region_orders["East"][0] == 1200.0, "East should lead with $1200"
from collections import defaultdict
# Query top 2 orders per region by fetching all and processing
query_top = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, o.amount AS amount
ORDER BY region, amount DESC
"""
all_orders = db.query(query_top)
# Group by region, take top 2
region_orders = defaultdict(list)
for row in all_orders:
region_orders[row["region"]].append(row["amount"])
print("Top 2 orders per region:")
for region in sorted(region_orders):
top2 = region_orders[region][:2]
print(f" {region}: {[f'${v:.0f}' for v in top2]}")
assert region_orders["East"][0] == 1200.0, "East should lead with $1200"
Top 2 orders per region: East: ['$1200', '$980'] North: ['$400', '$350'] South: ['$750', '$680'] West: ['$890', '$620']
6. Best Category per Region¶
The highest-revenue category for each region.
In [8]:
Copied!
query_best_cat = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = db.query(query_best_cat)
# Take first (highest revenue) category per region
best = {}
for r in results:
if r["region"] not in best:
best[r["region"]] = (r["category"], r["revenue"])
print("Best category per region:")
for region, (cat, rev) in sorted(best.items()):
print(f" {region:10s}: {cat} (${rev:.2f})")
# Each region should have a different best category
best_cats = [cat for cat, _ in best.values()]
assert len(set(best_cats)) > 1, f"Expected variance across regions, got {best_cats}"
query_best_cat = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = db.query(query_best_cat)
# Take first (highest revenue) category per region
best = {}
for r in results:
if r["region"] not in best:
best[r["region"]] = (r["category"], r["revenue"])
print("Best category per region:")
for region, (cat, rev) in sorted(best.items()):
print(f" {region:10s}: {cat} (${rev:.2f})")
# Each region should have a different best category
best_cats = [cat for cat, _ in best.values()]
assert len(set(best_cats)) > 1, f"Expected variance across regions, got {best_cats}"
Best category per region: East : Electronics ($2180.00) North : Apparel ($780.00) South : Electronics ($3020.00) West : Home & Garden ($2300.00)