Regional Sales Analytics with uni-pydantic¶
Combining graph traversal with columnar aggregation across multiple regions and product categories.
In [1]:
Copied!
import os
import shutil
import tempfile
import uni_db
from uni_pydantic import Field, Relationship, UniEdge, UniNode, UniSession
import os
import shutil
import tempfile
import uni_db
from uni_pydantic import Field, Relationship, UniEdge, UniNode, UniSession
1. Define Models¶
Regions, Categories, and Orders with two edge types.
In [2]:
Copied!
class Region(UniNode):
"""A geographic region for sales tracking."""
__label__ = "Region"
name: str = Field(index="btree")
# Relationships
orders: list["Order"] = Relationship("SHIPPED_TO", direction="incoming")
class Category(UniNode):
"""A product category."""
__label__ = "Category"
name: str
# Relationships
orders: list["Order"] = Relationship("IN_CATEGORY", direction="incoming")
class Order(UniNode):
"""A sales order."""
__label__ = "Order"
amount: float
# Relationships
region: "Region | None" = Relationship("SHIPPED_TO", direction="outgoing")
category: "Category | None" = Relationship("IN_CATEGORY", direction="outgoing")
class ShippedTo(UniEdge):
"""Edge representing order shipped to region."""
__edge_type__ = "SHIPPED_TO"
__from__ = Order
__to__ = Region
class InCategory(UniEdge):
"""Edge representing order in a product category."""
__edge_type__ = "IN_CATEGORY"
__from__ = Order
__to__ = Category
class Region(UniNode):
"""A geographic region for sales tracking."""
__label__ = "Region"
name: str = Field(index="btree")
# Relationships
orders: list["Order"] = Relationship("SHIPPED_TO", direction="incoming")
class Category(UniNode):
"""A product category."""
__label__ = "Category"
name: str
# Relationships
orders: list["Order"] = Relationship("IN_CATEGORY", direction="incoming")
class Order(UniNode):
"""A sales order."""
__label__ = "Order"
amount: float
# Relationships
region: "Region | None" = Relationship("SHIPPED_TO", direction="outgoing")
category: "Category | None" = Relationship("IN_CATEGORY", direction="outgoing")
class ShippedTo(UniEdge):
"""Edge representing order shipped to region."""
__edge_type__ = "SHIPPED_TO"
__from__ = Order
__to__ = Region
class InCategory(UniEdge):
"""Edge representing order in a product category."""
__edge_type__ = "IN_CATEGORY"
__from__ = Order
__to__ = Category
2. Setup Database and Session¶
In [3]:
Copied!
db_path = os.path.join(tempfile.gettempdir(), "sales_pydantic_db")
if os.path.exists(db_path):
shutil.rmtree(db_path)
db = uni_db.Database(db_path)
# Create session and register models
session = UniSession(db)
session.register(Region, Category, Order, ShippedTo, InCategory)
session.sync_schema()
print(f"Opened database at {db_path}")
db_path = os.path.join(tempfile.gettempdir(), "sales_pydantic_db")
if os.path.exists(db_path):
shutil.rmtree(db_path)
db = uni_db.Database(db_path)
# Create session and register models
session = UniSession(db)
session.register(Region, Category, Order, ShippedTo, InCategory)
session.sync_schema()
print(f"Opened database at {db_path}")
Opened database at /tmp/sales_pydantic_db
3. Create Data¶
4 regions, 3 categories, 27 orders distributed non-uniformly.
In [4]:
Copied!
# Regions
north = Region(name="North")
south = Region(name="South")
east = Region(name="East")
west = Region(name="West")
# Categories
electronics = Category(name="Electronics")
apparel = Category(name="Apparel")
home_garden = Category(name="Home & Garden")
session.add_all([north, south, east, west, electronics, apparel, home_garden])
session.commit()
print("Regions and categories created")
# Regions
north = Region(name="North")
south = Region(name="South")
east = Region(name="East")
west = Region(name="West")
# Categories
electronics = Category(name="Electronics")
apparel = Category(name="Apparel")
home_garden = Category(name="Home & Garden")
session.add_all([north, south, east, west, electronics, apparel, home_garden])
session.commit()
print("Regions and categories created")
Regions and categories created
In [5]:
Copied!
# Orders: (amount, region_obj, category_obj)
orders_data = [
# East: high-value electronics
(1200.0, east, electronics),
(980.0, east, electronics),
(450.0, east, apparel),
(120.0, east, apparel),
(85.0, east, apparel),
(60.0, east, home_garden),
# West: strong Home & Garden
(890.0, west, home_garden),
(620.0, west, home_garden),
(450.0, west, home_garden),
(340.0, west, home_garden),
(500.0, west, electronics),
(210.0, west, apparel),
(180.0, west, apparel),
# South: most electronics orders
(750.0, south, electronics),
(680.0, south, electronics),
(590.0, south, electronics),
(520.0, south, electronics),
(480.0, south, electronics),
(300.0, south, apparel),
(250.0, south, home_garden),
# North: balanced mix
(400.0, north, electronics),
(350.0, north, electronics),
(280.0, north, apparel),
(260.0, north, apparel),
(240.0, north, apparel),
(320.0, north, home_garden),
(290.0, north, home_garden),
]
order_objects = [Order(amount=amt) for amt, _, _ in orders_data]
session.add_all(order_objects)
session.commit()
for order_obj, (amt, region_obj, cat_obj) in zip(order_objects, orders_data):
session.create_edge(order_obj, "SHIPPED_TO", region_obj)
session.create_edge(order_obj, "IN_CATEGORY", cat_obj)
session.commit()
print("Data ingested")
# Orders: (amount, region_obj, category_obj)
orders_data = [
# East: high-value electronics
(1200.0, east, electronics),
(980.0, east, electronics),
(450.0, east, apparel),
(120.0, east, apparel),
(85.0, east, apparel),
(60.0, east, home_garden),
# West: strong Home & Garden
(890.0, west, home_garden),
(620.0, west, home_garden),
(450.0, west, home_garden),
(340.0, west, home_garden),
(500.0, west, electronics),
(210.0, west, apparel),
(180.0, west, apparel),
# South: most electronics orders
(750.0, south, electronics),
(680.0, south, electronics),
(590.0, south, electronics),
(520.0, south, electronics),
(480.0, south, electronics),
(300.0, south, apparel),
(250.0, south, home_garden),
# North: balanced mix
(400.0, north, electronics),
(350.0, north, electronics),
(280.0, north, apparel),
(260.0, north, apparel),
(240.0, north, apparel),
(320.0, north, home_garden),
(290.0, north, home_garden),
]
order_objects = [Order(amount=amt) for amt, _, _ in orders_data]
session.add_all(order_objects)
session.commit()
for order_obj, (amt, region_obj, cat_obj) in zip(order_objects, orders_data):
session.create_edge(order_obj, "SHIPPED_TO", region_obj)
session.create_edge(order_obj, "IN_CATEGORY", cat_obj)
session.commit()
print("Data ingested")
Data ingested
4. Revenue by Region¶
Total revenue and order count per region, ordered by revenue.
In [6]:
Copied!
query_region = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, COUNT(o) AS order_count, SUM(o.amount) AS total_revenue
ORDER BY total_revenue DESC
"""
results = session.cypher(query_region)
print("Revenue by region:")
for r in results:
print(
f" {r['region']:10s}: {r['order_count']:3d} orders, ${r['total_revenue']:8.2f}"
)
assert len(results) == 4, f"Expected 4 regions, got {len(results)}"
query_region = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, COUNT(o) AS order_count, SUM(o.amount) AS total_revenue
ORDER BY total_revenue DESC
"""
results = session.cypher(query_region)
print("Revenue by region:")
for r in results:
print(
f" {r['region']:10s}: {r['order_count']:3d} orders, ${r['total_revenue']:8.2f}"
)
assert len(results) == 4, f"Expected 4 regions, got {len(results)}"
Revenue by region: South : 7 orders, $ 3570.00 West : 7 orders, $ 3190.00 East : 6 orders, $ 2895.00 North : 7 orders, $ 2140.00
5. Region × Category Breakdown¶
12-row breakdown showing revenue for every region/category pair.
In [7]:
Copied!
query_breakdown = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category,
COUNT(o) AS orders, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = session.cypher(query_breakdown)
print("Region x Category breakdown:")
current_region = None
for r in results:
if r["region"] != current_region:
current_region = r["region"]
print(f" {current_region}:")
print(f" {r['category']:15s}: {r['orders']} orders, ${r['revenue']:.2f}")
assert len(results) == 12, f"Expected 12 rows (4x3), got {len(results)}"
query_breakdown = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category,
COUNT(o) AS orders, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = session.cypher(query_breakdown)
print("Region x Category breakdown:")
current_region = None
for r in results:
if r["region"] != current_region:
current_region = r["region"]
print(f" {current_region}:")
print(f" {r['category']:15s}: {r['orders']} orders, ${r['revenue']:.2f}")
assert len(results) == 12, f"Expected 12 rows (4x3), got {len(results)}"
Region x Category breakdown:
East:
Electronics : 2 orders, $2180.00
Apparel : 3 orders, $655.00
Home & Garden : 1 orders, $60.00
North:
Apparel : 3 orders, $780.00
Electronics : 2 orders, $750.00
Home & Garden : 2 orders, $610.00
South:
Electronics : 5 orders, $3020.00
Apparel : 1 orders, $300.00
Home & Garden : 1 orders, $250.00
West:
Home & Garden : 4 orders, $2300.00
Electronics : 1 orders, $500.00
Apparel : 2 orders, $390.00
6. Top Orders per Region¶
Highest-value orders in each region.
In [8]:
Copied!
from collections import defaultdict
query_top = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, o.amount AS amount
ORDER BY region, amount DESC
"""
all_orders = session.cypher(query_top)
region_orders = defaultdict(list)
for row in all_orders:
region_orders[row["region"]].append(row["amount"])
print("Top 2 orders per region:")
for region in sorted(region_orders):
top2 = region_orders[region][:2]
print(f" {region}: {[f'${v:.0f}' for v in top2]}")
assert region_orders["East"][0] == 1200.0, "East should lead with $1200"
from collections import defaultdict
query_top = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)
RETURN r.name AS region, o.amount AS amount
ORDER BY region, amount DESC
"""
all_orders = session.cypher(query_top)
region_orders = defaultdict(list)
for row in all_orders:
region_orders[row["region"]].append(row["amount"])
print("Top 2 orders per region:")
for region in sorted(region_orders):
top2 = region_orders[region][:2]
print(f" {region}: {[f'${v:.0f}' for v in top2]}")
assert region_orders["East"][0] == 1200.0, "East should lead with $1200"
Top 2 orders per region: East: ['$1200', '$980'] North: ['$400', '$350'] South: ['$750', '$680'] West: ['$890', '$620']
7. Best Category per Region¶
The highest-revenue category for each region.
In [9]:
Copied!
query_best_cat = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = session.cypher(query_best_cat)
best = {}
for r in results:
if r["region"] not in best:
best[r["region"]] = (r["category"], r["revenue"])
print("Best category per region:")
for region, (cat, rev) in sorted(best.items()):
print(f" {region:10s}: {cat} (${rev:.2f})")
best_cats = [cat for cat, _ in best.values()]
assert len(set(best_cats)) > 1, f"Expected variance across regions, got {best_cats}"
query_best_cat = """
MATCH (r:Region)<-[:SHIPPED_TO]-(o:Order)-[:IN_CATEGORY]->(c:Category)
RETURN r.name AS region, c.name AS category, SUM(o.amount) AS revenue
ORDER BY region, revenue DESC
"""
results = session.cypher(query_best_cat)
best = {}
for r in results:
if r["region"] not in best:
best[r["region"]] = (r["category"], r["revenue"])
print("Best category per region:")
for region, (cat, rev) in sorted(best.items()):
print(f" {region:10s}: {cat} (${rev:.2f})")
best_cats = [cat for cat, _ in best.values()]
assert len(set(best_cats)) > 1, f"Expected variance across regions, got {best_cats}"
Best category per region: East : Electronics ($2180.00) North : Apparel ($780.00) South : Electronics ($3020.00) West : Home & Garden ($2300.00)
8. Query Builder Demo¶
Using the type-safe query builder for high-value order analysis.
In [10]:
Copied!
# Find high-value orders using query builder
high_value_orders = (
session.query(Order)
.filter(Order.amount >= 500.0)
.order_by(Order.amount, descending=True)
.limit(5)
.all()
)
print("Top 5 High-Value Orders (>=$500):")
for i, order in enumerate(high_value_orders, 1):
print(f" {i}. ${order.amount:,.2f}")
# Count orders by value range
small = session.query(Order).filter(Order.amount < 100).count()
medium = (
session.query(Order).filter(Order.amount >= 100).filter(Order.amount < 500).count()
)
large = session.query(Order).filter(Order.amount >= 500).count()
print("Order Distribution:")
print(f" Small (<$100): {small}")
print(f" Medium ($100-499): {medium}")
print(f" Large (>=$500): {large}")
# Find high-value orders using query builder
high_value_orders = (
session.query(Order)
.filter(Order.amount >= 500.0)
.order_by(Order.amount, descending=True)
.limit(5)
.all()
)
print("Top 5 High-Value Orders (>=$500):")
for i, order in enumerate(high_value_orders, 1):
print(f" {i}. ${order.amount:,.2f}")
# Count orders by value range
small = session.query(Order).filter(Order.amount < 100).count()
medium = (
session.query(Order).filter(Order.amount >= 100).filter(Order.amount < 500).count()
)
large = session.query(Order).filter(Order.amount >= 500).count()
print("Order Distribution:")
print(f" Small (<$100): {small}")
print(f" Medium ($100-499): {medium}")
print(f" Large (>=$500): {large}")
Top 5 High-Value Orders (>=$500): 1. $1,200.00 2. $980.00 3. $890.00 4. $750.00 5. $680.00
Order Distribution: Small (<$100): 2 Medium ($100-499): 16 Large (>=$500): 9