Recommendation Engine with uni-pydantic¶
This notebook demonstrates two approaches to recommendation: Collaborative Filtering (Graph) and Vector Similarity (Semantic) using Pydantic models.
In [1]:
Copied!
import os
import shutil
import tempfile
import uni_db
from uni_pydantic import UniNode, UniEdge, UniSession, Field, Relationship, Vector
import os
import shutil
import tempfile
import uni_db
from uni_pydantic import UniNode, UniEdge, UniSession, Field, Relationship, Vector
1. Define Models¶
Users view and purchase products. Products have vector embeddings for semantic similarity.
In [2]:
Copied!
class User(UniNode):
    """Graph node for a shopper, stored under the ``User`` label.

    Both relationship fields are declared as outgoing, i.e. they lead from
    this user to ``Product`` nodes over ``VIEWED`` / ``PURCHASED`` edges.
    """

    __label__ = "User"

    name: str

    # Relationships ("Product" is a forward reference; the class is defined later)
    viewed: list["Product"] = Relationship("VIEWED", direction="outgoing")
    purchased: list["Product"] = Relationship("PURCHASED", direction="outgoing")
class Product(UniNode):
    """Graph node for a catalogue item, stored under the ``Product`` label.

    Carries a name, a price and a 4-dimensional embedding declared with the
    cosine metric. The relationship fields are the incoming side of the
    ``VIEWED`` / ``PURCHASED`` edges that start at ``User`` nodes.
    """

    __label__ = "Product"

    name: str
    price: float
    embedding: Vector[4] = Field(metric="cosine")  # 4-dim vector with cosine similarity

    # Relationships
    viewed_by: list[User] = Relationship("VIEWED", direction="incoming")
    purchased_by: list[User] = Relationship("PURCHASED", direction="incoming")
class Viewed(UniEdge):
    """``VIEWED`` edge: runs from a ``User`` to a ``Product`` the user looked at."""

    __edge_type__ = "VIEWED"

    # Endpoint node types
    __from__ = User
    __to__ = Product
class Purchased(UniEdge):
    """``PURCHASED`` edge: runs from a ``User`` to a ``Product`` the user bought."""

    __edge_type__ = "PURCHASED"

    # Endpoint node types
    __from__ = User
    __to__ = Product
class User(UniNode):
    """Graph node for a shopper, stored under the ``User`` label.

    Both relationship fields are declared as outgoing, i.e. they lead from
    this user to ``Product`` nodes over ``VIEWED`` / ``PURCHASED`` edges.
    """

    __label__ = "User"

    name: str

    # Relationships ("Product" is a forward reference; the class is defined later)
    viewed: list["Product"] = Relationship("VIEWED", direction="outgoing")
    purchased: list["Product"] = Relationship("PURCHASED", direction="outgoing")
class Product(UniNode):
    """Graph node for a catalogue item, stored under the ``Product`` label.

    Carries a name, a price and a 4-dimensional embedding declared with the
    cosine metric. The relationship fields are the incoming side of the
    ``VIEWED`` / ``PURCHASED`` edges that start at ``User`` nodes.
    """

    __label__ = "Product"

    name: str
    price: float
    embedding: Vector[4] = Field(metric="cosine")  # 4-dim vector with cosine similarity

    # Relationships
    viewed_by: list[User] = Relationship("VIEWED", direction="incoming")
    purchased_by: list[User] = Relationship("PURCHASED", direction="incoming")
class Viewed(UniEdge):
    """``VIEWED`` edge: runs from a ``User`` to a ``Product`` the user looked at."""

    __edge_type__ = "VIEWED"

    # Endpoint node types
    __from__ = User
    __to__ = Product
class Purchased(UniEdge):
    """``PURCHASED`` edge: runs from a ``User`` to a ``Product`` the user bought."""

    __edge_type__ = "PURCHASED"

    # Endpoint node types
    __from__ = User
    __to__ = Product
2. Setup Database and Session¶
In [3]:
Copied!
# The database lives at a fixed path under the system temp directory.
db_path = os.path.join(
    tempfile.gettempdir(),
    "recommendation_pydantic_db",
)

# Remove whatever an earlier run left behind so every run starts empty.
if os.path.exists(db_path):
    shutil.rmtree(db_path)

db = uni_db.Database(db_path)

# Create session and register models
session = UniSession(db)
session.register(User, Product, Viewed, Purchased)
session.sync_schema()

print(f"Opened database at {db_path}")
# The database lives at a fixed path under the system temp directory.
db_path = os.path.join(
    tempfile.gettempdir(),
    "recommendation_pydantic_db",
)

# Remove whatever an earlier run left behind so every run starts empty.
if os.path.exists(db_path):
    shutil.rmtree(db_path)

db = uni_db.Database(db_path)

# Create session and register models
session = UniSession(db)
session.register(User, Product, Viewed, Purchased)
session.sync_schema()

print(f"Opened database at {db_path}")
Opened database at /tmp/recommendation_pydantic_db
3. Create Data¶
Create products with embeddings and users with purchase/view history.
In [4]:
Copied!
# Products carry hand-written 4-dim embeddings: the closer two vectors point,
# the more similar the products are meant to be.
running_shoes = Product(name="Running Shoes", price=100.0, embedding=[1.0, 0.0, 0.0, 0.0])  # Sports category
socks = Product(name="Socks", price=10.0, embedding=[0.9, 0.1, 0.0, 0.0])  # Similar to shoes
shampoo = Product(name="Shampoo", price=5.0, embedding=[0.0, 1.0, 0.0, 0.0])  # Different category

# Users, built in the same order as they are named
alice, bob, charlie = (User(name=user_name) for user_name in ("Alice", "Bob", "Charlie"))

# Stage every node in the session (products first, then users) and commit
session.add_all(
    [
        running_shoes,
        socks,
        shampoo,
        alice,
        bob,
        charlie,
    ]
)
session.commit()

print(f"Created products: {running_shoes.name}, {socks.name}, {shampoo.name}")
print(f"Created users: {alice.name}, {bob.name}, {charlie.name}")
# Products carry hand-written 4-dim embeddings: the closer two vectors point,
# the more similar the products are meant to be.
running_shoes = Product(name="Running Shoes", price=100.0, embedding=[1.0, 0.0, 0.0, 0.0])  # Sports category
socks = Product(name="Socks", price=10.0, embedding=[0.9, 0.1, 0.0, 0.0])  # Similar to shoes
shampoo = Product(name="Shampoo", price=5.0, embedding=[0.0, 1.0, 0.0, 0.0])  # Different category

# Users, built in the same order as they are named
alice, bob, charlie = (User(name=user_name) for user_name in ("Alice", "Bob", "Charlie"))

# Stage every node in the session (products first, then users) and commit
session.add_all(
    [
        running_shoes,
        socks,
        shampoo,
        alice,
        bob,
        charlie,
    ]
)
session.commit()

print(f"Created products: {running_shoes.name}, {socks.name}, {shampoo.name}")
print(f"Created users: {alice.name}, {bob.name}, {charlie.name}")
Created products: Running Shoes, Socks, Shampoo Created users: Alice, Bob, Charlie
In [5]:
Copied!
# Purchase history: Alice, Bob, Charlie all bought Running Shoes
for buyer in (alice, bob, charlie):
    session.create_edge(buyer, "PURCHASED", running_shoes)

# View history: Alice viewed Socks and Shampoo
for viewed_product in (socks, shampoo):
    session.create_edge(alice, "VIEWED", viewed_product)

session.commit()
print("Created purchase and view relationships")
# Purchase history: Alice, Bob, Charlie all bought Running Shoes
for buyer in (alice, bob, charlie):
    session.create_edge(buyer, "PURCHASED", running_shoes)

# View history: Alice viewed Socks and Shampoo
for viewed_product in (socks, shampoo):
    session.create_edge(alice, "VIEWED", viewed_product)

session.commit()
print("Created purchase and view relationships")
Created purchase and view relationships
4. Collaborative Filtering¶
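Who else bought what Alice bought? We answer this with a graph traversal: starting from Alice, follow her PURCHASED edges to products, then follow PURCHASED edges back to the other users who bought the same products.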
In [6]:
Copied!
# Collaborative filtering: walk Alice -> purchased product <- other buyer.
# The WHERE clause drops Alice herself; DISTINCT collapses users who share
# more than one purchase with her.
query = """
MATCH (alice:User {name: 'Alice'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(other:User)
WHERE other._vid <> alice._vid
RETURN DISTINCT other.name as name
"""

results = session.cypher(query)

print("Users with similar purchase history to Alice:")
for r in results:
    print(f" - {r['name']}")
# Collaborative filtering: walk Alice -> purchased product <- other buyer.
# The WHERE clause drops Alice herself; DISTINCT collapses users who share
# more than one purchase with her.
query = """
MATCH (alice:User {name: 'Alice'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(other:User)
WHERE other._vid <> alice._vid
RETURN DISTINCT other.name as name
"""

results = session.cypher(query)

print("Users with similar purchase history to Alice:")
for r in results:
    print(f" - {r['name']}")
Users with similar purchase history to Alice: - Bob - Charlie
5. Vector-Based Recommendation¶
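Find products semantically similar to the ones Alice viewed, using vector similarity on the product embeddings. Note that each viewed product also appears in its own results, because the query does not exclude it.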
In [7]:
Copied!
# Similarity search, parameterised on $emb. The text is the same for every
# viewed product, so it is defined once, ahead of the loop.
query_sim = """
MATCH (p:Product)
WHERE vector_similarity(p.embedding, $emb) > 0.8
RETURN p.name as name, p.price as price
"""

# Get embeddings of products Alice viewed
res = session.cypher(
    "MATCH (u:User {name: 'Alice'})-[:VIEWED]->(p:Product) RETURN p.embedding as emb, p.name as name"
)

for row in res:
    emb = row["emb"]
    viewed_name = row["name"]
    print(f"Finding products similar to '{viewed_name}'...")

    # Run the search with this product's embedding as the query vector.
    # The viewed product itself is not excluded from the matches.
    sim_products = session.cypher(query_sim, {"emb": emb})
    for p in sim_products:
        print(f" -> {p['name']} (${p['price']})")
# Similarity search, parameterised on $emb. The text is the same for every
# viewed product, so it is defined once, ahead of the loop.
query_sim = """
MATCH (p:Product)
WHERE vector_similarity(p.embedding, $emb) > 0.8
RETURN p.name as name, p.price as price
"""

# Get embeddings of products Alice viewed
res = session.cypher(
    "MATCH (u:User {name: 'Alice'})-[:VIEWED]->(p:Product) RETURN p.embedding as emb, p.name as name"
)

for row in res:
    emb = row["emb"]
    viewed_name = row["name"]
    print(f"Finding products similar to '{viewed_name}'...")

    # Run the search with this product's embedding as the query vector.
    # The viewed product itself is not excluded from the matches.
    sim_products = session.cypher(query_sim, {"emb": emb})
    for p in sim_products:
        print(f" -> {p['name']} (${p['price']})")
Finding products similar to 'Socks'... -> Socks ($10.0) -> Running Shoes ($100.0) Finding products similar to 'Shampoo'... -> Shampoo ($5.0)
DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score". DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score".
6. Query Builder Demo¶
Use the type-safe query builder to find products, filtering and ordering on model fields instead of writing a Cypher string.
In [8]:
Copied!
# Build the query first: products priced under $50, ordered by price.
affordable_query = (
    session.query(Product)
    .filter(Product.price < 50.0)
    .order_by(Product.price)
)

# Then execute it and collect the matching Product instances.
affordable_products = affordable_query.all()

print("Affordable products (under $50):")
for product in affordable_products:
    print(f" - {product.name}: ${product.price}")
# Build the query first: products priced under $50, ordered by price.
affordable_query = (
    session.query(Product)
    .filter(Product.price < 50.0)
    .order_by(Product.price)
)

# Then execute it and collect the matching Product instances.
affordable_products = affordable_query.all()

print("Affordable products (under $50):")
for product in affordable_products:
    print(f" - {product.name}: ${product.price}")
Affordable products (under $50): - Shampoo: $5.0 - Socks: $10.0
DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Error during planning: UDF 'properties' is not registered. Register it via SessionContext.