Recommendation Engine with uni-pydantic¶
Collaborative filtering via graph traversal combined with semantic vector search for book recommendations.
import os
import shutil
import tempfile
import uni_db
from uni_pydantic import UniNode, UniEdge, UniSession, Field, Relationship, Vector
1. Define Models¶
Books with 4D semantic embeddings; users linked via PURCHASED edges.
class User(UniNode):
    """Graph node for a customer who buys books."""

    __label__ = "User"

    name: str

    # Books reached by following PURCHASED edges out of this user. "Book" is
    # quoted because the Book class is declared after this one.
    purchased: list["Book"] = Relationship("PURCHASED", direction="outgoing")
class Book(UniNode):
    """Graph node for a book carrying a 4-component semantic embedding."""

    __label__ = "Book"

    name: str
    genre: str

    # Component order: [tech, fiction, history, science]; the field is
    # configured with the "l2" metric.
    embedding: Vector[4] = Field(metric="l2")

    # Users reached by following PURCHASED edges into this book.
    purchased_by: list[User] = Relationship("PURCHASED", direction="incoming")
class Purchased(UniEdge):
    """PURCHASED edge type, declared as running from User to Book."""

    __edge_type__ = "PURCHASED"

    # Endpoint model classes for this edge.
    __from__ = User
    __to__ = Book
2. Setup Database and Session¶
# The database lives at a fixed path under the system temp directory.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_pydantic_db")

# Remove whatever an earlier run left at that path before opening it again.
if os.path.exists(db_path):
    shutil.rmtree(db_path)

db = uni_db.Uni.open(db_path)

# Wrap the database in a session, register the three models, then sync the
# schema.
session = UniSession(db)
session.register(User, Book, Purchased)
session.sync_schema()

print(f"Opened database at {db_path}")
Opened database at /tmp/recommendation_pydantic_db
3. Create Data¶
6 books in 3 genre clusters, 4 users with purchase history.
# Embedding components, in order: [tech, fiction, history, science].
clean_code = Book(
    name="Clean Code",
    genre="tech",
    embedding=[0.95, 0.05, 0.0, 0.0],
)
pragmatic = Book(
    name="The Pragmatic Programmer",
    genre="tech",
    embedding=[0.90, 0.10, 0.0, 0.0],
)
ddia = Book(
    name="Designing Data-Intensive Apps",
    genre="tech",
    embedding=[0.85, 0.0, 0.0, 0.15],
)
dune = Book(
    name="Dune",
    genre="fiction",
    embedding=[0.0, 0.95, 0.0, 0.05],
)
foundation = Book(
    name="Foundation",
    genre="fiction",
    embedding=[0.0, 0.85, 0.0, 0.15],
)
sapiens = Book(
    name="Sapiens",
    genre="history",
    embedding=[0.0, 0.05, 0.7, 0.25],
)

# The four users are constructed in the same order as they are named.
alice, bob, carol, dave = (
    User(name=first_name) for first_name in ("Alice", "Bob", "Carol", "Dave")
)

# Books first, then users, all added and committed in one batch.
session.add_all(
    [
        clean_code,
        pragmatic,
        ddia,
        dune,
        foundation,
        sapiens,
        alice,
        bob,
        carol,
        dave,
    ]
)
session.commit()
print("Data ingested")
Data ingested
# Purchase history as (buyer, book) pairs; one PURCHASED edge is created per
# pair, in the order listed.
purchases = [
    (alice, clean_code),
    (alice, pragmatic),
    (bob, clean_code),
    (bob, dune),
    (carol, pragmatic),
    (carol, foundation),
    (dave, dune),
    (dave, foundation),
    (dave, sapiens),
]
for buyer, purchased_book in purchases:
    session.create_edge(buyer, "PURCHASED", purchased_book)

session.commit()
print("Purchase edges created")
Purchase edges created
4. Collaborative Filtering¶
Recommend books that were also bought by users who purchased any of Alice's books, excluding the books Alice has already purchased.
# Start from Alice's purchases, find other users who purchased the same
# books, then return those users' purchases that Alice does not have.
# NOTE: the ORDER BY has no tie-breaker, so rows with equal buyer counts
# come back in whatever order the engine produces.
query_collab = """
MATCH (alice:User {name: 'Alice'})-[:PURCHASED]->(b:Book)<-[:PURCHASED]-(other:User)
WHERE other._vid <> alice._vid
MATCH (other)-[:PURCHASED]->(rec:Book)
WHERE NOT (alice)-[:PURCHASED]->(rec)
RETURN rec.name AS recommendation, COUNT(DISTINCT other) AS buyers
ORDER BY buyers DESC
"""
results = session.cypher(query_collab)

print("Collaborative recommendations for Alice:")
for row in results:
    print(f" {row['recommendation']} (bought by {row['buyers']} similar user(s))")
Collaborative recommendations for Alice:
Foundation (bought by 1 similar user(s))
Dune (bought by 1 similar user(s))
5. Semantic Vector Search¶
Find the 3 books most similar to a 'tech' query vector.
# Query vector that is dominated by the first ("tech") component.
tech_query = [0.95, 0.05, 0.0, 0.0]

# The procedure is called with the Book label, the embedding property, the
# bound $vec parameter and a result count of 3.
query_vec = """
CALL uni.vector.query('Book', 'embedding', $vec, 3)
YIELD node, distance
RETURN node.name AS title, node.genre AS genre, distance
ORDER BY distance
"""
results = session.cypher(query_vec, {"vec": tech_query})

print("Top 3 books semantically similar to tech query:")
for hit in results:
    print(f" [{hit['distance']:.4f}] {hit['title']} ({hit['genre']})")

# Self-check for the demo: every returned book must be in the tech genre.
genres = [hit["genre"] for hit in results]
assert all(genre == "tech" for genre in genres), f"Expected all tech, got {genres}"
Top 3 books semantically similar to tech query:
[0.0000] Clean Code (tech)
[0.0050] The Pragmatic Programmer (tech)
[0.0350] Designing Data-Intensive Apps (tech)
6. Hybrid: Vector + Graph¶
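Run a vector search with a fiction-like query vector, then find which users bought the matching books. Because the search returns the 3 nearest books and only two fiction titles exist, the third hit is Sapiens (history), which shows up with a much larger distance.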
# Query vector that is dominated by the second ("fiction") component.
fiction_query = [0.0, 0.95, 0.0, 0.05]

# Each vector hit is matched against users with a PURCHASED edge to it, so a
# book contributes one row per buyer.
query_hybrid = """
CALL uni.vector.query('Book', 'embedding', $vec, 3)
YIELD node, distance
MATCH (u:User)-[:PURCHASED]->(node)
RETURN node.name AS book, u.name AS buyer, distance
ORDER BY distance, buyer
"""
results = session.cypher(query_hybrid, {"vec": fiction_query})

print("Fiction book buyers (via vector + graph):")
for row in results:
    print(f" {row['buyer']} bought '{row['book']}' (distance={row['distance']:.4f})")
Fiction book buyers (via vector + graph):
Bob bought 'Dune' (distance=0.0000)
Dave bought 'Dune' (distance=0.0000)
Carol bought 'Foundation' (distance=0.0200)
Dave bought 'Foundation' (distance=0.0200)
Dave bought 'Sapiens' (distance=1.3400)
7. Discovery: Popular Books Alice Hasn't Read¶
Books Alice hasn't bought, ranked by how many users bought them.
# Books purchased by users other than Alice that Alice has no PURCHASED edge
# to, counted by distinct buyers.
# NOTE: the ORDER BY has no tie-breaker, so books with equal buyer counts
# come back in whatever order the engine produces.
query_discovery = """
MATCH (alice:User {name: 'Alice'})
MATCH (u:User)-[:PURCHASED]->(b:Book)
WHERE NOT (alice)-[:PURCHASED]->(b) AND u._vid <> alice._vid
RETURN b.name AS book, COUNT(DISTINCT u) AS buyers
ORDER BY buyers DESC
"""
results = session.cypher(query_discovery)

print("Popular books Alice has not read:")
for row in results:
    print(f" {row['book']}: {row['buyers']} buyer(s)")
Popular books Alice has not read:
Dune: 2 buyer(s)
Foundation: 2 buyer(s)
Sapiens: 1 buyer(s)
8. Query Builder Demo¶
Using the type-safe query builder to browse books by genre.
# Same genre lookup as a Cypher WHERE clause, expressed through the query
# builder: the predicate is built from the model attribute, then applied.
is_tech = Book.genre == "tech"
tech_books = session.query(Book).filter(is_tech).all()

print("Tech books:")
for tech_book in tech_books:
    print(f" - {tech_book.name}")
Tech books:
- Clean Code
- The Pragmatic Programmer
- Designing Data-Intensive Apps