Recommendation Engine¶
This notebook demonstrates two approaches to recommendation: Collaborative Filtering (Graph) and Vector Similarity (Semantic).
In [1]:
Copied!
import os
import shutil
import tempfile
import uni_db
import os
import shutil
import tempfile
import uni_db
In [2]:
Copied!
# Open the database at a fixed location under the system temp directory.
# Any directory left behind by a previous run is deleted first so the notebook
# starts from a clean slate on every Restart Kernel -> Run All.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_db")
if os.path.exists(db_path):
    # The body of this `if` must be indented; without it the cell does not parse.
    shutil.rmtree(db_path)

# `db` is the handle used by every later cell.
db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")
# Open the database at a fixed location under the system temp directory.
# Any directory left behind by a previous run is deleted first so the notebook
# starts from a clean slate on every Restart Kernel -> Run All.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_db")
if os.path.exists(db_path):
    # The body of this `if` must be indented; without it the cell does not parse.
    shutil.rmtree(db_path)

# `db` is the handle used by every later cell.
db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")
Opened database at /tmp/recommendation_db
1. Schema¶
Users view and purchase products. Products have vector embeddings.
In [3]:
Copied!
# Vertex labels: the two kinds of node in the graph.
for label in ("User", "Product"):
    db.create_label(label)

# Edge types: both run from a User vertex to a Product vertex.
for edge_type in ("VIEWED", "PURCHASED"):
    db.create_edge_type(edge_type, ["User"], ["Product"])

# Properties, listed as (label, property name, type string).
# "vector:4" matches the 4-element embeddings inserted in the ingest step.
# NOTE(review): the trailing False is passed through unchanged; it is
# presumably a nullable/required flag - confirm against the uni_db API.
for label, prop, type_name in (
    ("User", "name", "string"),
    ("Product", "name", "string"),
    ("Product", "price", "float64"),
    ("Product", "embedding", "vector:4"),
):
    db.add_property(label, prop, type_name, False)

# Index the product embeddings using the "cosine" metric.
db.create_vector_index("Product", "embedding", "cosine")
# Vertex labels: the two kinds of node in the graph.
for label in ("User", "Product"):
    db.create_label(label)

# Edge types: both run from a User vertex to a Product vertex.
for edge_type in ("VIEWED", "PURCHASED"):
    db.create_edge_type(edge_type, ["User"], ["Product"])

# Properties, listed as (label, property name, type string).
# "vector:4" matches the 4-element embeddings inserted in the ingest step.
# NOTE(review): the trailing False is passed through unchanged; it is
# presumably a nullable/required flag - confirm against the uni_db API.
for label, prop, type_name in (
    ("User", "name", "string"),
    ("Product", "name", "string"),
    ("Product", "price", "float64"),
    ("Product", "embedding", "vector:4"),
):
    db.add_property(label, prop, type_name, False)

# Index the product embeddings using the "cosine" metric.
db.create_vector_index("Product", "embedding", "cosine")
2. Ingest Data¶
In [4]:
Copied!
# Toy 4-dimensional embeddings: p2 points almost the same way as p1,
# while p3 is orthogonal to p1.
p1_vec = [1.0, 0.0, 0.0, 0.0]
p2_vec = [0.9, 0.1, 0.0, 0.0]
p3_vec = [0.0, 1.0, 0.0, 0.0]

# Products: one property dict per vertex.
product_rows = [
    {'name': 'Running Shoes', 'price': 100.0, 'embedding': p1_vec},
    {'name': 'Socks', 'price': 10.0, 'embedding': p2_vec},
    {'name': 'Shampoo', 'price': 5.0, 'embedding': p3_vec},
]
vids = db.bulk_insert_vertices('Product', product_rows)
# The returned ids are unpacked positionally: shoes, socks, shampoo.
p1, p2, p3 = vids

# Users, unpacked the same way: Alice, Bob, Charlie.
user_rows = [{'name': person} for person in ('Alice', 'Bob', 'Charlie')]
u_vids = db.bulk_insert_vertices('User', user_rows)
u1, u2, u3 = u_vids

# Purchase history: all three users bought the Running Shoes.
# Each edge is (source id, target id, properties); no edge properties are set.
db.bulk_insert_edges('PURCHASED', [(buyer, p1, {}) for buyer in (u1, u2, u3)])

# View history: Alice viewed Socks (close to Shoes) and Shampoo (unrelated).
db.bulk_insert_edges('VIEWED', [(u1, viewed, {}) for viewed in (p2, p3)])

# NOTE(review): presumably makes the bulk inserts visible to the queries
# below - confirm the exact flush semantics in the uni_db docs.
db.flush()
# Toy 4-dimensional embeddings: p2 points almost the same way as p1,
# while p3 is orthogonal to p1.
p1_vec = [1.0, 0.0, 0.0, 0.0]
p2_vec = [0.9, 0.1, 0.0, 0.0]
p3_vec = [0.0, 1.0, 0.0, 0.0]

# Products: one property dict per vertex.
product_rows = [
    {'name': 'Running Shoes', 'price': 100.0, 'embedding': p1_vec},
    {'name': 'Socks', 'price': 10.0, 'embedding': p2_vec},
    {'name': 'Shampoo', 'price': 5.0, 'embedding': p3_vec},
]
vids = db.bulk_insert_vertices('Product', product_rows)
# The returned ids are unpacked positionally: shoes, socks, shampoo.
p1, p2, p3 = vids

# Users, unpacked the same way: Alice, Bob, Charlie.
user_rows = [{'name': person} for person in ('Alice', 'Bob', 'Charlie')]
u_vids = db.bulk_insert_vertices('User', user_rows)
u1, u2, u3 = u_vids

# Purchase history: all three users bought the Running Shoes.
# Each edge is (source id, target id, properties); no edge properties are set.
db.bulk_insert_edges('PURCHASED', [(buyer, p1, {}) for buyer in (u1, u2, u3)])

# View history: Alice viewed Socks (close to Shoes) and Shampoo (unrelated).
db.bulk_insert_edges('VIEWED', [(u1, viewed, {}) for viewed in (p2, p3)])

# NOTE(review): presumably makes the bulk inserts visible to the queries
# below - confirm the exact flush semantics in the uni_db docs.
db.flush()
3. Collaborative Filtering¶
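Who else bought what Alice bought? The query below counts the distinct other users who purchased at least one product that Alice purchased.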
In [5]:
Copied!
# Walk Alice -> purchased product <- other purchaser, drop Alice herself,
# and count each remaining user once.
query = (
    "MATCH (u1:User {name: 'Alice'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(other:User) "
    "WHERE other._vid <> u1._vid "
    "RETURN count(DISTINCT other) as count"
)
results = db.query(query)

# The aggregate comes back as a single row keyed by the `count` alias.
# NOTE(review): with the sample data, Bob and Charlie both bought the same
# product as Alice, so 2 would be expected here; the recorded output shows 1.
# Verify against the engine.
similar_user_count = results[0]['count']
print(f"Users with similar purchase history: {similar_user_count}")
# Walk Alice -> purchased product <- other purchaser, drop Alice herself,
# and count each remaining user once.
query = (
    "MATCH (u1:User {name: 'Alice'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(other:User) "
    "WHERE other._vid <> u1._vid "
    "RETURN count(DISTINCT other) as count"
)
results = db.query(query)

# The aggregate comes back as a single row keyed by the `count` alias.
# NOTE(review): with the sample data, Bob and Charlie both bought the same
# product as Alice, so 2 would be expected here; the recorded output shows 1.
# Verify against the engine.
similar_user_count = results[0]['count']
print(f"Users with similar purchase history: {similar_user_count}")
Users with similar purchase history: 1
4. Vector-Based Recommendation¶
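Find products semantically similar to what Alice viewed: for each viewed product, return every product whose embedding similarity to it exceeds 0.8. The viewed product itself always appears in its own result list.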
In [6]:
Copied!
# Step 1: fetch the name and embedding of every product Alice viewed.
res = db.query("MATCH (u:User {name: 'Alice'})-[:VIEWED]->(p:Product) RETURN p.embedding as emb, p.name as name")

# Step 2: for each viewed product, look up products whose embedding similarity
# exceeds 0.8. The query text never changes, so it is built once outside the
# loop; only the $emb parameter varies per iteration.
query_sim = "MATCH (p:Product) WHERE vector_similarity(p.embedding, $emb) > 0.8 RETURN p.name as name"

for row in res:
    # Everything below belongs to the loop body and must stay indented.
    emb = row['emb']
    viewed_name = row['name']
    print(f"Finding products similar to {viewed_name}...")
    # The embedding is passed as a bound parameter rather than formatted
    # into the query string.
    sim_products = db.query(query_sim, {"emb": emb})
    # The viewed product matches itself, so it is included in `names`.
    names = [r['name'] for r in sim_products]
    print(f" -> Found: {names}")
# Step 1: fetch the name and embedding of every product Alice viewed.
res = db.query("MATCH (u:User {name: 'Alice'})-[:VIEWED]->(p:Product) RETURN p.embedding as emb, p.name as name")

# Step 2: for each viewed product, look up products whose embedding similarity
# exceeds 0.8. The query text never changes, so it is built once outside the
# loop; only the $emb parameter varies per iteration.
query_sim = "MATCH (p:Product) WHERE vector_similarity(p.embedding, $emb) > 0.8 RETURN p.name as name"

for row in res:
    # Everything below belongs to the loop body and must stay indented.
    emb = row['emb']
    viewed_name = row['name']
    print(f"Finding products similar to {viewed_name}...")
    # The embedding is passed as a bound parameter rather than formatted
    # into the query string.
    sim_products = db.query(query_sim, {"emb": emb})
    # The viewed product matches itself, so it is included in `names`.
    names = [r['name'] for r in sim_products]
    print(f" -> Found: {names}")
Finding products similar to Socks... -> Found: ['Socks', 'Running Shoes'] Finding products similar to Shampoo... -> Found: ['Shampoo']
DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score". DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score".