Recommendation Engine

This notebook demonstrates two approaches to recommendation: Collaborative Filtering (Graph) and Vector Similarity (Semantic).

In [1]:

Copied!

import os
import shutil
import tempfile

import uni_db
import os
import shutil
import tempfile

import uni_db

In [2]:

Copied!





# Keep the demo database in a fixed location under the system temp directory.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_db")

# Remove any directory left over from a previous run so the notebook always
# starts from an empty database (makes Restart & Run All repeatable).
if os.path.exists(db_path):
    shutil.rmtree(db_path)

# Open the database exactly once; the handle `db` is used by every later cell.
db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")

Opened database at /tmp/recommendation_db

1. Schema

Users view and purchase products. Products have vector embeddings.

In [3]:

Copied!





# Vertex labels: the two kinds of entities in the graph.
db.create_label("User")
db.create_label("Product")

# Edge types: both run from a User to a Product.
db.create_edge_type("VIEWED", ["User"], ["Product"])
db.create_edge_type("PURCHASED", ["User"], ["Product"])

# Properties. NOTE(review): the trailing `False` is presumably a
# "nullable"/"optional" flag -- confirm against the uni_db API.
db.add_property("User", "name", "string", False)
db.add_property("Product", "name", "string", False)
db.add_property("Product", "price", "float64", False)
# "vector:4" matches the 4-element embeddings inserted in the ingest cell.
db.add_property("Product", "embedding", "vector:4", False)

# Index the product embeddings using the cosine metric.
db.create_vector_index("Product", "embedding", "cosine")

2. Ingest Data

In [4]:

Copied!





# Toy 4-dimensional embeddings: Shoes (p1) and Socks (p2) point in almost the
# same direction, while Shampoo (p3) is orthogonal to Shoes.
p1_vec = [1.0, 0.0, 0.0, 0.0]
p2_vec = [0.9, 0.1, 0.0, 0.0]
p3_vec = [0.0, 1.0, 0.0, 0.0]

# Insert the products once. The unpacking below assumes the returned vertex
# ids come back in insertion order -- TODO confirm against the uni_db API.
vids = db.bulk_insert_vertices('Product', [
    {'name': 'Running Shoes', 'price': 100.0, 'embedding': p1_vec},
    {'name': 'Socks', 'price': 10.0, 'embedding': p2_vec},
    {'name': 'Shampoo', 'price': 5.0, 'embedding': p3_vec}
])
p1, p2, p3 = vids

# Insert the users once (Alice -> u1, Bob -> u2, Charlie -> u3).
u_vids = db.bulk_insert_vertices('User', [{'name': 'Alice'}, {'name': 'Bob'}, {'name': 'Charlie'}])
u1, u2, u3 = u_vids

# Purchase history: Alice, Bob and Charlie all bought Running Shoes.
# Each edge is a (source, destination, properties) tuple.
db.bulk_insert_edges('PURCHASED', [(u1, p1, {}), (u2, p1, {}), (u3, p1, {})])

# View history: Alice viewed Socks (similar to Shoes) and Shampoo (different).
db.bulk_insert_edges('VIEWED', [(u1, p2, {}), (u1, p3, {})])

# Flush the bulk inserts before the query cells run.
db.flush()

3. Collaborative Filtering

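Who else bought what Alice bought? The query below counts the distinct other users who purchased at least one product that Alice also purchased.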

In [5]:

Copied!

# Walk Alice -> purchased product <- other purchaser, exclude Alice herself,
# and count each co-purchaser once. The adjacent string literals concatenate
# into a single query string.
query = (
    "MATCH (u1:User {name: 'Alice'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(other:User) "
    "WHERE other._vid <> u1._vid "
    "RETURN count(DISTINCT other) as count"
)
results = db.query(query)

# NOTE(review): the ingest cell adds PURCHASED edges from Alice, Bob and
# Charlie to the same product, so a count of 2 would be expected here, yet the
# recorded output shows 1 -- verify the data or the engine's result.
print(f"Users with similar purchase history: {results[0]['count']}")

Users with similar purchase history: 1

4. Vector-Based Recommendation

Find products semantically similar to what Alice viewed.

In [6]:

Copied!





# First, get the name and embedding of every product Alice viewed.
res = db.query("MATCH (u:User {name: 'Alice'})-[:VIEWED]->(p:Product) RETURN p.embedding as emb, p.name as name")

# The similarity query is the same for every viewed product, so build it once
# outside the loop; only the $emb parameter changes per iteration.
# Products whose similarity to $emb exceeds 0.8 are returned.
query_sim = "MATCH (p:Product) WHERE vector_similarity(p.embedding, $emb) > 0.8 RETURN p.name as name"

for row in res:
    emb = row['emb']
    viewed_name = row['name']
    print(f"Finding products similar to {viewed_name}...")

    # Find similar products. The viewed product itself is not filtered out,
    # so it appears in its own result list.
    sim_products = db.query(query_sim, {"emb": emb})
    names = [r['name'] for r in sim_products]
    print(f"  -> Found: {names}")

Finding products similar to Socks...
  -> Found: ['Socks', 'Running Shoes']
Finding products similar to Shampoo...
  -> Found: ['Shampoo']

DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score".
DEBUG 2: DataFusion execution failed (falling back to execute_subplan): Schema error: No field named "p.name". Valid fields are "p._vid", p, "p._score".