Recommendation Engine
Collaborative filtering via graph traversal combined with semantic vector search for book recommendations.
In [1]:
Copied!
import os
import shutil
import tempfile
import uni_db
import os
import shutil
import tempfile
import uni_db
In [2]:
Copied!
# Keep the demo database under the system temp directory and remove any copy
# left behind by an earlier run, so executing the notebook top to bottom
# always starts from an empty database.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_db")
if os.path.exists(db_path):
    shutil.rmtree(db_path)

db = uni_db.Database(db_path)
print(f"Opened database at {db_path}")
Opened database at /tmp/recommendation_db
1. Schema
Books with 4D semantic embeddings; users linked via PURCHASED edges.
In [3]:
Copied!
# Declare the graph schema in one fluent chain and apply it once:
#   - User vertices with a string `name`
#   - Book vertices with string `name` / `genre` and a 4-dimensional
#     `embedding` vector
#   - PURCHASED edges declared with User and Book endpoint labels
#     (used below as User -> Book)
(
    db.schema()
    .label("User")
        .property("name", "string")
        .done()
    .label("Book")
        .property("name", "string")
        .property("genre", "string")
        .vector("embedding", 4)
        .done()
    .edge_type("PURCHASED", ["User"], ["Book"])
        .done()
    .apply()
)
print("Schema created")
Schema created
2. Ingest Data
In [4]:
Copied!
# Load the sample catalogue, users and purchase history exactly once.
#
# 4D embeddings: [tech, fiction, history, science]
book_vids = db.bulk_insert_vertices('Book', [
    {'name': 'Clean Code',                    'genre': 'tech',    'embedding': [0.95, 0.05, 0.0, 0.0]},
    {'name': 'The Pragmatic Programmer',      'genre': 'tech',    'embedding': [0.90, 0.10, 0.0, 0.0]},
    {'name': 'Designing Data-Intensive Apps', 'genre': 'tech',    'embedding': [0.85, 0.0, 0.0, 0.15]},
    {'name': 'Dune',                          'genre': 'fiction', 'embedding': [0.0, 0.95, 0.0, 0.05]},
    {'name': 'Foundation',                    'genre': 'fiction', 'embedding': [0.0, 0.85, 0.0, 0.15]},
    {'name': 'Sapiens',                       'genre': 'history', 'embedding': [0.0, 0.05, 0.7, 0.25]},
])
# The unpacking relies on the returned vertex ids being in the same order as
# the rows passed in above.
clean_code, pragmatic, ddia, dune, foundation, sapiens = book_vids

user_vids = db.bulk_insert_vertices('User', [
    {'name': 'Alice'},
    {'name': 'Bob'},
    {'name': 'Carol'},
    {'name': 'Dave'},
])
alice, bob, carol, dave = user_vids

# Purchase history: (buyer vertex id, book vertex id, edge properties).
# `ddia` is intentionally left without buyers.
db.bulk_insert_edges('PURCHASED', [
    (alice, clean_code, {}),
    (alice, pragmatic,  {}),
    (bob,   clean_code, {}),
    (bob,   dune,       {}),
    (carol, pragmatic,  {}),
    (carol, foundation, {}),
    (dave,  dune,       {}),
    (dave,  foundation, {}),
    (dave,  sapiens,    {}),
])
db.flush()

# Create vector index AFTER flush
db.create_vector_index("Book", "embedding", "l2")
print("Data ingested and vector index created")
Data ingested and vector index created
3. Collaborative Filtering
Recommend books bought by users who share at least one purchase with Alice, excluding books Alice has already bought.
In [5]:
Copied!
# Collaborative filtering for Alice:
#   1. find other users who bought at least one book Alice bought,
#   2. collect everything those users bought,
#   3. drop books Alice already owns,
#   4. rank by the number of distinct such users.
# The secondary sort key makes the order of tied rows deterministic.
query_collab = """
MATCH (alice:User {name: 'Alice'})-[:PURCHASED]->(b:Book)<-[:PURCHASED]-(other:User)
WHERE other._vid <> alice._vid
MATCH (other)-[:PURCHASED]->(rec:Book)
WHERE NOT (alice)-[:PURCHASED]->(rec)
RETURN rec.name AS recommendation, COUNT(DISTINCT other) AS buyers
ORDER BY buyers DESC, recommendation
"""
results = db.query(query_collab)

print('Collaborative recommendations for Alice:')
for r in results:
    print(f" {r['recommendation']} (bought by {r['buyers']} similar user(s))")
Collaborative recommendations for Alice: Dune (bought by 1 similar user(s)) Foundation (bought by 1 similar user(s))
4. Semantic Vector Search
Find the 3 books most similar to a 'tech' query vector.
In [6]:
Copied!
# Nearest-neighbour lookup on Book.embedding for a query vector that points
# along the "tech" axis; ask for the 3 closest books.
tech_query = [0.95, 0.05, 0.0, 0.0]
# Materialize the rows so they can be printed and then validated.
results = list(db.query("""
CALL uni.vector.query('Book', 'embedding', $vec, 3)
YIELD node, distance
RETURN node.name AS title, node.genre AS genre, distance
ORDER BY distance
""", {'vec': tech_query}))

print('Top 3 books semantically similar to tech query:')
for r in results:
    print(f" [{r['distance']:.4f}] {r['title']} ({r['genre']})")

# All 3 results should be tech books. Check the count first: all() over an
# empty list is True, so an empty result would otherwise pass silently.
genres = [r['genre'] for r in results]
assert len(genres) == 3, f'Expected 3 results, got {len(genres)}'
assert all(g == 'tech' for g in genres), f'Expected all tech, got {genres}'
Top 3 books semantically similar to tech query: [0.0000] Clean Code (tech) [0.0050] The Pragmatic Programmer (tech) [0.0350] Designing Data-Intensive Apps (tech)
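5. Hybrid: Vector + Graph
Run a vector search for the 3 books nearest a 'fiction' query vector, then traverse PURCHASED edges to find which users bought them. Only two fiction titles exist in this dataset, so the third neighbour (Sapiens) is a history book; its much larger distance shows it is not a close match.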
In [7]:
Copied!
# Hybrid query: take the 3 books nearest to a "fiction" query vector, then
# follow incoming PURCHASED edges to list who bought each of them.
# Rows are ordered by distance first, then by buyer name.
fiction_query = [0.0, 0.95, 0.0, 0.05]
results = db.query("""
CALL uni.vector.query('Book', 'embedding', $vec, 3)
YIELD node, distance
MATCH (u:User)-[:PURCHASED]->(node)
RETURN node.name AS book, u.name AS buyer, distance
ORDER BY distance, buyer
""", {'vec': fiction_query})

print('Fiction book buyers (via vector + graph):')
for r in results:
    print(f" {r['buyer']} bought '{r['book']}' (distance={r['distance']:.4f})")
Fiction book buyers (via vector + graph): Bob bought 'Dune' (distance=0.0000) Dave bought 'Dune' (distance=0.0000) Carol bought 'Foundation' (distance=0.0200) Dave bought 'Foundation' (distance=0.0200) Dave bought 'Sapiens' (distance=1.3400)
6. Discovery: Popular Books Outside Alice's Profile
Books Alice hasn't bought, ranked by how many other users bought them.
In [8]:
Copied!
# Popularity ranking restricted to books Alice does not own: count the
# distinct users other than Alice who bought each such book.
# The secondary sort key makes the order of tied rows deterministic.
query_discovery = """
MATCH (alice:User {name: 'Alice'})
MATCH (u:User)-[:PURCHASED]->(b:Book)
WHERE NOT (alice)-[:PURCHASED]->(b) AND u._vid <> alice._vid
RETURN b.name AS book, COUNT(DISTINCT u) AS buyers
ORDER BY buyers DESC, book
"""
results = db.query(query_discovery)

print('Popular books Alice has not read:')
for r in results:
    print(f" {r['book']}: {r['buyers']} buyer(s)")
Popular books Alice has not read: Dune: 2 buyer(s) Foundation: 2 buyer(s) Sapiens: 1 buyer(s)