Skip to content

Recommendation Engine

Collaborative filtering via graph traversal combined with semantic vector search for book recommendations.

import os
import shutil
import tempfile

import uni_db
# The demo database lives in the system temp directory. Anything left at that
# path by an earlier run is removed before the database is opened.
db_path = os.path.join(tempfile.gettempdir(), "recommendation_db")
stale_db_present = os.path.exists(db_path)
if stale_db_present:
    shutil.rmtree(db_path)

# Open the database at that path and obtain the session used by the queries.
db = uni_db.Uni.open(db_path)
session = db.session()
print(f"Opened database at {db_path}")
Opened database at /tmp/recommendation_db

1. Schema

Books with 4D semantic embeddings; users linked via PURCHASED edges.

# Build the schema in named stages. Each stage is the value returned by the
# previous builder call, so the call sequence matches one fluent chain.
builder = db.schema()

# User vertices carry only a name.
builder = builder.label("User").property("name", "string").done()

# Book vertices carry a name, a genre and a 4-dimensional embedding vector.
builder = (
    builder.label("Book")
    .property("name", "string")
    .property("genre", "string")
    .vector("embedding", 4)
    .done()
)

# PURCHASED edges are declared with User as source label and Book as target.
builder = builder.edge_type("PURCHASED", ["User"], ["Book"]).done()
builder.apply()

print("Schema created")
Schema created

2. Ingest Data

# Embedding axes, in order: [tech, fiction, history, science]
book_catalog = [
    ("Clean Code", "tech", [0.95, 0.05, 0.0, 0.0]),
    ("The Pragmatic Programmer", "tech", [0.90, 0.10, 0.0, 0.0]),
    ("Designing Data-Intensive Apps", "tech", [0.85, 0.0, 0.0, 0.15]),
    ("Dune", "fiction", [0.0, 0.95, 0.0, 0.05]),
    ("Foundation", "fiction", [0.0, 0.85, 0.0, 0.15]),
    ("Sapiens", "history", [0.0, 0.05, 0.7, 0.25]),
]
book_rows = [
    {"name": title, "genre": genre, "embedding": vector}
    for title, genre, vector in book_catalog
]
user_rows = [{"name": person} for person in ("Alice", "Bob", "Carol", "Dave")]

tx = session.tx()
with tx.bulk_writer().build() as bw:
    # Vertex ids come back in insertion order, so they unpack positionally.
    book_vids = bw.insert_vertices("Book", book_rows)
    clean_code, pragmatic, ddia, dune, foundation, sapiens = book_vids

    user_vids = bw.insert_vertices("User", user_rows)
    alice, bob, carol, dave = user_vids

    # Purchase history as (buyer, book) pairs; each edge gets an empty
    # property map.
    purchases = [
        (alice, clean_code),
        (alice, pragmatic),
        (bob, clean_code),
        (bob, dune),
        (carol, pragmatic),
        (carol, foundation),
        (dave, dune),
        (dave, foundation),
        (dave, sapiens),
    ]
    bw.insert_edges(
        "PURCHASED",
        [(buyer, book, {}) for buyer, book in purchases],
    )

    # Commit the bulk writer first, then the enclosing transaction.
    bw.commit()
tx.commit()

print("Data ingested")
Data ingested

3. Collaborative Filtering

Books bought by users who share at least one purchase with Alice, excluding books Alice has already bought.

# Collaborative filtering for Alice:
#   1. find other users who bought at least one book Alice bought,
#   2. collect the books those users bought,
#   3. drop anything Alice already bought,
#   4. rank by the number of distinct such users.
# The secondary sort key breaks ties by title, so books with equal buyer
# counts always come back in the same order.
query_collab = """
    MATCH (alice:User {name: 'Alice'})-[:PURCHASED]->(b:Book)<-[:PURCHASED]-(other:User)
    WHERE other._vid <> alice._vid
    MATCH (other)-[:PURCHASED]->(rec:Book)
    WHERE NOT (alice)-[:PURCHASED]->(rec)
    RETURN rec.name AS recommendation, COUNT(DISTINCT other) AS buyers
    ORDER BY buyers DESC, recommendation
"""
results = session.query(query_collab)
print("Collaborative recommendations for Alice:")
for r in results:
    print(f"  {r['recommendation']} (bought by {r['buyers']} similar user(s))")
Collaborative recommendations for Alice:
  Dune (bought by 1 similar user(s))
  Foundation (bought by 1 similar user(s))

4. Semantic Vector Search

Find the 3 books most similar to a 'tech' query vector.

# Query vector dominated by the first (tech) embedding component; it equals
# the embedding stored for "Clean Code".
tech_query = [0.95, 0.05, 0.0, 0.0]

# Materialize the rows once. They are consumed twice below (printing, then
# the genre check), and a single-pass result would otherwise leave the second
# pass empty and make the check pass vacuously.
results = list(
    session.query(
        """
    CALL uni.vector.query('Book', 'embedding', $vec, 3)
    YIELD node, distance
    RETURN node.name AS title, node.genre AS genre, distance
    ORDER BY distance
""",
        {"vec": tech_query},
    )
)

print("Top 3 books semantically similar to tech query:")
for r in results:
    print(f"  [{r['distance']:.4f}] {r['title']} ({r['genre']})")

# All 3 results should be tech books. An empty result must fail loudly,
# because all() over an empty list is True.
genres = [r["genre"] for r in results]
assert genres, "Vector search returned no rows"
assert all(g == "tech" for g in genres), f"Expected all tech, got {genres}"
Top 3 books semantically similar to tech query:
  [0.0000] Clean Code (tech)
  [0.0050] The Pragmatic Programmer (tech)
  [0.0350] Designing Data-Intensive Apps (tech)

5. Hybrid: Vector + Graph

Vector search for fiction books, then find which users bought them.

# Probe vector dominated by the second (fiction) embedding component.
fiction_query = [0.0, 0.95, 0.0, 0.05]

# The vector search yields candidate books; the MATCH then expands each
# candidate to the users holding a PURCHASED edge to it.
hybrid_cypher = """
    CALL uni.vector.query('Book', 'embedding', $vec, 3)
    YIELD node, distance
    MATCH (u:User)-[:PURCHASED]->(node)
    RETURN node.name AS book, u.name AS buyer, distance
    ORDER BY distance, buyer
"""
hybrid_params = {"vec": fiction_query}
results = session.query(hybrid_cypher, hybrid_params)

print("Fiction book buyers (via vector + graph):")
for row in results:
    buyer, book, distance = row["buyer"], row["book"], row["distance"]
    print(f"  {buyer} bought '{book}' (distance={distance:.4f})")
Fiction book buyers (via vector + graph):
  Bob bought 'Dune' (distance=0.0000)
  Dave bought 'Dune' (distance=0.0000)
  Carol bought 'Foundation' (distance=0.0200)
  Dave bought 'Foundation' (distance=0.0200)
  Dave bought 'Sapiens' (distance=1.3400)

6. Discovery: Popular Books Alice Has Not Bought

Books Alice hasn't bought, ranked by how many other users bought them.

# Popularity ranking over every book Alice has no PURCHASED edge to, counting
# distinct buyers other than Alice. The secondary sort key breaks ties by
# title, so books with equal buyer counts always come back in the same order.
query_discovery = """
    MATCH (alice:User {name: 'Alice'})
    MATCH (u:User)-[:PURCHASED]->(b:Book)
    WHERE NOT (alice)-[:PURCHASED]->(b) AND u._vid <> alice._vid
    RETURN b.name AS book, COUNT(DISTINCT u) AS buyers
    ORDER BY buyers DESC, book
"""
results = session.query(query_discovery)
print("Popular books Alice has not read:")
for r in results:
    print(f"  {r['book']}: {r['buyers']} buyer(s)")
Popular books Alice has not read:
  Dune: 2 buyer(s)
  Foundation: 2 buyer(s)
  Sapiens: 1 buyer(s)