Retrieval-Augmented Generation (RAG) with Uni (Rust)¶
This notebook combines vector search with knowledge-graph traversal to retrieve better context for generation.
In [ ]:
Copied!
:dep uni-db = { path = "../../../crates/uni" }
:dep tokio = { version = "1", features = ["full"] }
:dep serde_json = "1"
:dep uni-db = { path = "../../../crates/uni" }
:dep tokio = { version = "1", features = ["full"] }
:dep serde_json = "1"
In [ ]:
Copied!
use uni::{Uni, DataType, IndexType, ScalarType, VectorMetric, VectorAlgo, VectorIndexCfg};
use std::collections::HashMap;
use serde_json::json;
// Helper for running async code from evcxr cells.
// Every invocation creates its own Tokio runtime (panicking if creation fails),
// blocks on the supplied expression and evaluates to that future's output.
macro_rules! run {
    ($fut:expr) => {{
        let rt = tokio::runtime::Runtime::new().unwrap();
        rt.block_on($fut)
    }};
}
use uni::{Uni, DataType, IndexType, ScalarType, VectorMetric, VectorAlgo, VectorIndexCfg};
use std::collections::HashMap;
use serde_json::json;
// Helper for running async code from evcxr cells.
// Every invocation creates its own Tokio runtime (panicking if creation fails),
// blocks on the supplied expression and evaluates to that future's output.
macro_rules! run {
    ($fut:expr) => {{
        let rt = tokio::runtime::Runtime::new().unwrap();
        rt.block_on($fut)
    }};
}
In [ ]:
Copied!
// Directory that holds the on-disk database for this notebook.
let db_path = "./rag_db";
// Remove anything left behind by an earlier run. A successful metadata lookup
// is the same existence test that `Path::exists` performs.
if std::fs::metadata(db_path).is_ok() {
    std::fs::remove_dir_all(db_path).unwrap();
}
// Open the database at that path; a failure to open aborts the cell.
let db = {
    let opened = run!(Uni::open(db_path).build());
    opened.unwrap()
};
println!("Opened database at {}", db_path);
// Directory that holds the on-disk database for this notebook.
let db_path = "./rag_db";
// Remove anything left behind by an earlier run. A successful metadata lookup
// is the same existence test that `Path::exists` performs.
if std::fs::metadata(db_path).is_ok() {
    std::fs::remove_dir_all(db_path).unwrap();
}
// Open the database at that path; a failure to open aborts the cell.
let db = {
    let opened = run!(Uni::open(db_path).build());
    opened.unwrap()
};
println!("Opened database at {}", db_path);
1. Schema¶
The schema stores chunks of text, each with an embedding vector, and links them to named entities through MENTIONS edges.
In [ ]:
Copied!
// Declare the graph schema in a single builder chain and apply it:
//   * Chunk  - `text` plus a 4-dimensional `embedding` with a vector index
//   * Entity - `name` and `type`
//   * MENTIONS edges running from Chunk to Entity
run!(async {
    // Vector index settings for Chunk.embedding: Flat algorithm, Cosine metric.
    let embedding_index = IndexType::Vector(VectorIndexCfg {
        metric: VectorMetric::Cosine,
        algorithm: VectorAlgo::Flat,
    });

    db.schema()
        .label("Chunk")
        .property("text", DataType::String)
        .property("embedding", DataType::Vector { dimensions: 4 })
        .index("embedding", embedding_index)
        .label("Entity")
        .property("name", DataType::String)
        .property("type", DataType::String)
        .edge_type("MENTIONS", &["Chunk"], &["Entity"])
        .apply()
        .await
})
.unwrap();
println!("RAG schema created");
// Declare the graph schema in a single builder chain and apply it:
//   * Chunk  - `text` plus a 4-dimensional `embedding` with a vector index
//   * Entity - `name` and `type`
//   * MENTIONS edges running from Chunk to Entity
run!(async {
    // Vector index settings for Chunk.embedding: Flat algorithm, Cosine metric.
    let embedding_index = IndexType::Vector(VectorIndexCfg {
        metric: VectorMetric::Cosine,
        algorithm: VectorAlgo::Flat,
    });

    db.schema()
        .label("Chunk")
        .property("text", DataType::String)
        .property("embedding", DataType::Vector { dimensions: 4 })
        .index("embedding", embedding_index)
        .label("Entity")
        .property("name", DataType::String)
        .property("type", DataType::String)
        .edge_type("MENTIONS", &["Chunk"], &["Entity"])
        .apply()
        .await
})
.unwrap();
println!("RAG schema created");
2. Ingest Data¶
In [ ]:
Copied!
// Four-component embedding vectors for the two chunks.
let c1_vec = vec![1.0, 0.0, 0.0, 0.0];
let c2_vec = vec![0.9, 0.1, 0.0, 0.0];

// One property map per Chunk vertex: its text and its embedding.
let chunks = vec![
    {
        let mut props = HashMap::new();
        props.insert(String::from("text"), json!("Function verify() checks signatures."));
        props.insert(String::from("embedding"), json!(c1_vec));
        props
    },
    {
        let mut props = HashMap::new();
        props.insert(String::from("text"), json!("Other text about verify."));
        props.insert(String::from("embedding"), json!(c2_vec));
        props
    },
];
let chunk_vids = run!(db.bulk_insert_vertices("Chunk", chunks)).unwrap();
// Keep the ids of both inserted chunks; they are the edge sources below.
let c1 = chunk_vids[0];
let c2 = chunk_vids[1];

// A single Entity vertex describing the `verify` function.
let entities = vec![{
    let mut props = HashMap::new();
    props.insert(String::from("name"), json!("verify"));
    props.insert(String::from("type"), json!("function"));
    props
}];
let entity_vids = run!(db.bulk_insert_vertices("Entity", entities)).unwrap();
let e1 = entity_vids[0];

// Both chunks mention the same entity; the edges carry no properties.
{
    let mentions = vec![
        (c1, e1, HashMap::new()),
        (c2, e1, HashMap::new()),
    ];
    run!(db.bulk_insert_edges("MENTIONS", mentions)).unwrap();
}

run!(db.flush()).unwrap();
println!("RAG data ingested");
// Four-component embedding vectors for the two chunks.
let c1_vec = vec![1.0, 0.0, 0.0, 0.0];
let c2_vec = vec![0.9, 0.1, 0.0, 0.0];

// One property map per Chunk vertex: its text and its embedding.
let chunks = vec![
    {
        let mut props = HashMap::new();
        props.insert(String::from("text"), json!("Function verify() checks signatures."));
        props.insert(String::from("embedding"), json!(c1_vec));
        props
    },
    {
        let mut props = HashMap::new();
        props.insert(String::from("text"), json!("Other text about verify."));
        props.insert(String::from("embedding"), json!(c2_vec));
        props
    },
];
let chunk_vids = run!(db.bulk_insert_vertices("Chunk", chunks)).unwrap();
// Keep the ids of both inserted chunks; they are the edge sources below.
let c1 = chunk_vids[0];
let c2 = chunk_vids[1];

// A single Entity vertex describing the `verify` function.
let entities = vec![{
    let mut props = HashMap::new();
    props.insert(String::from("name"), json!("verify"));
    props.insert(String::from("type"), json!("function"));
    props
}];
let entity_vids = run!(db.bulk_insert_vertices("Entity", entities)).unwrap();
let e1 = entity_vids[0];

// Both chunks mention the same entity; the edges carry no properties.
{
    let mentions = vec![
        (c1, e1, HashMap::new()),
        (c2, e1, HashMap::new()),
    ];
    run!(db.bulk_insert_edges("MENTIONS", mentions)).unwrap();
}

run!(db.flush()).unwrap();
println!("RAG data ingested");
3. Hybrid Retrieval¶
Find the chunks related to a given chunk by following MENTIONS edges to the entities they share.
In [ ]:
Copied!
// Starting from chunk `c1`, follow MENTIONS edges to each entity it mentions and
// back out to every other chunk that mentions the same entity.
let query = {
    // Raw vid value of the starting chunk, spliced into the query text.
    let anchor_vid = c1.as_u64();
    format!(r#"
MATCH (c:Chunk)-[:MENTIONS]->(e:Entity)<-[:MENTIONS]-(related:Chunk)
WHERE c._vid = {} AND related._vid <> c._vid
RETURN related.text as text
"#, anchor_vid)
};
let results = run!(db.query(&query)).unwrap();
println!("Related chunks:");
// Print each returned row using its Debug representation.
results.rows.into_iter().for_each(|row| println!(" {:?}", row));
// Starting from chunk `c1`, follow MENTIONS edges to each entity it mentions and
// back out to every other chunk that mentions the same entity.
let query = {
    // Raw vid value of the starting chunk, spliced into the query text.
    let anchor_vid = c1.as_u64();
    format!(r#"
MATCH (c:Chunk)-[:MENTIONS]->(e:Entity)<-[:MENTIONS]-(related:Chunk)
WHERE c._vid = {} AND related._vid <> c._vid
RETURN related.text as text
"#, anchor_vid)
};
let results = run!(db.query(&query)).unwrap();
println!("Related chunks:");
// Print each returned row using its Debug representation.
results.rows.into_iter().for_each(|row| println!(" {:?}", row));