SQLStore
The SQLStrStore
and SQLDocStore
implement remote data access and
persistence to store strings or LangChain documents in your SQL
instance.
from langchain_community.storage import SQLStrStore
# simple example using an SQLStrStore to store strings
# same as you would use in "InMemoryStore" but using SQL persistence
CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "test_collection"
store = SQLStrStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
store.mset([("key1", "value1"), ("key2", "value2")])
print(store.mget(["key1", "key2"]))
# ['value1', 'value2']
store.mdelete(["key1"])
print(list(store.yield_keys()))
# ['key2']
print(list(store.yield_keys(prefix="k")))
# ['key2']
# delete the COLLECTION_NAME collection
['value1', 'value2']
['key2']
['key2']
Integration with ParentRetriever and PGVector
When using PGVector, you already have a SQL instance running. Here is a convenient way of using this instance to store documents associated to vectors.
Prepare the PGVector vectorestore with something like this:
from langchain_community.vectorstores import PGVector
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
vector_db = PGVector.from_existing_index(
embedding=embeddings,
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
Then create the parent retiever using SQLDocStore
to persist the
documents
from langchain.document_loaders import TextLoader
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.storage import SQLDocStore
CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "state_of_the_union_test"
docstore = SQLDocStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
loader = TextLoader("./state_of_the_union.txt")
documents = loader.load()
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=50)
retriever = ParentDocumentRetriever(
vectorstore=vector_db,
docstore=docstore,
child_splitter=child_splitter,
parent_splitter=parent_splitter,
)
retriever.add_documents(documents)
Delete a collection
from langchain_community.storage import SQLStrStore
# delete the COLLECTION_NAME collection
CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "test_collection"
store = SQLStrStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
store.delete_collection()