Skip to main content

SQLStore

The SQLStrStore and SQLDocStore implement remote data access and persistence to store strings or LangChain documents in your SQL instance.

from langchain_community.storage import SQLStrStore

# simple example using an SQLStrStore to store strings
# same as you would use in "InMemoryStore" but using SQL persistence
CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "test_collection"

store = SQLStrStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
store.mset([("key1", "value1"), ("key2", "value2")])
print(store.mget(["key1", "key2"]))
# ['value1', 'value2']
store.mdelete(["key1"])
print(list(store.yield_keys()))
# ['key2']
print(list(store.yield_keys(prefix="k")))
# ['key2']
# delete the COLLECTION_NAME collection
['value1', 'value2']
['key2']
['key2']

Integration with ParentRetriever and PGVector

When using PGVector, you already have a SQL instance running. Here is a convenient way of using this instance to store documents associated to vectors.

Prepare the PGVector vectorestore with something like this:

from langchain_community.vectorstores import PGVector
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
vector_db = PGVector.from_existing_index(
embedding=embeddings,
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)

Then create the parent retiever using SQLDocStore to persist the documents

from langchain.document_loaders import TextLoader
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.storage import SQLDocStore

CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "state_of_the_union_test"
docstore = SQLDocStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)

loader = TextLoader("./state_of_the_union.txt")
documents = loader.load()

parent_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=50)

retriever = ParentDocumentRetriever(
vectorstore=vector_db,
docstore=docstore,
child_splitter=child_splitter,
parent_splitter=parent_splitter,
)
retriever.add_documents(documents)

Delete a collection

from langchain_community.storage import SQLStrStore

# delete the COLLECTION_NAME collection
CONNECTION_STRING = "postgresql+psycopg2://user:pass@localhost:5432/db"
COLLECTION_NAME = "test_collection"
store = SQLStrStore(
collection_name=COLLECTION_NAME,
connection_string=CONNECTION_STRING,
)
store.delete_collection()