AI FileVectorStore
Vector storage based on a local JSON file.
Allows storing, searching and managing documents with their vector embeddings in collections persisted in a single JSON file. Supports cosine similarity search, metadata filtering and batch insertion with file-level lock concurrency control.
Suitable for development, prototyping or applications with small document volumes. The storage file is created automatically on first initialization, defaulting to storage/vector_store.json.
// Resources used by the example: the 'default' vector store, the AI client
// and the Markdown chunker.
const vector = _ai.vector('default')
const client = _ai.client()
const chunker = _ai.contextRetrievalChunker()

// Create the collection if it does not yet exist
if (!vector.collectionExists('netuno')) {
  vector.createCollection('netuno', 768)
}

// The embedding options are the same for every chunk, so build them once
// instead of once per chunk. The 768 dimensions match the collection above.
const options = _val.init()
  .set('encoding_format', 'float')
  .set('dimensions', 768)

// Recursively collect all Markdown files
const files = collectFiles(_app.folder(_app.pathStorage() + '/netuno_docs'))
for (const path of files) {
  const file = _app.file(path)
  if (!file.exists()) {
    // Skip paths that are no longer present on disk.
    continue
  }
  const content = file.input().readAllAndClose()
  // Split the Markdown content into chunks before embedding.
  const chunks = chunker.markdown(content, 1500 * 3, 400)
  for (const chunk of chunks) {
    const text = chunk.get('text')
    const embeddingResponse = client.embeddings(
      'embeddinggemma:latest',
      text,
      options
    )
    const embedding = embeddingResponse.get('data').get(0).get('embedding')
    // Store the chunk with an auto-generated ID and no metadata.
    vector.add('netuno', embedding, text, null)
  }
}
/**
 * Recursively gathers the full paths of the Markdown files (.md or .mdx)
 * found under the given folder.
 *
 * @param folder Folder object exposing list(); each listed entry must
 *               provide isDirectory() and fullPath().
 * @returns A list created with _val.list() holding the matching full paths.
 */
function collectFiles(folder) {
  const markdownExtensions = ['.md', '.mdx']
  const found = _val.list()
  folder.list().forEach(entry => {
    if (entry.isDirectory()) {
      // Descend into the sub-folder and merge whatever it yields.
      collectFiles(entry).forEach(nested => found.add(nested))
      return
    }
    const fullPath = entry.fullPath()
    if (markdownExtensions.some(extension => fullPath.endsWith(extension))) {
      found.add(fullPath)
    }
  })
  return found
}
add
add(collection: string, id: string, embedding: Values, text: string, metadata: Values) : void
Description
Inserts or updates a document in a collection with an explicit ID. If the collection does not yet exist, it is created automatically with the dimensions of the provided embedding. If a document with the same ID already exists, the content, embedding and metadata are replaced.
How To Use
const options = _val.init()
.set('encoding_format', 'float')
.set('dimensions', 768)
const embeddingResponse = client.embeddings('embeddinggemma:latest', 'Document text.', options)
const embedding = embeddingResponse.get('data').get(0).get('embedding')
vector.add('netuno', 'doc-001', embedding, 'Document text.', _val.map().set('source', 'web'))
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection where the document will be inserted. |
| id | string | Unique identifier of the document. If null or empty, a UUID is auto-generated. |
| embedding | Values | List of numeric values representing the document vector. |
| text | string | Textual content of the document to store. |
| metadata | Values | Object with arbitrary metadata associated with the document, usable for filtering in searches. Can be null. |
Return
( void )
add(collection: string, embedding: Values, text: string, metadata: Values) : void
Description
Inserts a document into a collection with an auto-generated ID (UUID). If the collection does not yet exist, it is created automatically with the dimensions of the provided embedding. Because a new ID is generated on every call, this overload always adds a new document; to replace the content, embedding and metadata of an existing document, use the overload that takes an explicit ID.
How To Use
const options = _val.init()
.set('encoding_format', 'float')
.set('dimensions', 768)
const embeddingResponse = client.embeddings('embeddinggemma:latest', 'Document text.', options)
const embedding = embeddingResponse.get('data').get(0).get('embedding')
vector.add('netuno', embedding, 'Document text.', _val.map().set('source', 'web'))
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection where the document will be inserted. |
| embedding | Values | List of numeric values representing the document vector. |
| text | string | Textual content of the document to store. |
| metadata | Values | Object with arbitrary metadata associated with the document, usable for filtering in searches. Can be null. |
Return
( void )
addBatch
addBatch(collection: string, documents: Values) : void
Description
Inserts or updates multiple documents in a collection in a single atomic transaction. If any document fails, the entire operation is rolled back. Each item in the list must be an object with the fields text (required), embedding (required), id (optional, auto-generated if absent) and metadata (optional).
How To Use
const options = _val.init().set('encoding_format', 'float').set('dimensions', 768)
const documents = _val.list()
const texts = _val.list().add('First document.').add('Second document.')
const embeddingResponse = client.embeddings('embeddinggemma:latest', texts, options)
const data = embeddingResponse.get('data')
for (let i = 0; i < data.size(); i++) {
const item = data.get(i)
documents.add(
_val.map()
.set('text', texts.get(i))
.set('embedding', item.get('embedding'))
.set('metadata', _val.map().set('index', i))
)
}
vector.addBatch('netuno', documents)
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection where the documents will be inserted. |
| documents | Values | List of documents. Each item must contain text (document text) and embedding (numeric vector), and may also contain id (auto-generated if absent) and metadata. |
Return
( void )
collectionExists
collectionExists(collection: string) : boolean
Description
Checks whether a collection exists in the storage file.
How To Use
if (!vector.collectionExists('netuno')) {
vector.createCollection('netuno', 768)
}
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection to check. |
Return
( boolean )
True if the collection exists, false otherwise.
count
count(collection: string) : int
Description
Returns the total number of documents in a collection in the storage file. Returns 0 if the collection does not exist.
How To Use
const total = vector.count('netuno')
_log.info('Total indexed documents: ' + total)
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection to count. |
Return
( int )
Total number of documents in the collection. Returns 0 if the collection does not exist or is empty.
createCollection
createCollection(collection: string, dimensions: int) : boolean
Description
Explicitly creates a collection with a fixed number of dimensions in the storage file. If the collection already exists, the operation is silently ignored and returns false. Normally there is no need to call this method directly, as the collection is created automatically on the first call to add or addBatch.
How To Use
if (!vector.collectionExists('netuno')) {
const created = vector.createCollection('netuno', 768)
_log.info('Collection created: ' + created)
}
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection to create. |
| dimensions | int | Number of dimensions of the vectors in this collection. Must be greater than zero and consistent with the embeddings model used. |
Return
( boolean )
True if the collection was created, false if it already existed.
delete
delete(collection: string, id: string) : void
Description
Removes a specific document from a collection by its ID. If the document or collection does not exist, the operation is silently ignored.
How To Use
vector.delete('netuno', 'doc-001')
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection containing the document. |
| id | string | Unique identifier of the document to remove. |
Return
( void )
deleteCollection
deleteCollection(collection: string) : void
Description
Removes an entire collection and all its documents from the storage file. If the collection does not exist, the operation is silently ignored.
How To Use
vector.deleteCollection('netuno')
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection to remove. |
Return
( void )
getProvider
getProvider() : string
Return
( string )
init
init() : void
Return
( void )
isInitialized
isInitialized() : boolean
Return
( boolean )
listCollections
listCollections() : Values
Description
Lists all existing collections in the storage file, including the number of dimensions and the total number of documents in each one.
How To Use
const collections = vector.listCollections()
for (const c of collections) {
_log.info(c.get('name') + ' | dims: ' + c.get('dimensions') + ' | docs: ' + c.get('count'))
}
Return
( Values )
List of collections, each with the fields: name (collection name), dimensions (number of dimensions) and count (total number of documents).
provider
provider(provider: string) : org.netuno.tritao.ai.vector.VectorStore
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| provider | string |
Return
( org.netuno.tritao.ai.vector.VectorStore )
search
search(collection: string, embedding: Values, topK: int) : Values
Description
Searches a collection for the documents most similar to the provided embedding, using cosine similarity. Returns the topK closest documents, ordered by descending similarity score.
How To Use
// Fetch the 5 documents in the 'netuno' collection closest to the query embedding
const results = vector.search('netuno', embedding, 5)
for (const r of results) {
  _log.info('Score: ' + r.get('score') + ' | ' + r.get('text'))
}
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection where the search will be performed. |
| embedding | Values | Query vector to compare against stored documents. |
| topK | int | Maximum number of results to return. |
Return
( Values )
List of matching documents, each with the fields: id, text, embedding, metadata, score (0.0–1.0) and timestamp.
search(collection: string, embedding: Values, topK: int, filter: Values) : Values
Description
Searches for the most similar documents to the provided embedding in a collection, with additional metadata filtering. The filter is applied as exact value equality per key. Returns the topK closest documents that satisfy the filter, ordered by descending similarity score.
How To Use
const options = _val.init().set('encoding_format', 'float').set('dimensions', 768)
const queryEmbedding = client.embeddings('embeddinggemma:latest', 'What is Netuno?', options)
.get('data').get(0).get('embedding')
const filter = _val.map().set('source', 'pdf')
const results = vector.search('netuno', queryEmbedding, 5, filter)
for (const r of results) {
_log.info('Score: ' + r.get('score') + ' | ' + r.get('text'))
}
Attributes
| NAME | TYPE | DESCRIPTION |
|---|---|---|
| collection | string | Name of the collection where the search will be performed. |
| embedding | Values | Query vector to compare against stored documents. |
| topK | int | Maximum number of results to return. |
| filter | Values | Metadata object used to filter results. Only documents whose metadata contains every key-value pair of the filter, with exactly equal values, are returned. Can be null to disable filtering. |
Return
( Values )
List of matching documents, each with the fields: id, text, embedding, metadata, score (0.0–1.0) and timestamp.