Skip to content

Methods

API (Application)

Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.

Downstream applications can extend this base template to add/modify functionality.

Source code in txtai/api/base.py
class API(Application):
    """
    Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.

    Downstream applications can extend this base template to add/modify functionality.
    """

    def __init__(self, config, loaddata=True):
        """
        Creates an API instance.

        Args:
            config: application configuration
            loaddata: passed through to Application
        """

        super().__init__(config, loaddata)

        # Embeddings cluster - when configured, operations are routed to the cluster
        # instead of the local application index
        self.cluster = None
        if self.config.get("cluster"):
            self.cluster = Cluster(self.config["cluster"])

    # pylint: disable=W0221
    def search(self, query, request=None, limit=None):
        """
        Finds documents in the embeddings model most similar to the input query.

        Args:
            query: query text
            request: API request, when present the limit is read from its query parameters
            limit: maximum results, used if request is None

        Returns:
            list of {id: value, score: value}
        """

        # When search is invoked via the API, limit is set from the request
        # When search is invoked directly, limit is set using the method parameter
        limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)

        if self.cluster:
            return self.cluster.search(query, limit)

        return super().search(query, limit)

    def batchsearch(self, queries, limit=None):
        """
        Finds documents in the embeddings model most similar to the input queries.

        Args:
            queries: queries text
            limit: maximum results per query

        Returns:
            list of {id: value, score: value} per query
        """

        # Bound limit once so the cluster and local paths apply the same 1-250 range,
        # consistent with search()
        limit = self.limit(limit)

        if self.cluster:
            return self.cluster.batchsearch(queries, limit)

        return super().batchsearch(queries, limit)

    def add(self, documents):
        """
        Adds a batch of documents for indexing.

        Downstream applications can override this method to also store full documents in an external system.

        Args:
            documents: list of {id: value, text: value}

        Returns:
            unmodified input documents
        """

        if self.cluster:
            self.cluster.add(documents)
        else:
            super().add(documents)

        return documents

    def index(self):
        """
        Builds an embeddings index for previously batched documents.
        """

        if self.cluster:
            self.cluster.index()
        else:
            super().index()

    def upsert(self):
        """
        Runs an embeddings upsert operation for previously batched documents.
        """

        if self.cluster:
            self.cluster.upsert()
        else:
            super().upsert()

    def delete(self, ids):
        """
        Deletes from an embeddings index. Returns list of ids deleted.

        Args:
            ids: list of ids to delete

        Returns:
            ids deleted
        """

        if self.cluster:
            return self.cluster.delete(ids)

        return super().delete(ids)

    def count(self):
        """
        Total number of elements in this embeddings index.

        Returns:
            number of elements in embeddings index
        """

        if self.cluster:
            return self.cluster.count()

        return super().count()

    def limit(self, limit):
        """
        Parses the number of results to return from the request. Allows range of 1-250, with a default of 10.

        Args:
            limit: limit parameter

        Returns:
            bounded limit
        """

        # Return between 1 and 250 results, defaults to 10 for falsy inputs (None, 0, "")
        return max(1, min(250, int(limit) if limit else 10))

add(self, documents)

Adds a batch of documents for indexing.

Downstream applications can override this method to also store full documents in an external system.

Parameters:

Name Type Description Default
documents

list of {id: value, text: value}

required

Returns:

Type Description

unmodified input documents

Source code in txtai/api/base.py
def add(self, documents):
    """
    Adds a batch of documents for indexing.

    Downstream applications can override this method to also store full documents in an external system.

    Args:
        documents: list of {id: value, text: value}

    Returns:
        unmodified input documents
    """

    # Route to the cluster when one is configured, otherwise index locally
    target = self.cluster if self.cluster else super()
    target.add(documents)

    return documents

batchsearch(self, queries, limit=None)

Finds documents in the embeddings model most similar to the input queries. Returns a list of {id: value, score: value} sorted by highest score per query, where id is the document id in the embeddings model.

Parameters:

Name Type Description Default
queries

queries text

required
limit

maximum results

None

Returns:

Type Description
list of {id: value, score: value} per query

Source code in txtai/api/base.py
def batchsearch(self, queries, limit=None):
    """
    Finds documents in the embeddings model most similar to the input queries.

    Args:
        queries: queries text
        limit: maximum results per query

    Returns:
        list of {id: value, score: value} per query
    """

    # Bound limit once so the cluster and local paths apply the same 1-250 range,
    # consistent with search()
    limit = self.limit(limit)

    if self.cluster:
        return self.cluster.batchsearch(queries, limit)

    return super().batchsearch(queries, limit)

count(self)

Total number of elements in this embeddings index.

Returns:

Type Description

number of elements in embeddings index

Source code in txtai/api/base.py
def count(self):
    """
    Total number of elements in this embeddings index.

    Returns:
        number of elements in embeddings index
    """

    # Without a cluster, fall back to the local application index
    if not self.cluster:
        return super().count()

    return self.cluster.count()

delete(self, ids)

Deletes from an embeddings index. Returns list of ids deleted.

Parameters:

Name Type Description Default
ids

list of ids to delete

required

Returns:

Type Description

ids deleted

Source code in txtai/api/base.py
def delete(self, ids):
    """
    Deletes from an embeddings index. Returns list of ids deleted.

    Args:
        ids: list of ids to delete

    Returns:
        ids deleted
    """

    # Cluster-aware delete, falls back to the local application index
    return self.cluster.delete(ids) if self.cluster else super().delete(ids)

index(self)

Builds an embeddings index for previously batched documents.

Source code in txtai/api/base.py
def index(self):
    """
    Builds an embeddings index for previously batched documents.
    """

    # Without a cluster, build the index locally
    if not self.cluster:
        super().index()
        return

    self.cluster.index()

search(self, query, request=None, limit=None)

Finds documents in the embeddings model most similar to the input query. Returns a list of {id: value, score: value} sorted by highest score, where id is the document id in the embeddings model.

Downstream applications can override this method to provide enriched search results.

Parameters:

Name Type Description Default
query

query text

required
limit

maximum results, used if request is None

None

Returns:

Type Description
list of {id: value, score: value}

Source code in txtai/api/base.py
def search(self, query, request=None, limit=None):
    """
    Finds documents in the embeddings model most similar to the input query.

    Args:
        query: query text
        request: API request, when present the limit is read from its query parameters
        limit: maximum results, used if request is None

    Returns:
        list of {id: value, score: value}
    """

    # API invocation supplies limit through request query parameters,
    # direct invocation supplies it through the method argument
    if request and hasattr(request, "query_params"):
        bounded = self.limit(request.query_params.get("limit"))
    else:
        bounded = self.limit(limit)

    if self.cluster:
        return self.cluster.search(query, bounded)

    return super().search(query, bounded)

upsert(self)

Runs an embeddings upsert operation for previously batched documents.

Source code in txtai/api/base.py
def upsert(self):
    """
    Runs an embeddings upsert operation for previously batched documents.
    """

    # Without a cluster, run the upsert locally
    if not self.cluster:
        super().upsert()
        return

    self.cluster.upsert()