Methods

Bases: Application

Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.

Downstream applications can extend this base template to add/modify functionality.

Source code in txtai/api/base.py
class API(Application):
    """
    Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.

    Downstream applications can extend this base template to add/modify functionality.
    """

    def __init__(self, config, loaddata=True):
        super().__init__(config, loaddata)

        # Embeddings cluster
        self.cluster = None
        if self.config.get("cluster"):
            self.cluster = Cluster(self.config["cluster"])

    # pylint: disable=W0221
    def search(self, query, limit=None, weights=None, index=None, parameters=None, graph=False, request=None):
        # When search is invoked via the API, limit is set from the request
        # When search is invoked directly, limit is set using the method parameter
        limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)
        weights = self.weights(request.query_params.get("weights") if request and hasattr(request, "query_params") else weights)
        index = request.query_params.get("index") if request and hasattr(request, "query_params") else index
        parameters = request.query_params.get("parameters") if request and hasattr(request, "query_params") else parameters
        graph = request.query_params.get("graph") if request and hasattr(request, "query_params") else graph

        # Decode parameters
        parameters = json.loads(parameters) if parameters and isinstance(parameters, str) else parameters

        if self.cluster:
            return self.cluster.search(query, limit, weights, index, parameters, graph)

        return super().search(query, limit, weights, index, parameters, graph)

    def batchsearch(self, queries, limit=None, weights=None, index=None, parameters=None, graph=False):
        if self.cluster:
            return self.cluster.batchsearch(queries, self.limit(limit), weights, index, parameters, graph)

        return super().batchsearch(queries, limit, weights, index, parameters, graph)

    def add(self, documents):
        """
        Adds a batch of documents for indexing.

        Downstream applications can override this method to also store full documents in an external system.

        Args:
            documents: list of {id: value, text: value}

        Returns:
            unmodified input documents
        """

        if self.cluster:
            self.cluster.add(documents)
        else:
            super().add(documents)

        return documents

    def index(self):
        """
        Builds an embeddings index for previously batched documents.
        """

        if self.cluster:
            self.cluster.index()
        else:
            super().index()

    def upsert(self):
        """
        Runs an embeddings upsert operation for previously batched documents.
        """

        if self.cluster:
            self.cluster.upsert()
        else:
            super().upsert()

    def delete(self, ids):
        """
        Deletes from an embeddings index. Returns list of ids deleted.

        Args:
            ids: list of ids to delete

        Returns:
            ids deleted
        """

        if self.cluster:
            return self.cluster.delete(ids)

        return super().delete(ids)

    def reindex(self, config, function=None):
        """
        Recreates this embeddings index using config. This method only works if document content storage is enabled.

        Args:
            config: new config
            function: optional function to prepare content for indexing
        """

        if self.cluster:
            self.cluster.reindex(config, function)
        else:
            super().reindex(config, function)

    def count(self):
        """
        Total number of elements in this embeddings index.

        Returns:
            number of elements in embeddings index
        """

        if self.cluster:
            return self.cluster.count()

        return super().count()

    def limit(self, limit):
        """
        Parses the number of results to return from the request. Allows range of 1-250, with a default of 10.

        Args:
            limit: limit parameter

        Returns:
            bounded limit
        """

        # Return between 1 and 250 results, defaults to 10
        return max(1, min(250, int(limit) if limit else 10))

    def weights(self, weights):
        """
        Parses the weights parameter from the request.

        Args:
            weights: weights parameter

        Returns:
            weights
        """

        return float(weights) if weights else weights
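
As noted above, downstream applications can extend this template to add or modify functionality. Below is a minimal sketch of one such extension that also saves full documents to an external system; DocumentStore and its save method are hypothetical placeholders, not part of txtai.

from txtai.api import API

class CustomAPI(API):
    """
    Example downstream API that also writes documents to an external store.
    """

    def __init__(self, config, loaddata=True):
        super().__init__(config, loaddata)

        # Hypothetical external document store (not part of txtai)
        self.store = DocumentStore()

    def add(self, documents):
        # Materialize input so it can be consumed twice
        documents = list(documents)

        # Save full documents externally, then batch for indexing as usual
        self.store.save(documents)
        return super().add(documents)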

add(documents)

Adds a batch of documents for indexing.

Downstream applications can override this method to also store full documents in an external system.

Parameters:

    documents: list of {id: value, text: value} (required)

Returns:

    unmodified input documents

Source code in txtai/api/base.py
def add(self, documents):
    """
    Adds a batch of documents for indexing.

    Downstream applications can override this method to also store full documents in an external system.

    Args:
        documents: list of {id: value, text: value}

    Returns:
        unmodified input documents
    """

    if self.cluster:
        self.cluster.add(documents)
    else:
        super().add(documents)

    return documents
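
A typical flow batches documents with add, then builds the index with index (or upsert for incremental updates). A minimal sketch, assuming a dict config with a writable embeddings index; the model path is illustrative:

from txtai.api import API

# Illustrative config: writable index with an embeddings model
api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

# Batch documents, then build the index
api.add([{"id": "0", "text": "US tops 5 million confirmed virus cases"},
         {"id": "1", "text": "Canada lifts travel restrictions"}])
api.index()

# Later additions can be applied incrementally
api.add([{"id": "2", "text": "Maine man wins $1M from $25 lottery ticket"}])
api.upsert()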

addobject(data, uid, field)

Helper method that builds a batch of object documents.

Parameters:

    data: object content (required)
    uid: optional list of corresponding uids (required)
    field: optional field to set (required)

Returns:

    documents

Source code in txtai/app/base.py
def addobject(self, data, uid, field):
    """
    Helper method that builds a batch of object documents.

    Args:
        data: object content
        uid: optional list of corresponding uids
        field: optional field to set

    Returns:
        documents
    """

    # Raise error if index is not writable
    if not self.config.get("writable"):
        raise ReadOnlyError("Attempting to add documents to a read-only index (writable != True)")

    documents = []
    for x, content in enumerate(data):
        if field:
            row = {"id": uid[x], field: content} if uid else {field: content}
        elif uid:
            row = (uid[x], content)
        else:
            row = content

        documents.append(row)

    return self.add(documents)
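
The uid and field arguments determine the shape of each generated row, as the three branches above show. A short sketch of the resulting rows:

# field set: dict rows, with ids when uid is provided
api.addobject([b"image bytes"], uid=["doc1"], field="object")
# rows passed to add(): [{"id": "doc1", "object": b"image bytes"}]

# uid only: (id, data) tuple rows
api.addobject([b"image bytes"], uid=["doc1"], field=None)
# rows passed to add(): [("doc1", b"image bytes")]

# neither: raw data rows
api.addobject([b"image bytes"], uid=None, field=None)
# rows passed to add(): [b"image bytes"]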

batchexplain(queries, texts=None, limit=10)

Explains the importance of each input token in text for a list of queries.

Parameters:

    queries: list of query texts (required)
    texts: optional list of text, otherwise runs search queries (default: None)
    limit: optional limit if texts is None (default: 10)

Returns:

    list of dict per input text per query, where higher token scores represent higher importance relative to the query

Source code in txtai/app/base.py
def batchexplain(self, queries, texts=None, limit=10):
    """
    Explains the importance of each input token in text for a list of queries.

    Args:
        queries: list of query texts
        texts: optional list of text, otherwise runs search queries
        limit: optional limit if texts is None

    Returns:
        list of dict per input text per query, where higher token scores represent higher importance relative to the query
    """

    if self.embeddings:
        with self.lock:
            return self.embeddings.batchexplain(queries, texts, limit)

    return None

batchsimilarity(queries, texts)

Computes the similarity between list of queries and list of text. Returns a list of {id: value, score: value} sorted by highest score per query, where id is the index in texts.

Parameters:

    queries: list of query texts (required)
    texts: list of text (required)

Returns:

    list of {id: value, score: value} per query

Source code in txtai/app/base.py
def batchsimilarity(self, queries, texts):
    """
    Computes the similarity between list of queries and list of text. Returns a list
    of {id: value, score: value} sorted by highest score per query, where id is the
    index in texts.

    Args:
        queries: queries text
        texts: list of text

    Returns:
        list of {id: value, score: value} per query
    """

    # Use similarity instance if available otherwise fall back to embeddings model
    if "similarity" in self.pipelines:
        return [[{"id": uid, "score": float(score)} for uid, score in r] for r in self.pipelines["similarity"](queries, texts)]
    if self.embeddings:
        return [[{"id": uid, "score": float(score)} for uid, score in r] for r in self.embeddings.batchsimilarity(queries, texts)]

    return None
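
For example, two queries scored against the same candidate texts return one ranked list per query, with each id indexing into texts. A sketch, assuming a similarity pipeline or embeddings model is configured:

queries = ["feel good story", "climate change"]
texts = ["Maine man wins $1M from $25 lottery ticket",
         "Canada's last fully intact ice shelf has suddenly collapsed"]

for query, results in zip(queries, api.batchsimilarity(queries, texts)):
    # results are sorted by score, highest first
    top = results[0]
    print(f"{query} -> {texts[top['id']]} ({top['score']:.4f})")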

batchtransform(texts)

Transforms list of text into embeddings arrays.

Parameters:

    texts: list of text (required)

Returns:

    embeddings arrays

Source code in txtai/app/base.py
def batchtransform(self, texts):
    """
    Transforms list of text into embeddings arrays.

    Args:
        texts: list of text

    Returns:
        embeddings arrays
    """

    if self.embeddings:
        documents = [(None, text, None) for text in texts]
        return [[float(x) for x in result] for result in self.embeddings.batchtransform(documents)]

    return None

count()

Total number of elements in this embeddings index.

Returns:

    number of elements in embeddings index

Source code in txtai/api/base.py
def count(self):
    """
    Total number of elements in this embeddings index.

    Returns:
        number of elements in embeddings index
    """

    if self.cluster:
        return self.cluster.count()

    return super().count()

delete(ids)

Deletes from an embeddings index. Returns list of ids deleted.

Parameters:

    ids: list of ids to delete (required)

Returns:

    ids deleted

Source code in txtai/api/base.py
def delete(self, ids):
    """
    Deletes from an embeddings index. Returns list of ids deleted.

    Args:
        ids: list of ids to delete

    Returns:
        ids deleted
    """

    if self.cluster:
        return self.cluster.delete(ids)

    return super().delete(ids)

explain(query, texts=None, limit=10)

Explains the importance of each input token in text for a query.

Parameters:

    query: query text (required)
    texts: optional list of text, otherwise runs search query (default: None)
    limit: optional limit if texts is None (default: 10)

Returns:

    list of dict per input text, where higher token scores represent higher importance relative to the query

Source code in txtai/app/base.py
def explain(self, query, texts=None, limit=10):
    """
    Explains the importance of each input token in text for a query.

    Args:
        query: query text
        texts: optional list of text, otherwise runs search query
        limit: optional limit if texts is None

    Returns:
        list of dict per input text, where higher token scores represent higher importance relative to the query
    """

    if self.embeddings:
        with self.lock:
            return self.embeddings.explain(query, texts, limit)

    return None
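
For example, explaining a query against an explicit text; each result carries per-token scores, with higher scores marking tokens more relevant to the query. A sketch; the exact result fields may vary by txtai version:

results = api.explain("feel good story",
                      texts=["Maine man wins $1M from $25 lottery ticket"])

# One dict per input text with per-token importance scores
print(results[0])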

extract(queue, texts=None)

Extracts answers to input questions.

Parameters:

    queue: list of {name: value, query: value, question: value, snippet: value} (required)
    texts: optional list of text (default: None)

Returns:

    list of {name: value, answer: value}

Source code in txtai/app/base.py
def extract(self, queue, texts=None):
    """
    Extracts answers to input questions.

    Args:
        queue: list of {name: value, query: value, question: value, snippet: value}
        texts: optional list of text

    Returns:
        list of {name: value, answer: value}
    """

    if self.embeddings and "extractor" in self.pipelines:
        # Get extractor instance
        extractor = self.pipelines["extractor"]

        # Run extractor and return results as dicts
        return extractor(queue, texts)

    return None
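
Each queue entry names an extraction, pairing a query used to find relevant text with a question to answer against it. A sketch, assuming an extractor pipeline is configured; the output shown is illustrative:

queue = [{"name": "winnings",
          "query": "lottery win amount",
          "question": "How much was won?",
          "snippet": False}]

answers = api.extract(queue, texts=["Maine man wins $1M from $25 lottery ticket"])
# Illustrative output: [{"name": "winnings", "answer": "$1M"}]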

index()

Builds an embeddings index for previously batched documents.

Source code in txtai/api/base.py
def index(self):
    """
    Builds an embeddings index for previously batched documents.
    """

    if self.cluster:
        self.cluster.index()
    else:
        super().index()

label(text, labels)

Applies a zero shot classifier to text using a list of labels. Returns a list of {id: value, score: value} sorted by highest score, where id is the index in labels.

Parameters:

    text: text|list (required)
    labels: list of labels (required)

Returns:

    list of {id: value, score: value} per text element

Source code in txtai/app/base.py
def label(self, text, labels):
    """
    Applies a zero shot classifier to text using a list of labels. Returns a list of
    {id: value, score: value} sorted by highest score, where id is the index in labels.

    Args:
        text: text|list
        labels: list of labels

    Returns:
        list of {id: value, score: value} per text element
    """

    if "labels" in self.pipelines:
        # Text is a string
        if isinstance(text, str):
            return [{"id": uid, "score": float(score)} for uid, score in self.pipelines["labels"](text, labels)]

        # Text is a list
        return [[{"id": uid, "score": float(score)} for uid, score in result] for result in self.pipelines["labels"](text, labels)]

    return None
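
For example, zero-shot classification of a single string; the id of each result indexes into labels. A sketch, assuming a labels pipeline is configured:

labels = ["business", "sports", "weather"]
results = api.label("Wall Street rallies on strong earnings", labels)

# Results are sorted by score; map the top id back to its label
print(labels[results[0]["id"]], results[0]["score"])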

pipeline(name, args)

Generic pipeline execution method.

Parameters:

    name: pipeline name (required)
    args: pipeline arguments (required)

Returns:

    pipeline results

Source code in txtai/app/base.py
def pipeline(self, name, args):
    """
    Generic pipeline execution method.

    Args:
        name: pipeline name
        args: pipeline arguments

    Returns:
        pipeline results
    """

    if name in self.pipelines:
        return self.pipelines[name](*args)

    return None
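
Because args is unpacked positionally, a single-argument call still needs a one-element tuple. A sketch, assuming a summary pipeline is configured in this instance:

# Equivalent to self.pipelines["summary"]("Text to condense...")
summary = api.pipeline("summary", ("Text to condense...",))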

reindex(config, function=None)

Recreates this embeddings index using config. This method only works if document content storage is enabled.

Parameters:

    config: new config (required)
    function: optional function to prepare content for indexing (default: None)

Source code in txtai/api/base.py
def reindex(self, config, function=None):
    """
    Recreates this embeddings index using config. This method only works if document content storage is enabled.

    Args:
        config: new config
        function: optional function to prepare content for indexing
    """

    if self.cluster:
        self.cluster.reindex(config, function)
    else:
        super().reindex(config, function)

similarity(query, texts)

Computes the similarity between query and list of text. Returns a list of {id: value, score: value} sorted by highest score, where id is the index in texts.

Parameters:

    query: query text (required)
    texts: list of text (required)

Returns:

    list of {id: value, score: value}

Source code in txtai/app/base.py
def similarity(self, query, texts):
    """
    Computes the similarity between query and list of text. Returns a list of
    {id: value, score: value} sorted by highest score, where id is the index
    in texts.

    Args:
        query: query text
        texts: list of text

    Returns:
        list of {id: value, score: value}
    """

    # Use similarity instance if available otherwise fall back to embeddings model
    if "similarity" in self.pipelines:
        return [{"id": uid, "score": float(score)} for uid, score in self.pipelines["similarity"](query, texts)]
    if self.embeddings:
        return [{"id": uid, "score": float(score)} for uid, score in self.embeddings.similarity(query, texts)]

    return None
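
A common use is ranking a small set of candidate texts against a single query, for example to re-rank search results. A sketch:

texts = ["Canada lifts travel restrictions",
         "Maine man wins $1M from $25 lottery ticket"]

# Uses the similarity pipeline if configured, otherwise the embeddings model
results = api.similarity("feel good story", texts)
print(texts[results[0]["id"]])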

transform(text)

Transforms text into embeddings arrays.

Parameters:

    text: input text (required)

Returns:

    embeddings array

Source code in txtai/app/base.py
def transform(self, text):
    """
    Transforms text into embeddings arrays.

    Args:
        text: input text

    Returns:
        embeddings array
    """

    if self.embeddings:
        return [float(x) for x in self.embeddings.transform((None, text, None))]

    return None
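
For example, computing a single embeddings vector; the result is a plain list of floats with one entry per model dimension:

vector = api.transform("US tops 5 million confirmed virus cases")

# Vector length equals the embeddings model's output dimension
print(len(vector), vector[:3])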

upsert()

Runs an embeddings upsert operation for previously batched documents.

Source code in txtai/api/base.py
def upsert(self):
    """
    Runs an embeddings upsert operation for previously batched documents.
    """

    if self.cluster:
        self.cluster.upsert()
    else:
        super().upsert()

wait()

Closes threadpool and waits for completion.

Source code in txtai/app/base.py
def wait(self):
    """
    Closes threadpool and waits for completion.
    """

    if self.pool:
        self.pool.close()
        self.pool.join()
        self.pool = None

workflow(name, elements)

Executes a workflow.

Parameters:

    name: workflow name (required)
    elements: elements to process (required)

Returns:

    processed elements

Source code in txtai/app/base.py
def workflow(self, name, elements):
    """
    Executes a workflow.

    Args:
        name: workflow name
        elements: elements to process

    Returns:
        processed elements
    """

    if hasattr(elements, "__len__") and hasattr(elements, "__getitem__"):
        # Convert to tuples and return as a list since input is sized
        elements = [tuple(element) if isinstance(element, list) else element for element in elements]
    else:
        # Convert to tuples and return as a generator since input is not sized
        elements = (tuple(element) if isinstance(element, list) else element for element in elements)

    # Execute workflow
    return self.workflows[name](elements)
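
For example, running a configured workflow by name; sized inputs such as lists return a list, while unsized inputs return a generator. A sketch, assuming the config defines a workflow named summarize:

# Assumes config defines a workflow named "summarize"
results = list(api.workflow("summarize", ["Long article text to condense..."]))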