How to use the eli5.base.WeightedSpans class in eli5

To help you get started, we’ve selected a few eli5 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
def test_weighted_spans_char():
    """Check spans reported for a ``char`` analyzer with 3-4 char ngrams.

    All four weighted features are expected as spans in the lowercased
    document; the expected ``hl_in_text`` weight (9) equals the sum of
    the span weights (2 + 5 + 8 - 6).
    """
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 4))
    vectorizer.fit([text])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('a le', 5), FW('on ', 8)],
        neg=[FW('lem', -6)],
    )
    actual = get_weighted_spans(text, vectorizer, weights)
    expected = WeightedSpans(
        analyzer='char',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('lem', [(17, 20)], -6),
            ('on ', [(20, 23)], 8),
            ('a le', [(7, 11)], 5),
        ],
        other=FeatureWeights(
            pos=[FW(hl_in_text, 9)],
            neg=[],
        ),
    )
    assert actual == expected
github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
def test_no_weighted_spans():
    """Check that empty feature weights give no spans and an empty ``other``."""
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 4))
    vectorizer.fit([text])
    no_weights = FeatureWeights(pos=[], neg=[])
    actual = get_weighted_spans(text, vectorizer, no_weights)
    expected = WeightedSpans(
        analyzer='char',
        document='i see: a leaning lemon tree',
        weighted_spans=[],
        other=FeatureWeights(pos=[], neg=[]),
    )
    assert actual == expected
github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
def test_weighted_spans_word_stopwords():
    """Check spans for a ``word`` analyzer with English stop words removed.

    Only 'lemon' and 'tree' are expected as spans; 'bias' and 'see' are
    expected to stay in ``other``, next to an ``hl_in_text`` entry whose
    weight (-1) equals the sum of the span weights (5 - 6).
    """
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='word', stop_words='english')
    vectorizer.fit([text])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('lemon', 5), FW('bias', 8)],
        neg=[FW('tree', -6)],
    )
    actual = get_weighted_spans(text, vectorizer, weights)
    expected = WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('lemon', [(17, 22)], 5),
            ('tree', [(23, 27)], -6),
        ],
        other=FeatureWeights(
            pos=[FW('bias', 8), FW('see', 2)],
            neg=[FW(hl_in_text, -1)],
        ),
    )
    assert actual == expected
github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
"""
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='char', ngram_range=(3, 3))
    vec.fit([doc])
    w_spans = get_weighted_spans(
        doc, vec,
        FeatureWeights(
            pos=[
                FW([{'name': 'foo', 'sign': 1}, {'name': 'see', 'sign': -1}], 2),
                FW([{'name': 'zoo', 'sign': 1}, {'name': 'bar', 'sign': 1}], 3),
            ],
            neg=[
                FW([{'name': 'ree', 'sign': 1}, {'name': 'tre', 'sign': 1}], -4),
            ],
        ))
    assert w_spans == WeightedSpans(
        analyzer='char',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('tre', [(23, 26)], -4),
            ('ree', [(24, 27)], -4),
            ],
        other=FeatureWeights(
            pos=[
                FW([{'name': 'zoo', 'sign': 1}, {'name': 'bar', 'sign': 1}], 3),
            ],
            neg=[FW(hl_in_text, -2)],
        ))
github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
def test_weighted_spans_char_wb():
    """Check spans reported for a ``char_wb`` analyzer with 3-4 char ngrams.

    'a le' is expected to stay in ``other`` rather than become a span;
    the expected ``hl_in_text`` weight (0) equals the sum of the span
    weights (2 - 6 + 8 - 4).
    """
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(3, 4))
    vectorizer.fit([text])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('a le', 5), FW('on ', 8)],
        neg=[FW('lem', -6), FW(' lem', -4)],
    )
    actual = get_weighted_spans(text, vectorizer, weights)
    expected = WeightedSpans(
        analyzer='char_wb',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('lem', [(17, 20)], -6),
            ('on ', [(20, 23)], 8),
            (' lem', [(16, 20)], -4),
        ],
        other=FeatureWeights(
            pos=[FW('a le', 5), FW(hl_in_text, 0)],
            neg=[],
        ),
    )
    assert actual == expected
github TeamHG-Memex / eli5 / tests / test_sklearn_text.py View on Github external
def test_weighted_spans_word():
    """Check spans reported for a plain ``word`` analyzer.

    'see', 'lemon' and 'tree' are expected as spans; 'bias' is expected
    to stay in ``other`` together with an ``hl_in_text`` entry of weight 0
    (2 + 4 - 6), and ``neg_remaining`` is expected to be carried over.
    """
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='word')
    vectorizer.fit([text])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('lemon', 4), FW('bias', 8)],
        neg=[FW('tree', -6)],
        neg_remaining=10,
    )
    actual = get_weighted_spans(text, vectorizer, weights)
    expected = WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('lemon', [(17, 22)], 4),
            ('tree', [(23, 27)], -6),
        ],
        other=FeatureWeights(
            pos=[FW('bias', 8), FW(hl_in_text, 0)],
            neg=[],
            neg_remaining=10,
        ),
    )
    assert actual == expected
github TeamHG-Memex / eli5 / tests / test_formatters_text_helpers.py View on Github external
spans=[
                            ('a', [(0, 1)], 1.5),
                            ('b', [(1, 2)], 2.5),
                        ],
                    ),
                    DocWeightedSpans(
                        document='xy',
                        spans=[
                            ('xy', [(0, 2)], -4.5),
                        ],
                    )]
            )),
        TargetExplanation(
            target='two',
            feature_weights=FeatureWeights(pos=[], neg=[]),
            weighted_spans=WeightedSpans(
                docs_weighted_spans=[
                    DocWeightedSpans(
                        document='abc',
                        spans=[
                            ('a', [(0, 1)], 0.5),
                            ('c', [(2, 3)], 3.5),
                        ],
                    ),
                    DocWeightedSpans(
                        document='xz',
                        spans=[
                            # char_wb at the start of the document
                            (' xz', [(-1, 2)], 1.5),
                        ],
                    )],
            )),
github TeamHG-Memex / eli5 / eli5 / sklearn / text.py View on Github external
def get_weighted_spans(doc, vec, feature_weights):
    # type: (Any, Any, FeatureWeights) -> Optional[WeightedSpans]
    """ Return a WeightedSpans for ``doc`` when spans can be computed.

    A FeatureUnion ``vec`` is delegated to
    ``_get_weighted_spans_from_union``. For any other ``vec``, the result
    wraps the single document's spans, with ``other`` built by
    ``_get_other`` from the features that were found. Returns None when
    ``_get_doc_weighted_spans`` returns None.
    """
    if isinstance(vec, FeatureUnion):
        return _get_weighted_spans_from_union(doc, vec, feature_weights)

    spans_result = _get_doc_weighted_spans(doc, vec, feature_weights)
    if spans_result is None:
        return None

    found_features, doc_weighted_spans = spans_result
    # A lone vectorizer has no name, hence the empty-string key.
    other = _get_other(feature_weights, [('', found_features)])
    return WeightedSpans([doc_weighted_spans], other=other)
github TeamHG-Memex / eli5 / eli5 / sklearn / text.py View on Github external
def feature_fn(name):
            """ Strip ``vec_prefix`` from a feature name, or drop the feature.

            Returns None for FormattedFeatureName instances and for names
            that do not start with ``vec_prefix``; otherwise returns the
            name with the prefix removed.
            """
            if (isinstance(name, FormattedFeatureName)
                    or not name.startswith(vec_prefix)):
                return None  # drop feature
            return name[len(vec_prefix):]  # remove prefix

        result = _get_doc_weighted_spans(doc, vec, feature_weights, feature_fn)
        if result:
            found_features, doc_weighted_spans = result
            doc_weighted_spans.vec_name = vec_name
            named_found_features.append((vec_name, found_features))
            docs_weighted_spans.append(doc_weighted_spans)

    if docs_weighted_spans:
        return WeightedSpans(
            docs_weighted_spans,
            other=_get_other(feature_weights, named_found_features),
        )
    else:
        return None