How to use the konlpy.stream.BaseStreamer class in konlpy

To help you get started, we’ve selected a few konlpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github konlpy / konlpy / konlpy / stream / dcinside.py View on Github external
from __future__ import unicode_literals

from datetime import datetime
from bs4 import BeautifulSoup, SoupStrainer
from konlpy.stream import BaseStreamer
from konlpy.data import StringWriter

import requests
import time
import colorama

from colorama import Style, Fore
from konlpy.utils import pprint


class DCInsideStreamer(BaseStreamer):
    """DCInside is a biggest community website in Korea.
    DCInsideStreamer helps to stream specific gallery from past to future.
    """

    def __init__(self, markup='lxml', is_async=True):
        super(DCInsideStreamer, self).__init__(is_async=is_async)
        self.is_async = is_async

        parser = self.get_parser()
        parser.add_argument(
            '--include_comments',
            help='include comments',
            action='store_true'
        )
        parser.add_argument(
            '--comments_per_page',
github konlpy / konlpy / konlpy / stream / daum.py View on Github external
counts = []
    keywords = []

    item_list = soup.find("div", {"class": "realtime_part"}).findAll("div", {"class": "roll_txt"})
    item_list = [item.find("div", {"class": "rank_cont"}) for item in item_list]

    for item in item_list:
        count = item.find("span", {"class": "ir_wa"}).getText()
        keyword = item.find("span", {"class": "txt_issue"}).getText()
        counts.append(count)
        keywords.append(keyword)

    return counts, keywords


class DaumStreamer(BaseStreamer):
    """DaumStreamer helps to stream daum trending keywords asynchronously.

    .. code-block:: python

        >>> from konlpy.stream import daum
        >>> streamer = daum.DaumStreamer()
        >>> streamer.stream()
        김민승
        이유애린
        훈남정음
        소유진
        ...

    """

    def __init__(self, is_async=True):
github konlpy / konlpy / konlpy / stream / twitter.py View on Github external
if (self.limit == self.options.tweet_limits) | (
                        (time.time() - self.init_time) >= self.options.time_limits):
                    return False

        else:
            write_tweets_to_files(tweet)
            self.limit += 1
            if self.limit == self.options.tweet_limits:
                return False

    def on_error(self, status_code):
        """Handle an error status code reported for the stream.

        Args:
            status_code (int): status code passed in by the caller.

        Returns:
            ``False`` when ``status_code`` is 420; otherwise ``None``
            (the method falls through without an explicit return).
        """
        # NOTE(review): the original comment described 420 as "connection
        # failed"; Twitter's streaming API appears to use 420 for rate
        # limiting, and tweepy treats a False return as "disconnect" --
        # TODO confirm against the tweepy version in use.
        should_stop = (status_code == 420)
        if not should_stop:
            return None
        return False


class TwitterStreamer(BaseStreamer):
    """Start streaming on Twitter with your api keys and tokens.

    Args:
        dirname (str): directory to save output files.
        word_list (list): list of words to be streamed.
        async (bool): if true, apply threading in tweepy layer.
    """

    def __init__(self, dirname=DATA_DIR, word_list=ALPHABET, is_async=True):
        super(TwitterStreamer, self).__init__(is_async=is_async)
        self.is_async = is_async

        parser = self.get_parser()
        parser.add_argument(
            '--consumer_key',
            help='consumer key',
github konlpy / konlpy / konlpy / stream / google_trend.py View on Github external
from __future__ import unicode_literals

from datetime import datetime, timedelta
from konlpy.stream import BaseStreamer
from konlpy.data import StringWriter

import requests
import time
import json
import colorama

from colorama import Style, Fore
from konlpy.utils import pprint


class GoogleTrendStreamer(BaseStreamer):
    """Google is a biggest website in the world.
    GoogleTrendStreamer helps to stream trends from past to future.
    """

    def __init__(self, markup='lxml', is_async=True):
        super(GoogleTrendStreamer, self).__init__(is_async=is_async)
        self.is_async = is_async

        parser = self.get_parser()
        parser.add_argument(
            '--init_date',
            help='initial post_id to start crawling',
            default=datetime.today().strftime("%Y%m%d")
        )
        parser.add_argument(
            '--final_date',
github konlpy / konlpy / konlpy / stream / misc.py View on Github external
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from time import sleep
from konlpy.stream import BaseStreamer, TwitterStreamer
from konlpy.stream.naver import get_current_trend
from konlpy.utils import pprint


class NavtterStreamer(BaseStreamer):
    """Start streaming of twitter about naver's current top trending keywords.
    In order to use NavtterStreamer, you have to set-up both twitter and Navtter's options.

    .. code-block:: python

        from konlpy.stream import NavtterStreamer

        app = NavtterStreamer()
        app.show_options()  # Print available options
        app.options.interval = 3600  # Update naver trends every 3600 secs
        app.options.verbose = True  # Print trends

        # Your twitter api keys and tokens.
        app.twitter.options.consumer_key = 'consumer_key'
        app.twitter.options.consumer_secret = 'consumer_secret'
        app.twitter.options.access_token = 'access_token'
github konlpy / konlpy / konlpy / stream / naver.py View on Github external
url = 'https://www.naver.com/'
    html = urlopen(url)
    soup = BeautifulSoup(html, 'html.parser')
    counts = []
    keywords = []

    for item in soup.find("div", {"class": "ah_roll_area PM_CL_realtimeKeyword_rolling"}).findAll("li", {"class": "ah_item"}):
        count = item.find("span", {"class": "ah_r"}).getText()
        keyword = item.find("span", {"class": "ah_k"}).getText()
        counts.append(count)
        keywords.append(keyword)

    return counts, keywords


class NaverStreamer(BaseStreamer):
    """NaverStreamer helps to stream naver trending keywords asynchronously.

    .. code-block:: python

        >>> from konlpy.stream import naver
        >>> streamer = naver.NaverStreamer()
        >>> streamer.stream()
        cj채용
        온주완의 뮤직쇼
        유상무
        현대차
        ...

    """

    def __init__(self, is_async=True):