import snscrape.modules.twitter as sntwitter
import pandas as pd

from src.functions import construct_query, convert_date_str, calc_frequency

# --- Search configuration -------------------------------------------------
# Terms and filters are handed to the project helper `construct_query`, which
# builds the query string passed to the scraper.
terms = ["Metaverse"]
negativeTerms = ["#metaverse"]

filters = []
negativeFilters = ["replies", "links", "retweets", "nativeretweets"]

# Only tweets whose `lang` attribute is listed here are kept.
languages = ["en"]

# Maximum number of scraped tweets to inspect (not the number that is kept:
# tweets failing the language or keyword check still count towards the cap).
tweetNumber = 100

# --- Scraping -------------------------------------------------------------
# Matching tweets are gathered in a plain list and turned into a DataFrame
# once after the loop; concatenating onto the frame on every hit copies the
# whole frame each time and leaves every row with index label 0.
rows = []

scraper = sntwitter.TwitterSearchScraper(
    query=construct_query(terms, negativeTerms, filters, negativeFilters))

for i, tweet in enumerate(scraper.get_items()):
    # `i` is zero-based, so `>=` stops after exactly `tweetNumber` tweets
    # (`>` would let one extra tweet through).
    if i >= tweetNumber:
        break

    # Keep the tweet only if it is in an accepted language and its text
    # contains "metaverse" in any letter case.
    if tweet.lang in languages and "metaverse" in tweet.rawContent.lower():
        rows.append({
            'date': convert_date_str(tweet.date),
            'tweet': tweet.rawContent,
        })

    # Progress in percent of the cap; i + 1 tweets have been handled so far.
    print((i + 1) / tweetNumber * 100, " %")

# The explicit column list keeps the frame layout stable even if `rows` is
# empty.
table = pd.DataFrame(rows, columns=['date', 'tweet'])

# --- Reporting and export -------------------------------------------------
# NOTE(review): the frequency is computed before duplicates are dropped, as in
# the original script -- confirm that counting duplicates is intended.
print(calc_frequency(table))

# De-duplicate on the tweet text, order by text, and renumber the rows so the
# "index" array written by orient="split" is a clean 0..n-1 sequence.
table = (
    table.drop_duplicates(subset=["tweet"])
    .sort_values(by=["tweet"])
    .reset_index(drop=True)
)

table.to_json("output/twitter_data.json", orient="split")