"""Collect tweets for a search query and store the matching ones as JSON.

The script searches Twitter through snscrape, keeps tweets whose language is
listed in ``languages`` and whose text contains "metaverse" (case-insensitive),
prints the result of ``calc_frequency`` for the collected table, and writes the
de-duplicated, text-sorted table to ``output/twitter_data.json``.
"""
from itertools import islice
from pathlib import Path

import pandas as pd
import snscrape.modules.twitter as sntwitter

from src.functions import calc_frequency, construct_query, convert_date_str

# --- Search configuration ---------------------------------------------------
# These lists are handed to construct_query; how they are combined into the
# final query string is defined by that helper (not visible in this file).
terms = ["Metaverse"]
negativeTerms = ["#metaverse"]
filters = []
negativeFilters = ["replies", "links", "retweets", "nativeretweets"]

# Only tweets whose ``lang`` attribute is in this list are kept.
languages = ["en"]

# Maximum number of tweets pulled from the scraper.
tweetNumber = 100

scraper = sntwitter.TwitterSearchScraper(
    query=construct_query(terms, negativeTerms, filters, negativeFilters)
)

# Rows are accumulated in a plain list and turned into a DataFrame once.
# Calling pd.concat inside the loop copies the whole table on every hit and
# triggers a deprecation warning when the starting frame is empty.
rows = []

# islice stops after exactly ``tweetNumber`` items. The previous
# ``if i > tweetNumber: break`` check let one extra tweet through, and the
# progress print divided by zero when tweetNumber was 0.
for count, tweet in enumerate(islice(scraper.get_items(), tweetNumber), start=1):
    if tweet.lang in languages and "metaverse" in tweet.rawContent.lower():
        rows.append(
            {'date': convert_date_str(tweet.date), 'tweet': tweet.rawContent}
        )
    # Progress in percent of the requested number of tweets.
    print(count / tweetNumber * 100, " %")

# NOTE: the index is now the row position (0..n-1) instead of the all-zero
# index that repeated pd.concat produced; the column layout is unchanged.
table = pd.DataFrame(rows, columns=['date', 'tweet'])

print(calc_frequency(table))

table = table.drop_duplicates(subset=["tweet"]).sort_values(by=["tweet"])

# Make sure the target directory exists so to_json does not fail on a fresh
# checkout.
output_path = Path("output/twitter_data.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
table.to_json(output_path, orient="split")