Skip to content
Snippets Groups Projects
Commit 1a46c114 authored by Guillaume Di Fatta
Browse files

Merge branch 'master' into 'main'

initial commit

See merge request !1
parents 73185cf8 cc9c0902
Branches
No related tags found
1 merge request: !1 initial commit
import snscrape.modules.twitter as sntwitter import snscrape.modules.twitter as sntwitter
import pandas as pd import pandas as pd
import datetime as dt
# cool from src.functions import construct_query, convert_date
df = pd.DataFrame(columns=['date', 'tweet'])
limit = 100
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query=" Metaverse -filter:replies -filter:links -filter:retweets -filter:nativeretweets").get_items()): table = pd.DataFrame(columns=['date', 'tweet'])
if i > limit:
terms = ["Metaverse"]
negativeTerms = ["#metaverse"]
filters = []
negativeFilters = ["replies", "links", "retweets", "nativeretweets"]
languages = ["en"]
tweetNumber = 1000
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query=construct_query(terms, negativeTerms, filters, negativeFilters)).get_items()):
if i > tweetNumber:
break break
if (tweet.lang == "en"): if (tweet.lang in languages):
df2 = {'date': tweet.date.strftime( dict = {'date': convert_date(tweet.date), 'tweet': tweet.rawContent}
"%m/%d/%Y, %H:%M:%S"), 'tweet': tweet.rawContent}
if ("metaverse" in tweet.rawContent.lower()):
table = pd.concat([table, pd.DataFrame.from_records([dict])])
df = pd.concat([df, pd.DataFrame.from_records([df2])]) print(i/tweetNumber*100, " %")
print(i/limit*100)
table = table.drop_duplicates(subset=["tweet"]).sort_values(by=["tweet"])
df = df.drop_duplicates(subset=["tweet"]) table.to_json("output/twitter_data.json", orient="split")
df = df.sort_values(by=["tweet"])
df.to_json("twitter_data.json", orient="split")
This diff is collapsed.
File added
import datetime as dt
def construct_query(terms, negativeTerms, filters, negativeFilters) -> str:
    """Assemble a search query string from terms and filters.

    Every entry is emitted followed by a single space, in this order:
    terms unchanged, negative terms prefixed with ``-``, filters prefixed
    with ``filter:``, and negative filters prefixed with ``-filter:``.

    Args:
        terms: Iterable of strings to include in the query as-is.
        negativeTerms: Iterable of strings to emit with a ``-`` prefix.
        filters: Iterable of filter names to emit as ``filter:<name>``.
        negativeFilters: Iterable of filter names to emit as
            ``-filter:<name>``.

    Returns:
        The assembled query. Each entry is followed by one space, so a
        non-empty result ends with a space; the result is ``""`` when all
        four inputs are empty.

    Raises:
        TypeError: If an entry is not a string (string concatenation fails).
    """
    # One (prefix, entries) pair per category keeps the emission order
    # explicit and avoids four copies of the same loop.
    groups = (
        ("", terms),
        ("-", negativeTerms),
        ("filter:", filters),
        ("-filter:", negativeFilters),
    )
    parts = []
    for prefix, entries in groups:
        # Plain concatenation (not an f-string) so non-string entries still
        # raise TypeError instead of being silently stringified.
        parts.extend(prefix + entry + " " for entry in entries)
    return "".join(parts)
def convert_date(date) -> str:
    """Format a date as ``MM/DD/YYYY, HH:MM:SS``.

    Args:
        date: Any object exposing ``strftime`` (for example a
            ``datetime.datetime``).

    Returns:
        The value rendered with the format ``"%m/%d/%Y, %H:%M:%S"``.
    """
    return date.strftime("%m/%d/%Y, %H:%M:%S")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment