From 0f399bfe2be74dae6de4f88f8a7e67377b394eef Mon Sep 17 00:00:00 2001 From: Guillaume Di Fatta <guillaume.difatta@student-cs.fr> Date: Fri, 26 May 2023 16:12:00 +0200 Subject: [PATCH] adding gitignore --- .gitignore | 160 ++++++++++++++++++++++ main.py | 9 +- src/__init__.py | 0 src/__pycache__/functions.cpython-310.pyc | Bin 652 -> 0 bytes src/functions.py | 11 +- 5 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 src/__init__.py delete mode 100644 src/__pycache__/functions.cpython-310.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/main.py b/main.py index b8e9f29..0beed75 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ import snscrape.modules.twitter as sntwitter import pandas as pd -from src.functions import construct_query, convert_date +from src.functions import construct_query, convert_date_str, calc_frequency table = pd.DataFrame(columns=['date', 'tweet']) @@ -13,20 +13,21 @@ negativeFilters = ["replies", "links", "retweets", "nativeretweets"] languages = ["en"] -tweetNumber = 1000 +tweetNumber = 100 for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query=construct_query(terms, negativeTerms, filters, negativeFilters)).get_items()): if i > tweetNumber: break if (tweet.lang in languages): - dict = {'date': convert_date(tweet.date), 'tweet': tweet.rawContent} + dict = {'date': convert_date_str( + tweet.date), 'tweet': tweet.rawContent} if ("metaverse" in tweet.rawContent.lower()): table = pd.concat([table, pd.DataFrame.from_records([dict])]) print(i/tweetNumber*100, " %") - +print(calc_frequency(table)) table = table.drop_duplicates(subset=["tweet"]).sort_values(by=["tweet"]) table.to_json("output/twitter_data.json", orient="split") diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__pycache__/functions.cpython-310.pyc b/src/__pycache__/functions.cpython-310.pyc deleted file mode 100644 index 87e8b7179e9d818f85ebba1de9d1adba8b8cfdc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 652 zcmd1j<>g{vU|@*eR*-U$k%8ech=Yuo7#J8F7#J9eWf&M3QW#Pga~Pr+Q<zeiTNt94 zQdm-0TNt94Qy7C8G}&H)^!X*TfG8;DWME(b@tr{?<bX_ItYxWTaAAn$sAa8TsbNiF z5@#@HsAVZ+sbO?sh~=tft6@oDu3-~rfXndLFo9**YgkfPYS@uvWNMhfG8{E5DXcXd zNHUr=EFc&8-D3E~sPKzXw~9S2Gp8iA$f}A%7s^`6pvifQwIsDDxA+!sUTS(`NoHAU z2$&01TzrckChi8-U3`nRur#%(@)l#sEvCGZTa0P9nDWw!SQ!`?z{D^6WUH9a;?$zz z828f5oSekc+|(GC{N&Qy)Vz}780Y+y)R=<Gl8pSkn2_?+)RLIuqU4yg(!Au7%>2A! zy@JYH{K@%w#U(|h$tCe%n?PY#ECLEdMh+%EMh+$+IA&og5@29pNM?ivHHZSSnHd-u zz`+fQ*BXWe3@MBYLD9qHS0$vHtFM}(uNtYNpz2|z>T9JMtjTzbqqw9ftt2xyRg>`+ zOG;u%D%dX&f}eqb;TBJFeqLE>QAs>VQVbN6Y+x5LF|zzG5@ujv&}6yA0g?k5a*HXY l1ne-d48#<802P7F<FLuiPbtkwwF5b~7-SX)0|zsY5CHlCmKgv5 diff --git a/src/functions.py b/src/functions.py index 63c2429..b7e34d4 100644 --- a/src/functions.py +++ b/src/functions.py @@ -19,6 +19,15 @@ def construct_query(terms, negativeTerms, filters, negativeFilters): return (query) -def convert_date(date): +def convert_date_str(date): return (date.strftime( "%m/%d/%Y, %H:%M:%S")) + + +def time_to_num(time_str): + hh, mm, ss = map(int, time_str.split(':')) + return ss + 60*(mm+60*hh) + + +def calc_frequency(table_tweets): + return (abs(time_to_num(table_tweets["date"][0])-time_to_num(table_tweets["date"][-1]))/len(table_tweets["date"])) -- GitLab