Initial commit
This commit is contained in:
commit
f42830dcd0
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
.idea
|
||||||
|
venv
|
||||||
|
|
||||||
|
*.jpg
|
||||||
|
*.png
|
||||||
|
*.json
|
BIN
ARIALUNI.TTF
Normal file
BIN
ARIALUNI.TTF
Normal file
Binary file not shown.
10
README.md
Normal file
10
README.md
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Reddit WordCloud generator
|
||||||
|
This script turns Reddit comments into a beautiful word cloud.
|
||||||
|
|
||||||
|
To use it, first [create an application on reddit](https://www.reddit.com/prefs/apps).
|
||||||
|
Choose a name, select "script" and add a dummy description and dummy URLs. Then copy the
|
||||||
|
ClientID and Secret and open your terminal:
|
||||||
|
|
||||||
|
``py configure.py <client_id> <secret>``
|
||||||
|
|
||||||
|
Now you can use the WordCloud generator. Type ``py cloud.py -h`` for more information.
|
127
cloud.py
Normal file
127
cloud.py
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
import praw
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
|
||||||
|
from stop_words import safe_get_stop_words
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image, ImageColor
|
||||||
|
|
||||||
|
# Command-line interface of the word cloud generator.
#
# Defaults are declared directly on the arguments instead of being patched in
# after parsing. The mask option (-m) deliberately has no default: it stays
# None unless the user passes an image path, and the rest of the script only
# loads a mask when it is not None. The value 200 is the default for the
# maximum number of words (-N), not for the mask.
parser = argparse.ArgumentParser(description="Fetches comments from a reddit post and makes a word cloud")
parser.add_argument("--sub", action="store_true",
                    help="Treats the ID as the name of a sub")
parser.add_argument("id", type=str,
                    help="The ID of the reddit post")
parser.add_argument("-n", metavar="limit", type=int,
                    help="The amount of times \"More comments...\" is resolved. (Default: all)")
parser.add_argument("-p", metavar="posts", type=int, default=25,
                    help="Number of posts to fetch (Only in sub mode) (Default: 25)")
parser.add_argument("-l", nargs="+",
                    help="The languages to add stopwords for")
parser.add_argument("-o", metavar="out", type=str,
                    help="Output file")
parser.add_argument("-s", metavar="scale", type=int, default=1,
                    help="The scale of the wordcloud")
parser.add_argument("-m", metavar="mask", type=str,
                    help="The mask that is applied to the wordcloud")
parser.add_argument("-b", metavar="background", type=str, default="black",
                    help="Background color of the wordcloud")
parser.add_argument("-c", metavar=("cw", "cc"), type=str, nargs=2,
                    help="Width and color of contour")
parser.add_argument("--color", action="store_true",
                    help="Use mask as color mask")
parser.add_argument("-N", metavar="max_words", type=int, default=200,
                    help="Maximum number of words in WordCloud (Default 200)")

args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_comments(comment) -> list:
    """Collect the text of a comment and of every reply beneath it.

    The result is ordered depth-first: the comment's own body comes first,
    followed by the flattened subtree of each reply in iteration order.

    Args:
        comment: An object exposing a ``body`` string and an iterable
            ``replies`` of objects with the same two attributes.

    Returns:
        A list of body strings with http/https URLs removed.
    """
    # Strip links from every comment, not only from leaves, so URLs never
    # end up as "words" in the cloud regardless of where they were posted.
    bodies = [re.sub(r'https?://\S+', '', comment.body)]

    for reply in comment.replies:
        bodies.extend(fetch_comments(reply))

    return bodies
|
||||||
|
|
||||||
|
|
||||||
|
# Load the Reddit API credentials from config.json.
try:
    with open("config.json", encoding="utf-8") as file:
        settings = json.load(file)
except FileNotFoundError:
    # Exit with a readable hint instead of a raw traceback on first use.
    raise SystemExit("config.json not found. Run configure.py <client_id> <secret> first.") from None

reddit = praw.Reddit(client_id=settings["client_id"],
                     client_secret=settings["secret"],
                     user_agent="Windows10:RWC:1.0")

# In sub mode the ID names a subreddit and up to args.p hot posts are read;
# otherwise the ID is a single submission, so the progress total is 1.
if args.sub:
    posts = reddit.subreddit(args.id).hot(limit=args.p)
    total = args.p
else:
    posts = [reddit.submission(id=args.id)]
    total = 1

comments = []
for i, post in enumerate(posts, start=1):
    # "\r" rewrites the same terminal line for each post.
    print(f"\rFetching comments... {i}/{total} ", end=" ", flush=True)
    post.comments.replace_more(limit=args.n)
    for top_level_comment in post.comments:
        comments.extend(fetch_comments(top_level_comment))

print(f"Done! Processed {len(comments)} comments")

# Start from the wordcloud package's stopwords and add the requested languages.
stopwords = set(STOPWORDS)
if args.l is not None:
    for language in args.l:
        stopwords.update(safe_get_stop_words(language.lower()))

# The mask image is optional; the color generator is built from the same image.
mask = None
colors = None
if args.m is not None:
    print("Creating mask...", end=" ", flush=True)
    mask = np.array(Image.open(args.m))
    colors = ImageColorGenerator(mask)
    print("Done!")

# Contour width and color come as a pair from -c; width 0 when not given.
cw = 0
cc = None
if args.c is not None:
    cw = int(args.c[0])
    cc = args.c[1]

wordcloud = WordCloud(font_path="ARIALUNI.TTF",
                      max_words=args.N,
                      collocations=False,
                      scale=args.s,
                      stopwords=stopwords,
                      mask=mask,
                      background_color=args.b,
                      mode="RGB",
                      contour_width=cw,
                      contour_color=cc
                      ).generate(' '.join(comments))

if args.color:
    wordcloud.recolor(color_func=colors)

if args.o is not None:
    wordcloud.to_file(args.o)

plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")

plt.show()
|
12
configure.py
Normal file
12
configure.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Read the Reddit API credentials from the command line and store them in
# config.json in the current working directory.
parser = argparse.ArgumentParser(description="Creates a config.json from your client_id and secret")
parser.add_argument("client_id", type=str)
parser.add_argument("secret", type=str)

args = parser.parse_args()

data = {"client_id": args.client_id, "secret": args.secret}
# Open for writing: the default read mode raises FileNotFoundError on first
# use and cannot be written to even when the file already exists.
with open("config.json", "w", encoding="utf-8") as file:
    json.dump(data, file)
|
Loading…
Reference in a new issue