Fixed https filtering

This commit is contained in:
Robert 2020-07-13 20:45:25 +02:00
parent 8178240120
commit 7aabde7516
2 changed files with 11 additions and 4 deletions

3
.gitignore vendored
View file

@@ -3,4 +3,5 @@ venv
*.jpg
*.png
*.json
*.json
*.log

View file

@@ -33,6 +33,8 @@ parser.add_argument("--color", action="store_true",
help="Use mask as color mask")
parser.add_argument("-N", metavar="max_words", type=int,
help="Maximum number of words in WordCloud (Default 200)")
parser.add_argument("-v", "--verbose", action="store_true",
help="Dump comments to comments.log")
args = parser.parse_args()
@@ -50,11 +52,11 @@ if args.N is None:
def fetch_comments(comment) -> list:
comment_body = re.sub(r'[\[\(]?https?:\/\/[0-9A-Za-z\/\?#\[\]\)@\.!$\&%\-+,;=]+', '', comment.body)
if len(comment.replies) == 0:
c = re.sub(r'(\[text\]\()?https?://[0-9A-Za-z/\?#\[\]@\.!$\&%\-+,;=]+\)?', '', comment.body)
return [re.sub(r'https?://[0-9A-Za-z/\?#\[\]@\.!$\&%\-+,;=]+', '', c)]
return [comment_body]
raw_comments = [comment.body]
raw_comments = [comment_body]
for comm in comment.replies:
raw_comments.extend(fetch_comments(comm))
@@ -122,6 +124,10 @@ if args.color is True:
if args.o is not None:
wordcloud.to_file(args.o)
if args.verbose is True:
with open("comments.log", "w+", encoding="utf-8") as file:
file.writelines("%s\n" % comment for comment in comments)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")