Fixed https filtering
This commit is contained in:
parent
8178240120
commit
7aabde7516
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,4 +3,5 @@ venv
|
||||||
|
|
||||||
*.jpg
|
*.jpg
|
||||||
*.png
|
*.png
|
||||||
*.json
|
*.json
|
||||||
|
*.log
|
12
cloud.py
12
cloud.py
|
@ -33,6 +33,8 @@ parser.add_argument("--color", action="store_true",
|
||||||
help="Use mask as color mask")
|
help="Use mask as color mask")
|
||||||
parser.add_argument("-N", metavar="max_words", type=int,
|
parser.add_argument("-N", metavar="max_words", type=int,
|
||||||
help="Maximum number of words in WordCloud (Default 200)")
|
help="Maximum number of words in WordCloud (Default 200)")
|
||||||
|
parser.add_argument("-v", "--verbose", action="store_true",
|
||||||
|
help="Dump comments to comments.log")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -50,11 +52,11 @@ if args.N is None:
|
||||||
|
|
||||||
|
|
||||||
def fetch_comments(comment) -> list:
|
def fetch_comments(comment) -> list:
|
||||||
|
comment_body = re.sub(r'[\[\(]?https?:\/\/[0-9A-Za-z\/\?#\[\]\)@\.!$\&%\-+,;=]+', '', comment.body)
|
||||||
if len(comment.replies) == 0:
|
if len(comment.replies) == 0:
|
||||||
c = re.sub(r'(\[text\]\()?https?://[0-9A-Za-z/\?#\[\]@\.!$\&%\-+,;=]+\)?', '', comment.body)
|
return [comment_body]
|
||||||
return [re.sub(r'https?://[0-9A-Za-z/\?#\[\]@\.!$\&%\-+,;=]+', '', c)]
|
|
||||||
|
|
||||||
raw_comments = [comment.body]
|
raw_comments = [comment_body]
|
||||||
|
|
||||||
for comm in comment.replies:
|
for comm in comment.replies:
|
||||||
raw_comments.extend(fetch_comments(comm))
|
raw_comments.extend(fetch_comments(comm))
|
||||||
|
@ -122,6 +124,10 @@ if args.color is True:
|
||||||
if args.o is not None:
|
if args.o is not None:
|
||||||
wordcloud.to_file(args.o)
|
wordcloud.to_file(args.o)
|
||||||
|
|
||||||
|
if args.verbose is True:
|
||||||
|
with open("comments.log", "w+", encoding="utf-8") as file:
|
||||||
|
file.writelines("%s\n" % comment for comment in comments)
|
||||||
|
|
||||||
plt.imshow(wordcloud, interpolation="bilinear")
|
plt.imshow(wordcloud, interpolation="bilinear")
|
||||||
plt.axis("off")
|
plt.axis("off")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue