From 72f6bb2fd31e5dad58314090f02607693265657d Mon Sep 17 00:00:00 2001 From: Robert Altner <robert-altner@t-online.de> Date: Fri, 10 Jan 2020 20:23:22 +0100 Subject: [PATCH] improved smart --- api/thesaurus.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/api/thesaurus.py b/api/thesaurus.py index e240b05..9a60dfd 100644 --- a/api/thesaurus.py +++ b/api/thesaurus.py @@ -1,12 +1,14 @@ import requests import re, random +import nltk from util import logging, config key = config.settings["api_keys"]["thesaurus"] url = f"http://thesaurus.altervista.org/thesaurus/v1?key={key}&language=en_US&output=json&word=" def thesaurufy_sentence(sentence): - symbols = re.findall(r"[\w']+|[.,!?;]", sentence) + symbols = nltk.word_tokenize(sentence) + tags = nltk.pos_tag(symbols) if len(symbols) == 0: return "" @@ -15,6 +17,10 @@ def thesaurufy_sentence(sentence): if not symbols[i].isalpha(): continue + if tags[i][1] != "NN" and tags[i][1] != "VB" and tags[i][1] != "VBG" and tags[i][1] != "VBP" and tags[i][1] != "JJ" and tags[i][1] != "RB": + symbols[i] = " " + symbols[i] + continue + response = requests.get(url + symbols[i]) if not response.ok: # logging.warning(f"Thesaurus API returned {response.status_code} ({url + symbols[i]})") @@ -22,14 +28,30 @@ def thesaurufy_sentence(sentence): continue responses = response.json()["response"] - words = responses[random.randint(0, len(responses) - 1)]["list"]["synonyms"].split("|") + condition = "" + if tags[i][1] == "JJ": + condition = "(adj)" + elif tags[i][1] == "RB": + condition = "(adv)" + elif tags[i][1] == "NN": + condition = "(noun)" + else: + condition = "(verb)" + + try: + words = random.choice([a for a in responses if a["list"]["category"] == condition])["list"]["synonyms"].split("|") + except: + symbols[i] = " " + symbols[i] + continue + + # print(words) word = words[random.randint(0, len(words) - 1)] if "(" in word: if "antonym" in word.split("(")[1].lower(): symbols[i] = " " + symbols[i] continue - + word = word.split("(")[0][:-1] symbols[i] = " " + word