From 72f6bb2fd31e5dad58314090f02607693265657d Mon Sep 17 00:00:00 2001
From: Robert Altner <robert-altner@t-online.de>
Date: Fri, 10 Jan 2020 20:23:22 +0100
Subject: [PATCH] improved smart

---
 api/thesaurus.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/api/thesaurus.py b/api/thesaurus.py
index e240b05..9a60dfd 100644
--- a/api/thesaurus.py
+++ b/api/thesaurus.py
@@ -1,12 +1,14 @@
 import requests
 import re, random
+import nltk
 from util import logging, config
 
 key = config.settings["api_keys"]["thesaurus"]
 url = f"http://thesaurus.altervista.org/thesaurus/v1?key={key}&language=en_US&output=json&word="
 
 def thesaurufy_sentence(sentence):
-    symbols = re.findall(r"[\w']+|[.,!?;]", sentence)
+    symbols = nltk.word_tokenize(sentence)
+    tags = nltk.pos_tag(symbols)
 
     if len(symbols) == 0:
         return ""
@@ -15,6 +17,10 @@ def thesaurufy_sentence(sentence):
         if not symbols[i].isalpha():
             continue
 
+        if tags[i][1] != "NN" and tags[i][1] != "VB" and tags[i][1] != "VBG" and tags[i][1] != "VBP" and tags[i][1] != "JJ" and tags[i][1] != "RB":
+            symbols[i] = " " + symbols[i]
+            continue
+
         response = requests.get(url + symbols[i])
         if not response.ok:
             # logging.warning(f"Thesaurus API returned {response.status_code} ({url + symbols[i]})")
@@ -22,14 +28,30 @@ def thesaurufy_sentence(sentence):
             continue
         
         responses = response.json()["response"]
-        words = responses[random.randint(0, len(responses) - 1)]["list"]["synonyms"].split("|")
+        condition = ""
+        if tags[i][1] == "JJ":
+            condition = "(adj)"
+        elif tags[i][1] == "RB":
+            condition = "(adv)"
+        elif tags[i][1] == "NN":
+            condition = "(noun)"
+        else:
+            condition = "(verb)"
+
+        try:
+            words = random.choice([a for a in responses if a["list"]["category"] == condition])["list"]["synonyms"].split("|")
+        except:
+            symbols[i] = " " + symbols[i]
+            continue
+        
+        # print(words)
         word = words[random.randint(0, len(words) - 1)]
         
         if "(" in word:
             if "antonym" in word.split("(")[1].lower():
                 symbols[i] = " " + symbols[i]
                 continue
-            
+
             word = word.split("(")[0][:-1]
 
         symbols[i] = " " + word