update: look at todo

main
Thamognya Kodi 2023-01-11 22:59:46 +07:00
parent 21b9b6aaea
commit 66eb7eb9e0
6 changed files with 714 additions and 27 deletions

View File

@ -1,3 +1,4 @@
# type: ignore
from typing import Any, List, Tuple
import logging
@ -7,7 +8,7 @@ from pathlib import Path
import dotenv
import openai
from transformers import list_models, pipeline
from transformers import pipeline
logging.basicConfig(
filename="QA.log",
@ -17,16 +18,18 @@ logging.basicConfig(
)
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
import config
import internet
from ChatGPT import Chatbot
dotenv.load_dotenv()
def answer(
query: str,
model: str = "openai-ChatGPT",
model: str = "openai-chatgpt",
GOOGLE_SEARCH_API_KEY: str = "",
GOOGLE_SEARCH_ENGINE_ID: str = "",
OPENAI_API_KEY: str = "",
@ -50,34 +53,54 @@ def answer(
include prefix hf-*
#
"""
if not (model.startswith("openai-") == 0 or model.startswith("hf-") == 0):
model = "openai-ChatGPT" # Default
if not (model.startswith("openai-") or model.startswith("hf-")):
model = "openai-chatgpt" # Default
answer: str = ""
if model.startswith("openai-") == 0:
# results: tuple[list[str], list[str]] = internet.Google(
# query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
# ).google(filter_irrelevant=True)
print("hi")
if model.startswith("openai-"):
if model == "openai-chatgpt":
# ChatGPT
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
print(results)
chatbot = Chatbot(
{"session_token": CHATGPT_SESSION_TOKEN},
conversation_id=None,
parent_id=None,
)
response = chatbot.ask(
f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}",
conversation_id=None,
parent_id=None,
)
return (response.message, results[1])
else:
if model == "openai-text-davinci-003":
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=True)
context = " ".join(results[0])
context[: (4097 - len(query) - 10)]
response = openai.Completion.create(
model="text-davinci-003",
prompt=f"{context} Q: {query}",
max_tokens=len(context),
n=1,
stop=None,
temperature=0.5,
)
return (response.choices[0].text, results[1])
# TODO: add support later
else:
models = [
model
for model in list_models()
if "qa" in model or "question-answering" in model
]
model = model.replace("hf-", "", 1)
if not model in models:
model = "hf-"
# results: tuple[list[str], list[str]] = internet.Google(
# query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
# ).google(filter_irrelevant=False)
answer_result: tuple[Any, list[str]] = (answer, ["hi"]) # results[1])
if config.CONF_DEBUG:
logging.info(f"Answer: {answer_result}")
return answer_result
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
qa_model = pipeline("question-answering", model=model)
response = qa_model(question=query, context=" ".join(results[0]))
return (response["answer"], results[1])
# print(os.environ)
print(answer("What is the newest Pokemon Game?"))
# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2"))
# def custom_answer

View File

@ -0,0 +1,661 @@
# type: ignore
# For some reason the package is not working, so just deal with it and don't ask questions
import json
import logging
import re
import uuid
from time import sleep
import tls_client
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from twocaptcha import TwoCaptcha
# Set the logging threshold to ERROR. This does not disable logging entirely:
# ERROR and CRITICAL records are still emitted.
logging.basicConfig(level=logging.ERROR)
# Root URL that every endpoint path in this module is appended to.
BASE_URL = "https://chat.openai.com/"
# Subclass of undetected_chromedriver's Chrome that quits the browser from its
# finalizer. NOTE(review): the visible code in this module instantiates
# uc.Chrome directly, not this subclass — confirm whether it is still needed.
class Chrome(uc.Chrome):
# Finalizer: shut the driver down when the object is collected.
def __del__(self):
self.quit()
class Chatbot:
def __init__(
    self, config, conversation_id=None, parent_id=None, no_refresh=False
) -> None:
    """
    Build a chatbot session from a configuration mapping.

    :param config: Settings dict. Optional keys read here are "proxy" (str)
        and "verbose" (bool). Credentials are either "email" + "password"
        (combined with "isMicrosoftLogin" set to True, or with a "captcha"
        2Captcha API key) or a "session_token" (str).
    :param conversation_id: Conversation to continue, or None
    :param parent_id: Id of the message to reply to, or None
    :param no_refresh: With a session-token config, only fetch the
        Cloudflare cookies and return; the token is not installed and the
        session is not refreshed
    :raises Exception: If a config value has the wrong type or no usable
        credentials are present
    :return: None
    """
    self.config = config
    self.session = tls_client.Session(client_identifier="chrome_108")
    if "proxy" in config:
        if not isinstance(config["proxy"], str):
            raise Exception("Proxy must be a string!")
        # The same proxy is used for both schemes.
        self.session.proxies.update(
            {"http": config["proxy"], "https": config["proxy"]}
        )
    if "verbose" in config:
        if not isinstance(config["verbose"], bool):
            raise Exception("Verbose must be a boolean!")
        self.verbose = config["verbose"]
    else:
        self.verbose = False
    self.conversation_id = conversation_id
    self.parent_id = parent_id
    self.conversation_mapping = {}
    # Stacks of previous ids, consumed by rollback_conversation().
    self.conversation_id_prev_queue = []
    self.parent_id_prev_queue = []
    self.isMicrosoftLogin = False
    self.twocaptcha_key = None
    # detect_cookies() reads this flag; the session-token flow below never
    # goes through a login method that would set it, so define it up front.
    self.session_cookie_found = False
    # stdout colors
    self.GREEN = "\033[92m"
    self.WARNING = "\033[93m"
    self.ENDCOLOR = "\033[0m"
    if "email" in config and "password" in config:
        if not isinstance(config["email"], str):
            raise Exception("Email must be a string!")
        if not isinstance(config["password"], str):
            raise Exception("Password must be a string!")
        self.email = config["email"]
        self.password = config["password"]
        if "isMicrosoftLogin" in config and config["isMicrosoftLogin"] == True:
            self.isMicrosoftLogin = True
            self.microsoft_login()
        elif "captcha" in config:
            if not isinstance(config["captcha"], str):
                raise Exception("2Captcha API Key must be a string!")
            self.twocaptcha_key = config["captcha"]
            self.email_login(self.solve_captcha())
        else:
            raise Exception("Invalid config!")
    elif "session_token" in config:
        if no_refresh:
            self.get_cf_cookies()
            return
        if not isinstance(config["session_token"], str):
            raise Exception("Session token must be a string!")
        self.session_token = config["session_token"]
        self.session.cookies.set(
            "__Secure-next-auth.session-token", config["session_token"]
        )
        self.get_cf_cookies()
    else:
        raise Exception("Invalid config!")
    self.retry_refresh()
def retry_refresh(self):
    """
    Call refresh_session() until it succeeds.

    A failed attempt is retried up to five more times; when the budget is
    exhausted a generic failure is raised.

    :raises Exception: If every attempt failed
    :return: None
    """
    attempts_left = 5
    while True:
        try:
            self.refresh_session()
        except Exception:
            if attempts_left == 0:
                raise Exception("Failed to refresh session!")
            attempts_left -= 1
        else:
            return
def ask(
    self,
    prompt,
    conversation_id=None,
    parent_id=None,
    gen_title=False,
    session_token=None,
):
    """
    Post a prompt to the conversation endpoint and return the reply.

    :param prompt: Text of the user message
    :param conversation_id: Conversation to post into; defaults to the
        instance's current conversation
    :param parent_id: Message to reply to; defaults to the instance's
        current parent, or to the mapped current node when a different
        conversation is given
    :param gen_title: Also attach a "title" when a new conversation was
        started
    :param session_token: Replacement session token to install first
    :raises Exception: On a non-200 status or a body that is too short to
        contain the expected payload line
    :return: Dict with "message", "conversation_id" and "parent_id" (plus
        "title" when requested), or None when the payload line is not a
        JSON object
    """
    if session_token:
        self.session.cookies.set("__Secure-next-auth.session-token", session_token)
        self.session_token = session_token
        self.config["session_token"] = session_token
    # NOTE(review): run unconditionally here, since the mapping is needed
    # below whenever another conversation is addressed — confirm.
    self.retry_refresh()
    self.map_conversations()
    if conversation_id is None:
        conversation_id = self.conversation_id
    if parent_id is None:
        parent_id = (
            self.parent_id
            if conversation_id == self.conversation_id
            else self.conversation_mapping[conversation_id]
        )
    data = {
        "action": "next",
        "messages": [
            {
                "id": str(uuid.uuid4()),
                "role": "user",
                "content": {"content_type": "text", "parts": [prompt]},
            },
        ],
        "conversation_id": conversation_id,
        "parent_message_id": parent_id or str(uuid.uuid4()),
        "model": "text-davinci-002-render",
    }
    new_conv = data["conversation_id"] is None
    self.conversation_id_prev_queue.append(data["conversation_id"])  # for rollback
    self.parent_id_prev_queue.append(data["parent_message_id"])
    response = self.session.post(
        url=BASE_URL + "backend-api/conversation",
        data=json.dumps(data),
        timeout_seconds=180,
    )
    if response.status_code != 200:
        print(response.text)
        self.refresh_session()
        raise Exception("Wrong response code! Refreshing session...")
    try:
        # The payload is taken from the fourth line from the end, after
        # dropping its first six characters.
        payload = response.text.splitlines()[-4]
        payload = payload[6:]
    except Exception as exc:
        print("Incorrect response from OpenAI API")
        raise Exception("Incorrect response from OpenAI API") from exc
    # Check if it is JSON
    if not payload.startswith("{"):
        return None
    payload = json.loads(payload)
    self.parent_id = payload["message"]["id"]
    self.conversation_id = payload["conversation_id"]
    res = {
        "message": payload["message"]["content"]["parts"][0],
        "conversation_id": self.conversation_id,
        "parent_id": self.parent_id,
    }
    if gen_title and new_conv:
        try:
            title = self.gen_title(self.conversation_id, self.parent_id)["title"]
        except Exception:
            # Fall back to the first three words of the prompt.
            split = prompt.split(" ")
            title = " ".join(split[:3]) + ("..." if len(split) > 3 else "")
        res["title"] = title
    return res
def check_response(self, response):
    """
    Raise unless the response has status 200.

    :param response: Object exposing status_code and text
    :raises Exception: With the status code as second argument
    :return: None
    """
    if response.status_code == 200:
        return
    print(response.text)
    raise Exception("Response code error: ", response.status_code)
def get_conversations(self, offset=0, limit=20):
    """
    Fetch one page of the conversation list.

    :param offset: Value sent as the "offset" query parameter
    :param limit: Value sent as the "limit" query parameter
    :return: The "items" entry of the decoded response
    """
    response = self.session.get(
        BASE_URL + f"backend-api/conversations?offset={offset}&limit={limit}"
    )
    self.check_response(response)
    return json.loads(response.text)["items"]
def get_msg_history(self, id):
    """
    Fetch the stored data of one conversation.

    :param id: Conversation id placed in the request path
    :return: The decoded JSON response
    """
    response = self.session.get(BASE_URL + f"backend-api/conversation/{id}")
    self.check_response(response)
    return json.loads(response.text)
def gen_title(self, id, message_id):
    """
    Ask the backend to generate a title for a conversation.

    :param id: Conversation id placed in the request path
    :param message_id: Message id sent in the request body
    :return: The decoded JSON response
    """
    payload = json.dumps(
        {"message_id": message_id, "model": "text-davinci-002-render"}
    )
    response = self.session.post(
        BASE_URL + f"backend-api/conversation/gen_title/{id}",
        data=payload,
    )
    self.check_response(response)
    return json.loads(response.text)
def change_title(self, id, title):
    """
    Rename a conversation.

    The body is built with json.dumps so the title is quoted and escaped;
    pass the plain title text, without surrounding quotes.

    :param id: Conversation id placed in the request path
    :param title: New title
    :raises Exception: If the response status is not 200
    :return: None
    """
    url = BASE_URL + f"backend-api/conversation/{id}"
    response = self.session.patch(url, data=json.dumps({"title": title}))
    self.check_response(response)
def delete_conversation(self, id):
    """
    Hide a conversation by patching its "is_visible" flag to false.

    :param id: Conversation id placed in the request path
    :raises Exception: If the response status is not 200
    :return: None
    """
    hide_payload = '{"is_visible": false}'
    response = self.session.patch(
        BASE_URL + f"backend-api/conversation/{id}", data=hide_payload
    )
    self.check_response(response)
def map_conversations(self):
    """
    Record the current node of every listed conversation.

    All histories are fetched first; afterwards conversation_mapping is
    updated with conversation id -> "current_node".

    :return: None
    """
    conversations = self.get_conversations()
    histories = []
    for conversation in conversations:
        histories.append(self.get_msg_history(conversation["id"]))
    for index, conversation in enumerate(conversations):
        current_node = histories[index]["current_node"]
        self.conversation_mapping[conversation["id"]] = current_node
def refresh_session(self, session_token=None):
    """
    Obtain a fresh access token from the auth session endpoint.

    On success the Authorization header is replaced and session_token is
    re-read from the cookie jar. When the response is unusable, the
    configured login method (Microsoft or e-mail with captcha) is run
    again; without one, the failure is raised.

    :param session_token: Replacement session token to install first
    :raises Exception: On a 403 response (after fetching new Cloudflare
        cookies), or when refreshing fails and no login method is set
    :return: None
    """
    cookie_name = "__Secure-next-auth.session-token"
    if session_token:
        self.session.cookies.set(cookie_name, session_token)
        self.session_token = session_token
        self.config["session_token"] = session_token
    response = self.session.get(BASE_URL + "api/auth/session", timeout_seconds=180)
    if response.status_code == 403:
        self.get_cf_cookies()
        raise Exception("Clearance refreshing...")
    try:
        payload = response.json()
        if "error" in payload:
            raise Exception(
                f"Failed to refresh session! Error: {payload['error']}"
            )
        unusable = (
            response.status_code != 200
            or payload == {}
            or "accessToken" not in payload
        )
        if unusable:
            raise Exception("Failed to refresh session!")
        self.session.headers.update(
            {"Authorization": "Bearer " + payload["accessToken"]}
        )
        self.session_token = self.session.cookies._find(cookie_name)
    except Exception as exc:
        print("Failed to refresh session!")
        if self.isMicrosoftLogin:
            print("Attempting to re-authenticate...")
            self.microsoft_login()
        elif self.twocaptcha_key:
            self.email_login(self.solve_captcha())
        else:
            raise Exception("Failed to refresh session!") from exc
def reset_chat(self) -> None:
    """
    Start over: clear the conversation id and pick a random parent id.

    :return: None
    """
    fresh_parent = str(uuid.uuid4())
    self.parent_id = fresh_parent
    self.conversation_id = None
def microsoft_login(self) -> None:
    """
    Login to OpenAI via Microsoft Login Authentication.

    Drives a browser through the login pages using config["email"] and
    config["password"]. The cookie and user-agent listeners set the
    *_found flags that the polling loops wait on; those loops poll every
    five seconds with no upper bound.

    :return: None
    """
    # Bound before the try so the cleanup is safe even when the browser
    # fails to start (otherwise the real error would be masked).
    driver = None
    try:
        self.cf_cookie_found = False
        self.session_cookie_found = False
        self.agent_found = False
        self.cf_clearance = None
        self.user_agent = None
        options = self.__get_ChromeOptions()
        print("Spawning browser...")
        driver = uc.Chrome(
            enable_cdp_events=True,
            options=options,
            driver_executable_path=self.config.get("driver_exec_path"),
            browser_executable_path=self.config.get("browser_exec_path"),
        )
        print("Browser spawned.")
        driver.add_cdp_listener(
            "Network.responseReceivedExtraInfo",
            lambda msg: self.detect_cookies(msg),
        )
        driver.add_cdp_listener(
            "Network.requestWillBeSentExtraInfo",
            lambda msg: self.detect_user_agent(msg),
        )

        def click(xpath, timeout=60):
            # Wait until the element is clickable, then click it.
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            driver.find_element(by=By.XPATH, value=xpath).click()

        def fill(xpath, text):
            # Wait until the input is visible, then type into it.
            WebDriverWait(driver, 60).until(
                EC.visibility_of_element_located((By.XPATH, xpath))
            )
            driver.find_element(by=By.XPATH, value=xpath).send_keys(text)

        driver.get(BASE_URL)
        while not self.agent_found or not self.cf_cookie_found:
            sleep(5)
        self.refresh_headers(
            cf_clearance=self.cf_clearance, user_agent=self.user_agent
        )
        # Log in button, then the Login with Microsoft button
        click("//button[contains(text(), 'Log in')]", timeout=120)
        click("//button[@data-provider='windowslive']")
        # Email, then Next
        fill("//input[@type='email']", self.config["email"])
        click("//input[@type='submit']")
        # Password, then Sign in
        fill("//input[@type='password']", self.config["password"])
        click("//input[@type='submit']")
        # Allow / Yes button
        click("//input[@type='submit']")
        # wait for input box to appear (to make sure we're signed in)
        WebDriverWait(driver, 60).until(
            EC.visibility_of_element_located((By.XPATH, "//textarea"))
        )
        while not self.session_cookie_found:
            sleep(5)
        print(self.GREEN + "Login successful." + self.ENDCOLOR)
    finally:
        # Close the browser
        if driver is not None:
            driver.quit()
def solve_captcha(self) -> dict:
    """
    Solve the login page's reCAPTCHA through 2Captcha.

    Uses the API key stored in twocaptcha_key.

    :return: The solver's result mapping; email_login() reads its "code"
        entry
    """
    twocaptcha_solver = TwoCaptcha(
        apiKey=self.twocaptcha_key,
        defaultTimeout=120,
        recaptchaTimeout=600,
        pollingInterval=10,
    )
    print("Waiting for captcha to be solved...")
    solved_captcha = twocaptcha_solver.recaptcha(
        sitekey="6Lc-wnQjAAAAADa5SPd68d0O3xmj0030uaVzpnXP",
        url="https://auth0.openai.com/u/login/identifier",
    )
    if "code" in solved_captcha:
        print(self.GREEN + "Captcha solved successfully!" + self.ENDCOLOR)
        if self.verbose:
            print(
                self.GREEN
                + "Captcha token: "
                + self.ENDCOLOR
                + solved_captcha["code"]
            )
    return solved_captcha
def email_login(self, solved_captcha) -> None:
    """
    Login to OpenAI via Email/Password Authentication and 2Captcha.

    Drives a browser through the login pages using config["email"] and
    config["password"], injecting the solved captcha token into the page.
    The polling loops wait on flags set by the cookie and user-agent
    listeners and have no upper bound.

    :param solved_captcha: Mapping whose "code" entry is the captcha token
    :return: None
    """
    # Bound before the try so the cleanup is safe even when the browser
    # fails to start (otherwise the real error would be masked).
    driver = None
    try:
        self.cf_cookie_found = False
        self.session_cookie_found = False
        self.agent_found = False
        self.cf_clearance = None
        self.user_agent = None
        options = self.__get_ChromeOptions()
        print("Spawning browser...")
        driver = uc.Chrome(
            enable_cdp_events=True,
            options=options,
            driver_executable_path=self.config.get("driver_exec_path"),
            browser_executable_path=self.config.get("browser_exec_path"),
        )
        print("Browser spawned.")
        driver.add_cdp_listener(
            "Network.responseReceivedExtraInfo",
            lambda msg: self.detect_cookies(msg),
        )
        driver.add_cdp_listener(
            "Network.requestWillBeSentExtraInfo",
            lambda msg: self.detect_user_agent(msg),
        )

        def click(xpath, timeout=60):
            # Wait until the element is clickable, then click it.
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            driver.find_element(by=By.XPATH, value=xpath).click()

        def fill(element_id, text):
            # Wait until the input is visible, then type into it.
            WebDriverWait(driver, 60).until(
                EC.visibility_of_element_located((By.ID, element_id))
            )
            driver.find_element(by=By.ID, value=element_id).send_keys(text)

        driver.get(BASE_URL)
        while not self.agent_found or not self.cf_cookie_found:
            sleep(5)
        self.refresh_headers(
            cf_clearance=self.cf_clearance, user_agent=self.user_agent
        )
        # Log in button
        click("//button[contains(text(), 'Log in')]", timeout=120)
        # Enter the email
        fill("username", self.config["email"])
        # Wait for Recaptcha to appear
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "*[name*='g-recaptcha-response']")
            )
        )
        # Find Recaptcha
        google_captcha_response_input = driver.find_element(
            By.CSS_SELECTOR, "*[name*='g-recaptcha-response']"
        )
        captcha_input = driver.find_element(By.NAME, "captcha")
        # Make input visible
        driver.execute_script(
            "arguments[0].setAttribute('style','type: text; visibility:visible;');",
            google_captcha_response_input,
        )
        driver.execute_script(
            "arguments[0].setAttribute('style','type: text; visibility:visible;');",
            captcha_input,
        )
        # Write the solved token into both captcha fields
        driver.execute_script(
            """
            document.getElementById("g-recaptcha-response").innerHTML = arguments[0]
            """,
            solved_captcha.get("code"),
        )
        driver.execute_script(
            """
            document.querySelector("input[name='captcha']").value = arguments[0]
            """,
            solved_captcha.get("code"),
        )
        # Hide the captcha input
        driver.execute_script(
            "arguments[0].setAttribute('style', 'display:none;');",
            google_captcha_response_input,
        )
        # Continue button
        click("//button[@type='submit']")
        # Enter the password
        fill("password", self.config["password"])
        # Sign in button
        click("//button[@type='submit']")
        # wait for input box to appear (to make sure we're signed in)
        WebDriverWait(driver, 60).until(
            EC.visibility_of_element_located((By.XPATH, "//textarea"))
        )
        while not self.session_cookie_found:
            sleep(5)
        print(self.GREEN + "Login successful." + self.ENDCOLOR)
    finally:
        # Close the browser
        if driver is not None:
            driver.quit()
def __get_ChromeOptions(self):
    """
    Build the Chrome options shared by every browser this class spawns.

    A non-empty config["proxy"] is forwarded as --proxy-server.

    :return: The populated options object
    """
    options = uc.ChromeOptions()
    # Chromium switches are hyphenated; "--start_maximized" is not a
    # recognised switch.
    options.add_argument("--start-maximized")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-application-cache")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-setuid-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    if self.config.get("proxy", "") != "":
        options.add_argument("--proxy-server=" + self.config["proxy"])
    return options
def get_cf_cookies(self) -> None:
    """
    Get cloudflare cookies.

    Opens the chat page in a browser and waits until the listeners have
    captured both the cf_clearance cookie and the user agent, then
    rebuilds the session headers from them. The wait polls every five
    seconds with no upper bound.

    :return: None
    """
    # Bound before the try so the cleanup is safe even when the browser
    # fails to start (otherwise the real error would be masked).
    driver = None
    try:
        self.cf_cookie_found = False
        self.agent_found = False
        self.cf_clearance = None
        self.user_agent = None
        options = self.__get_ChromeOptions()
        print("Spawning browser...")
        driver = uc.Chrome(
            enable_cdp_events=True,
            options=options,
            driver_executable_path=self.config.get("driver_exec_path"),
            browser_executable_path=self.config.get("browser_exec_path"),
        )
        print("Browser spawned.")
        driver.add_cdp_listener(
            "Network.responseReceivedExtraInfo",
            lambda msg: self.detect_cookies(msg),
        )
        driver.add_cdp_listener(
            "Network.requestWillBeSentExtraInfo",
            lambda msg: self.detect_user_agent(msg),
        )
        driver.get("https://chat.openai.com/chat")
        while not self.agent_found or not self.cf_cookie_found:
            sleep(5)
    finally:
        # Close the browser
        if driver is not None:
            driver.quit()
    # Reached only when both values were captured.
    self.refresh_headers(
        cf_clearance=self.cf_clearance, user_agent=self.user_agent
    )
def detect_cookies(self, message):
    """
    Pick the Cloudflare and session cookies out of a response event.

    Looks at message["params"]["headers"]["set-cookie"]. The first
    cf_clearance value seen is stored in cf_clearance; the first session
    token seen is stored in session_token and written to the session's
    cookie jar. Each capture sets its *_found flag so later events are
    ignored.

    :param message: Event payload passed to the listener
    :return: None
    """
    headers = message.get("params", {}).get("headers", {})
    if "set-cookie" not in headers:
        return
    set_cookie = headers["set-cookie"]
    # Capture the value itself so that "=" inside a value is preserved.
    cf_clearance_cookie = re.search(r"cf_clearance=(.*?);", set_cookie)
    session_cookie = re.search(
        r"__Secure-next-auth\.session-token=(.*?);", set_cookie
    )
    # The flags may not exist yet when no login method has run; treat a
    # missing flag as "not found".
    if cf_clearance_cookie and not getattr(self, "cf_cookie_found", False):
        print("Found Cloudflare Cookie!")
        self.cf_clearance = cf_clearance_cookie.group(1)
        if self.verbose:
            print(
                self.GREEN
                + "Cloudflare Cookie: "
                + self.ENDCOLOR
                + self.cf_clearance
            )
        self.cf_cookie_found = True
    if session_cookie and not getattr(self, "session_cookie_found", False):
        print("Found Session Token!")
        self.session_token = session_cookie.group(1)
        self.session.cookies.set(
            "__Secure-next-auth.session-token", self.session_token
        )
        if self.verbose:
            print(
                self.GREEN
                + "Session Token: "
                + self.ENDCOLOR
                + self.session_token
            )
        self.session_cookie_found = True
# Listener for request events: stores the "user-agent" request header in
# self.user_agent, sets self.agent_found, and rebuilds the session headers.
def detect_user_agent(self, message):
if "params" in message:
if "headers" in message["params"]:
if "user-agent" in message["params"]["headers"]:
# Remember the user-agent string carried by this request
user_agent = message["params"]["headers"]["user-agent"]
self.user_agent = user_agent
self.agent_found = True
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# headers are rebuilt on every event or only when a user-agent was found — confirm.
self.refresh_headers(cf_clearance=self.cf_clearance, user_agent=self.user_agent)
def refresh_headers(self, cf_clearance, user_agent):
    """
    Replace the clearance cookie and rebuild the request headers.

    All existing headers are dropped; the Authorization header is reset
    to an empty bearer value.

    :param cf_clearance: Value for the "cf_clearance" cookie
    :param user_agent: Value for the "User-Agent" header
    :return: None
    """
    session = self.session
    # Drop the stale cookie and every old header before installing new ones.
    del session.cookies["cf_clearance"]
    session.headers.clear()
    session.cookies.set("cf_clearance", cf_clearance)
    fresh_headers = {
        "Accept": "text/event-stream",
        "Authorization": "Bearer ",
        "Content-Type": "application/json",
        "User-Agent": user_agent,
        "X-Openai-Assistant-App-Id": "",
        "Connection": "close",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://chat.openai.com/chat",
    }
    session.headers.update(fresh_headers)
def rollback_conversation(self, num=1) -> None:
    """
    Rollback the conversation.

    Each step restores the conversation id and parent id that ask()
    pushed most recently.

    :param num: The number of messages to rollback
    :return: None
    """
    conversation_history = self.conversation_id_prev_queue
    parent_history = self.parent_id_prev_queue
    for _ in range(num):
        self.conversation_id = conversation_history.pop()
        self.parent_id = parent_history.pop()

2
poetry.lock generated
View File

@ -4140,4 +4140,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "d5757348a31dce3db191455bfba230c11859b968d8bfe76c44ec825e3c1d7738"
content-hash = "8bf46a1e925db8ce484fdc929f72dd07d3f9c49359c26c1efe646d96f8ff9482"

View File

@ -54,6 +54,7 @@ nltk = "^3.8"
beautifulsoup4 = "^4.11.1"
diffusers = "^0.11.1"
openai = "^0.26.0"
huggingface-hub = "^0.11.1"
revchatgpt = "^0.1.1"
[tool.poetry.group.dev.dependencies]

View File

@ -1,3 +1,4 @@
# type: ignore
from typing import Any, List
from django.contrib import admin

View File

@ -1,3 +1,4 @@
# type: ignore
import os
import dotenv