15 changes: 0 additions & 15 deletions .env.development

This file was deleted.

10 changes: 0 additions & 10 deletions .env.production

This file was deleted.

3 changes: 2 additions & 1 deletion .gitignore
@@ -23,4 +23,5 @@ log_*
logs/
*.log

summaries.csv
summaries.csv
.env*
28 changes: 22 additions & 6 deletions .vscode/launch.json
@@ -4,8 +4,6 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [


{
"name": "Python: maple_models",
"type": "python",
@@ -18,7 +16,7 @@
"debug",
"--logname",
"maple_models_bert_debug",
"--debug-limits",
// "--debug-limits",
// "--run-once"
],
"console": "integratedTerminal",
@@ -54,7 +52,7 @@
"-i",
"600",
"-l",
"info"
"debug"
],
"console": "integratedTerminal",
"justMyCode": true
@@ -82,7 +80,7 @@
},
{
"name": "Python: data transfer from prod",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "scripts/transfer_data.py",
// "args": ["-s", "0.0.0.0:3000", "-d", "0.0.0.0:3000", "-n", "1"],
@@ -103,7 +101,7 @@
},
{
"name": "Python: Create summaries",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "scripts/create_summaries.py",
// "args": ["-s", "0.0.0.0:3000", "-d", "0.0.0.0:3000", "-n", "1"],
@@ -116,5 +114,23 @@
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "Python: delete model iteration",
"type": "debugpy",
"request": "launch",
"program": "runtime_scripts/delete_model_iteration.py",
"args": [
"-t",
"old",
"-a",
"-c",
"-l",
"debug",
"--use_config",
],
"console": "integratedTerminal",
"justMyCode": true
},

]
}
14 changes: 13 additions & 1 deletion .vscode/settings.json
@@ -12,5 +12,17 @@
"maple_proc",
"maple_structures",
"~/rcs-utils",
]
],
"python.testing.unittestArgs": [
"-v",
"-s",
"./maple_interface/tests",
"-s",
"tests",
"-p",
"test_*.py"
],
"python.testing.pytestEnabled": false,
"python.testing.unittestEnabled": true,
"auto-scroll.enabled": false
}
8 changes: 5 additions & 3 deletions install.sh
@@ -24,6 +24,8 @@ install_packages(){

pip install -r requirements.txt

pip install scrapy-fake-useragent

pip install --upgrade pip setuptools

pip install python-socketio python-socketio[client]
@@ -60,7 +62,7 @@ install_packages(){
pip install -e .

cd ../
[ -d RTPTResearch ] && echo "RTPTResearch directory already exist || git clone git@github.com:ResearchComputingServices/RTPTResearch.git
[ -d RTPTResearch ] && echo "RTPTResearch directory already exist" || git clone git@github.com:ResearchComputingServices/RTPTResearch.git
cd RTPTResearch
git pull
pip install -e .
@@ -71,8 +73,8 @@

create_pm2_tasks(){
pm2 delete chatgpt 2> /dev/null && pm2 start runtime_scripts/chatgpt.py --interpreter .venv/bin/python3
pm2 delete data_fetcher 2> /dev/null && pm2 start maple_data_fetcher/data_fetcher.py --interpreter .venv/bin/python3 -- -e prod -i 600 -l info
pm2 delete delete_model_iteration 2> /dev/null && pm2 start runtime_scripts/delete_model_iteration.py --interpreter .venv/bin/python3 -- -t old -a -l debug
pm2 delete data_fetcher 2> /dev/null && pm2 start runtime_scripts/data_fetcher.py --interpreter .venv/bin/python3 -- -e prod -i 600 -l info
pm2 delete delete_model_iteration 2> /dev/null && pm2 start runtime_scripts/delete_model_iteration.py --interpreter .venv/bin/python3 -- -t old -a -c -l debug --use_config
pm2 delete maple_models_bert 2> /dev/null && pm2 start runtime_scripts/maple_models.py --interpreter .venv/bin/python3 --name maple_models_bert -- --model bert --level debug --logname maple_models_bert
pm2 save
pm2 kill
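Both the new launch configuration above and the updated pm2 task invoke runtime_scripts/delete_model_iteration.py with -t old -a -c -l debug --use_config. The real argument definitions live in that script and are not visible in this diff; the following Python sketch only illustrates how such flags could be declared with argparse, and every option name, default, and help string here is an assumption inferred from the invocations.

import argparse

# Hypothetical argument declarations inferred from the command lines above;
# the actual ones are defined in runtime_scripts/delete_model_iteration.py and may differ.
parser = argparse.ArgumentParser(description="delete model iterations (sketch)")
parser.add_argument("-t", "--type", default="old",
                    help="which iterations to target, e.g. 'old' (assumed name)")
parser.add_argument("-a", action="store_true",
                    help="apply to all matching iterations (assumed meaning)")
parser.add_argument("-c", action="store_true",
                    help="flag newly added in this PR; purpose assumed")
parser.add_argument("-l", "--level", default="info",
                    help="log level, e.g. 'debug'")
parser.add_argument("--use_config", action="store_true",
                    help="pull settings from the backend config (assumed meaning)")

args = parser.parse_args(["-t", "old", "-a", "-c", "-l", "debug", "--use_config"])
print(args.type, args.level, args.use_config)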
45 changes: 39 additions & 6 deletions maple_chat/maple_chatgpt/chatgpt_server.py
@@ -7,7 +7,7 @@
import time
import random
import rcs
from maple_processing.process import chatgpt_summary, chatgpt_topic_name, chatgpt_bullet_summary
from maple_processing.process import LLMProcess, chatgpt_summary, chatgpt_topic_name, chatgpt_bullet_summary
from maple_structures import Article, Topic
from maple_interface import MapleAPI
from .utils import JobType
@@ -78,7 +78,8 @@ def __init__(
socket_io_port: int,
socket_io_api_key: str,
chatgpt_api_key: str = None,
article_fetching: bool = False) -> None:
article_fetching: bool = False,
use_config: bool = True) -> None:

super().__init__(ping_timeout=600)
self.logger=logging.getLogger('ChatgptServer')
@@ -87,6 +88,8 @@ def __init__(
self.maple_keys_in_use = []
self.maple_clients = []
self.maple_jobs = []
self._maple_config = None
self._use_config = use_config

self.maple_api = maple_api
self._socket_io_port = socket_io_port
@@ -99,7 +102,26 @@ def __init__(
self.attach(self._app)
self.register_namespace(ChatgptServerNamespace('/'))
self.loop = asyncio.get_event_loop()


async def update_config(self):
FETCH_CONFIG_INTERVAL = 60
while True:
if self._use_config:
max_attempts = 5
attempts = 0
while self._maple_config is None and attempts < max_attempts:
self.logger.debug("Fetching maple_config")
maple_config = self.maple_api.config_get()
if maple_config is not None:
if self._maple_config != maple_config:
self.logger.info('maple_config has changed')
self._maple_config = maple_config
self.logger.info('Updated maple_config')
break
attempts +=1
await asyncio.sleep(1)
await asyncio.sleep(FETCH_CONFIG_INTERVAL)


def maple_add_job(self, sid: str, api_key: str, job_type: JobType, job_details: any):
with self.maple_lock:
@@ -207,7 +229,9 @@ async def _process_job_summary(self, job, force_update: bool = False):
return
for _ in range(3):
try:
summary = await chatgpt_summary(article.content, job['api_key'])
llm_process = LLMProcess(config = self._maple_config)
summary = await llm_process.get_summary(article.content, job['api_key'])
# summary = await chatgpt_summary(article.content, job['api_key'])
# summary = chatgpt_summary(article.content, job['api_key'])
break
except Exception as exc:
@@ -227,7 +251,10 @@ async def _process_job_topic_name(self, job):
job_send = job.copy()
while True:
try:
topic_name = await chatgpt_topic_name( job_send['job_details']['keyword'], job_send['api_key'])
llm_process = LLMProcess(config = self._maple_config)
topic_name = await llm_process.get_topic_name(job['job_details']['keyword'], job['api_key'])

# topic_name = await chatgpt_topic_name( job_send['job_details']['keyword'], job_send['api_key'])
job_send['results'] = topic_name
break
except Exception as exc:
@@ -253,7 +280,11 @@ async def _process_job_bullet_summary(self, job):

while True:
try:
bullet_summary = await chatgpt_bullet_summary(job['job_details']['content'], job['api_key'])
articles = job['job_details']['content']
llm_process = LLMProcess(config = self._maple_config)
bullet_summary = await llm_process.get_bullet_summary(articles, job['api_key'])

# bullet_summary = await chatgpt_bullet_summary(articles, job['api_key'])
job_send['results'] = bullet_summary
break
except Exception as exc:
@@ -346,11 +377,13 @@ async def _process(self):
def run(self):
"""run server and tasks
"""
self.start_background_task(self.update_config)
self.start_background_task(self._process)
# self.loop.create_task(self._process())
if self._article_fetching:
self.start_background_task(self._fetch_pending_summaries)
# self.loop.create_task(self._fetch_pending_summaries())

web.run_app(
self._app,
host = self._socket_io_ip,
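The server-side changes above replace the module-level chatgpt_summary / chatgpt_topic_name / chatgpt_bullet_summary helpers with an LLMProcess built from the cached maple_config, which the new update_config task refreshes from the backend roughly every 60 seconds. Below is a minimal sketch of that calling pattern, assuming only what the diff shows: LLMProcess takes a config keyword argument, exposes an async get_summary(content, api_key), and MapleAPI.config_get() returns the config dict (or None/{} on failure).

from maple_interface import MapleAPI
from maple_processing.process import LLMProcess

async def summarize(maple_api: MapleAPI, content: str, api_key: str):
    # config_get() is the same call the new update_config task uses; it can return
    # None (or an empty dict) when the backend is unreachable, in which case
    # LLMProcess presumably falls back to defaults (assumption, not shown in this diff).
    maple_config = maple_api.config_get()
    llm_process = LLMProcess(config=maple_config)  # built per job, as in _process_job_summary
    return await llm_process.get_summary(content, api_key)

# Example wiring (placeholder address, matching the commented launch.json args):
# maple_api = MapleAPI("http://0.0.0.0:3000")
# summary = asyncio.run(summarize(maple_api, article_text, api_key))

Rebuilding LLMProcess per job means changes to maple_config take effect on the next job without restarting the server, which appears to be the point of the new update_config loop.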
36 changes: 33 additions & 3 deletions maple_data_fetcher/data_fetcher.py
@@ -5,8 +5,10 @@
import os
import sys
import time
from maple_interface.maple import MapleAPI
import rcs
from maple_config import config as cfg

sys.path.append(os.path.join(os.path.abspath(""), "newsscrapy"))

print(sys.path)
@@ -65,11 +67,14 @@ class DataFetcher:

def __init__(
self,
backend_ip: str,
backend_port: str,
spiders: list = [scrapyCBC.CBCHeadlinesSpider, scrapyCTVNews.CTVNewsSpider],
spider_output_file: bool = False,
spider_log_level: str = "warning",
spider_interval_sec: int = 120,
environment= '.env.development'
environment= '.env.development',

) -> None:
self.spider_output_file = spider_output_file

@@ -82,7 +87,28 @@ def __init__(
self._spider_interval_sec = spider_interval_sec

self._spiders = spiders


self._maple_api = MapleAPI(
f"http://{backend_ip}:{backend_port}"
)

def _update_spider_interval_sec_from_config(self):
if self._environment == '.env.development':
logger.info("Development environment. Not updating spider interval from config.")
return
config = self._maple_api.config_get()
if isinstance(config, dict):
if 'spider_interval_seconds' in config:
if self._spider_interval_sec != config['spider_interval_seconds']:
logger.info("Updating spider interval from %s to %s", self._spider_interval_sec, config['spider_interval_seconds'])
self._spider_interval_sec = config['spider_interval_seconds']
elif config == {}:
logger.warning("Failed to retrieve config from backend. Using last updated spider interval.")
else:
logger.error("spider_interval_seconds not found in config.")
else:
logger.warning("Failed to get config from backend. Using last updated spider interval.")

def _get_project_settings(self):
self._scrapy_settings = get_project_settings()

@@ -129,6 +155,8 @@ def _catch_error(self, failure):
def _crawl(self, spider):
"""crawl a spider and set callback to schedule next crawl"""
logger.info("Crawling spider: %s", spider)
# Fetch config from backend to update interval in case it changed.
self._update_spider_interval_sec_from_config()
job = self._crawl_job(spider)
job.addCallback(self._schedule_next_crawl, self._spider_interval_sec, spider)
# job.errback(self._catch_error)
@@ -173,6 +201,8 @@ def main(args):
logger.debug("config: %s", config)

data_fetcher = DataFetcher(
backend_ip=config['MAPLE_BACKEND_IP'],
backend_port=config['MAPLE_BACKEND_PORT'],
spider_output_file=args.o,
spider_interval_sec=args.i,
spider_log_level=args.l,
@@ -183,4 +213,4 @@

if __name__ == "__main__":
args = parser.parse_args()
main(args)
main(args)
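With the changes above, DataFetcher now requires backend_ip and backend_port, builds its own MapleAPI client, and re-reads the backend config before every crawl so spider_interval_seconds can change at runtime. A hedged sketch of how a caller might wire this up follows; the import path, host, and port are placeholders, and the start/run call is omitted because it is not visible in this diff.

from maple_data_fetcher.data_fetcher import DataFetcher  # assumed import path

fetcher = DataFetcher(
    backend_ip="0.0.0.0",            # placeholder; main() passes MAPLE_BACKEND_IP from the env config
    backend_port="3000",             # placeholder; main() passes MAPLE_BACKEND_PORT
    spider_interval_sec=600,         # starting interval; prod can override it from the backend config
    environment=".env.production",   # '.env.development' skips the config-driven update
)
# Before each crawl, _update_spider_interval_sec_from_config() calls config_get():
#   - a dict containing 'spider_interval_seconds' replaces the current interval,
#   - an empty dict {} is logged as a failed fetch and the previous interval is kept.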