From 31ecf9617eeab825c0d6ad1e04cb50df2a29c52e Mon Sep 17 00:00:00 2001 From: wustmz <865011721@qq.com> Date: Mon, 15 Aug 2022 10:21:19 +0800 Subject: [PATCH 1/4] update ignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index d4593149..b427d7bb 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,7 @@ weibo.db *.log .idea + +run.sh + +user_id_list.txt From 2c45df470e418ff36ca0fa5d12d91b223798466b Mon Sep 17 00:00:00 2001 From: wustmz <865011721@qq.com> Date: Mon, 15 Aug 2022 17:02:41 +0800 Subject: [PATCH 2/4] update weibo --- run.sh | 1 + user_id_list.txt | 1 + weibo_spider/user_id_list.txt | 3 -- weibo_spider/weibo.py | 24 ++++++------ weibo_spider/writer/txt_writer.py | 62 ++++++++++++++++++++++++++----- 5 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 run.sh create mode 100644 user_id_list.txt delete mode 100644 weibo_spider/user_id_list.txt diff --git a/run.sh b/run.sh new file mode 100644 index 00000000..fd6c7074 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +python3 -m weibo_spider diff --git a/user_id_list.txt b/user_id_list.txt new file mode 100644 index 00000000..c3a60092 --- /dev/null +++ b/user_id_list.txt @@ -0,0 +1 @@ +2014433131 唐诗主任司马迁 2022-08-15 16:56 \ No newline at end of file diff --git a/weibo_spider/user_id_list.txt b/weibo_spider/user_id_list.txt deleted file mode 100644 index ead74227..00000000 --- a/weibo_spider/user_id_list.txt +++ /dev/null @@ -1,3 +0,0 @@ -1669879400 Dear-迪丽热巴 2020-01-13 19:18 -1223178222 胡歌 2020-01-13 19:28 -1729370543 郭碧婷 2020-01-13 19:33 \ No newline at end of file diff --git a/weibo_spider/weibo.py b/weibo_spider/weibo.py index 54cec7ff..89d24150 100644 --- a/weibo_spider/weibo.py +++ b/weibo_spider/weibo.py @@ -4,29 +4,29 @@ def __init__(self): self.user_id = '' self.content = '' - self.article_url = '' + # self.article_url = '' self.original_pictures = [] self.retweet_pictures = None self.original = None self.video_url = '' - self.publish_place = '' + # self.publish_place = '' self.publish_time = '' - self.publish_tool = '' + # self.publish_tool = '' - self.up_num = 0 - self.retweet_num = 0 - self.comment_num = 0 + # self.up_num = 0 + # self.retweet_num = 0 + # self.comment_num = 0 def __str__(self): """打印一条微博""" result = self.content + '\n' - result += u'微博发布位置:%s\n' % self.publish_place + # result += u'微博发布位置:%s\n' % self.publish_place result += u'发布时间:%s\n' % self.publish_time - result += u'发布工具:%s\n' % self.publish_tool - result += u'点赞数:%d\n' % self.up_num - result += u'转发数:%d\n' % self.retweet_num - result += u'评论数:%d\n' % self.comment_num - result += u'url:https://weibo.cn/comment/%s\n' % self.id + # result += u'发布工具:%s\n' % self.publish_tool + # result += u'点赞数:%d\n' % self.up_num + # result += u'转发数:%d\n' % self.retweet_num + # result += u'评论数:%d\n' % self.comment_num + # result += u'url:https://weibo.cn/comment/%s \n' % self.id return result diff --git a/weibo_spider/writer/txt_writer.py b/weibo_spider/writer/txt_writer.py index 6eddd862..76bcfb14 100644 --- a/weibo_spider/writer/txt_writer.py +++ b/weibo_spider/writer/txt_writer.py @@ -1,5 +1,11 @@ import logging +import smtplib import sys +import time +# 需要 MIMEMultipart 类 +from email.mime.multipart import MIMEMultipart +# 发送字符串的邮件 +from email.mime.text import MIMEText from .writer import Writer @@ -19,9 +25,10 @@ def __init__(self, file_path, filter): self.weibo_header = u'原创微博内容' else: self.weibo_header = u'微博内容' - self.weibo_desc = [('publish_place', '微博位置'), ('publish_time', '发布时间'), - ('up_num', '点赞数'), ('retweet_num', '转发数'), - ('comment_num', '评论数'), ('publish_tool', '发布工具')] + # self.weibo_desc = [('publish_place', '微博位置'), ('publish_time', '发布时间'), + # ('up_num', '点赞数'), ('retweet_num', '转发数'), + # ('comment_num', '评论数'), ('publish_tool', '发布工具')] + self.weibo_desc = [('publish_time', '发布时间')] def write_user(self, user): self.user = user @@ -37,10 +44,10 @@ def write_user(self, user): def write_weibo(self, weibo): """将爬取的信息写入txt文件""" - weibo_header = '' - if self.weibo_header: - weibo_header = self.weibo_header + ':\n' - self.weibo_header = '' + # weibo_header = '' + # if self.weibo_header: + # weibo_header = self.weibo_header + ':\n' + # self.weibo_header = '' try: temp_result = [] @@ -49,9 +56,44 @@ def write_weibo(self, weibo): [v + ':' + str(w.__dict__[k]) for k, v in self.weibo_desc])) result = '\n\n'.join(temp_result) + '\n\n' + # 同步到印象笔记 + self.sendEmail(str(result)) - with open(self.file_path, 'ab') as f: - f.write((weibo_header + result).encode(sys.stdout.encoding)) - logger.info(u'%d条微博写入txt文件完毕,保存路径:%s', len(weibo), self.file_path) + # with open(self.file_path, 'ab') as f: + # f.write((weibo_header + result).encode(sys.stdout.encoding)) + # logger.info(u'%d条微博写入txt文件完毕,保存路径:%s', len(weibo), self.file_path) except Exception as e: logger.exception(e) + + def sendEmail(self, result): + # 设置服务器所需信息 + fromEmailAddr = '865011721@qq.com' # 邮件发送方邮箱地址 + password = 'wnumbsdltnkebehf' # 密码(部分邮箱为授权码) + toEmailAddrs = ['865011721.42f1aa8@m.yinxiang.com'] # 邮件接受方邮箱地址,注意需要[]包裹,这意味着你可以写多个邮件地址群发 + + currentDate = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + + # 设置email信息 + # ---------------------------发送带附件邮件----------------------------- + # 邮件内容设置 + message = MIMEMultipart() + # 邮件主题 + message['Subject'] = currentDate + # 发送方信息 + message['From'] = fromEmailAddr + # 接受方信息 + message['To'] = toEmailAddrs[0] + + # 邮件正文内容 + message.attach(MIMEText(result, 'plain', 'utf-8')) + # --------------------------------------------------------------------- + + # 登录并发送邮件 + try: + server = smtplib.SMTP('smtp.qq.com') # 邮箱服务器地址,端口默认为25 + server.login(fromEmailAddr, password) + server.sendmail(fromEmailAddr, toEmailAddrs, message.as_string()) + print('success') + server.quit() + except smtplib.SMTPException as e: + print("error:", e) From af6f96c174c29f05ed625555b0ffed6f74abcc1a Mon Sep 17 00:00:00 2001 From: wustmz <865011721@qq.com> Date: Tue, 16 Aug 2022 17:43:25 +0800 Subject: [PATCH 3/4] add notice --- notice.py | 50 +++++++++++++++ update.py | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 notice.py create mode 100755 update.py diff --git a/notice.py b/notice.py new file mode 100644 index 00000000..a4b48af1 --- /dev/null +++ b/notice.py @@ -0,0 +1,50 @@ +# coding=utf-8 +import smtplib +import time +# 需要 MIMEMultipart 类 +from email.mime.multipart import MIMEMultipart +# 发送字符串的邮件 +from email.mime.text import MIMEText + +import update + + +def send(code, dest): + # 设置服务器所需信息 + fromEmailAddr = '865011721@qq.com' # 邮件发送方邮箱地址 + password = 'wnumbsdltnkebehf' # 密码(部分邮箱为授权码) + # toEmailAddrs = ['steve.mei@jfz.com'] # 邮件接受方邮箱地址,注意需要[]包裹,这意味着你可以写多个邮件地址群发 + toEmailAddrs = ['865011721@qq.com'] # 邮件接受方邮箱地址,注意需要[]包裹,这意味着你可以写多个邮件地址群发 + + msg = update.grid_notice(code, dest) + if '1' == msg: + return + + # 设置email信息 + # ---------------------------发送带附件邮件----------------------------- + # 邮件内容设置 + message = MIMEMultipart() + # 邮件主题 + message['Subject'] = '网格提醒-' + code + # 发送方信息 + message['From'] = fromEmailAddr + # 接受方信息 + message['To'] = toEmailAddrs[0] + + # 邮件正文内容 + message.attach(MIMEText(msg, 'plain', 'utf-8')) + # --------------------------------------------------------------------- + + # 登录并发送邮件 + try: + server = smtplib.SMTP('smtp.qq.com') # 邮箱服务器地址,端口默认为25 + server.login(fromEmailAddr, password) + server.sendmail(fromEmailAddr, toEmailAddrs, message.as_string()) + print('success') + server.quit() + except smtplib.SMTPException as e: + print("error:", e) + + +if __name__ == '__main__': + send('588000', 1.23) diff --git a/update.py b/update.py new file mode 100755 index 00000000..8ca52fcb --- /dev/null +++ b/update.py @@ -0,0 +1,182 @@ +# excel_u.py +# coding=utf-8 +# 导入相应模块 +import datetime +import re + +import easyquotation +import matplotlib +import numpy as np +import openpyxl +import requests +from bs4 import BeautifulSoup + +# 处理乱码 +matplotlib.rcParams['font.sans-serif'] = ['SimHei'] +matplotlib.rcParams['font.family'] = 'sans-serif' +matplotlib.rcParams['axes.unicode_minus'] = False + + +# 查询股票信息 +def get_stock(code): + quotation = easyquotation.use('qq') # 新浪 ['sina'] 腾讯 ['tencent', 'qq'] + return quotation.real(code)[code] # 支持直接指定前缀,如 'sh000001' + + +# 每页最多50条数据 +def get_html(code, start_date, end_date, page=1, per=10): + url = 'http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={0}&page={1}&sdate={2}&edate={3}&per={4}'.format( + code, page, start_date, end_date, per) + rsp = requests.get(url) + html = rsp.text + return html + + +def get_fund(code, start_date, end_date, page=1, per=20): + # 获取html + html = get_html(code, start_date, end_date, page, per) + soup = BeautifulSoup(html, 'html.parser') + # 获取总页数 + pattern = re.compile('pages:(.*),') + result = re.search(pattern, html).group(1) + total_page = int(result) + # 获取表头信息 + heads = [] + for head in soup.findAll("th"): + heads.append(head.contents[0]) + + # 数据存取列表 + records = [] + # 获取每一页的数据 + current_page = 1 + while current_page <= total_page: + html = get_html(code, start_date, end_date, current_page, per) + soup = BeautifulSoup(html, 'html.parser') + # 获取数据 + for row in soup.findAll("tbody")[0].findAll("tr"): + row_records = [] + for record in row.findAll('td'): + val = record.contents + # 处理空值 + if not val: + row_records.append(np.nan) + else: + row_records.append(val[0]) + # 记录数据 + # print (row_records[0] , row_records[1]) + records.append(row_records) + # 下一页 + current_page = current_page + 1 + + return records + + +# 获取前五天的时间 +def getLastDay(): + today = datetime.date.today() + oneday = datetime.timedelta(days=5) + yesterday = today - oneday + return yesterday + + +# 获取今天的时间 +def getToday(): + return datetime.date.today() + + +# 场外基金 +def run(code, row): + start_date = getLastDay() + end_date = getToday() + + print(code, 'begin==========>') + + records = get_fund(code, start_date, end_date) + # 最新的净值记录 + record = records[0] + # print(record) + # 基金代码 + # table.cell(row,1,code) + # 最新净值日期 + net_date = record[0] + # 最新单位净值 + net_value = record[1] + + update_excel(net_date, net_value, row) + print(code, 'end==========>') + + +# 场内基金或股票 +def run_stock(code, row): + print(code, 'begin==========>') + data = get_stock(code) + # print(data) + # 最新净值日期 + # net_date = data['date'] + dt = data['datetime'] + # 格式化日期 + net_date = dt.strftime('%Y-%m-%d') + # 最新单位净值 + net_value = data['now'] + update_excel(net_date, net_value, row) + print(code, 'end==========>') + + +# 更新Excel +def update_excel(net_date, net_value, row): + # 加载指定Excel + data = openpyxl.load_workbook('AssetAllocation.xlsx') + # 取第二张表 + table = data['明细'] + # 输出表名 + # print(table.title) + table.cell(row, 11, net_date) + table.cell(row, 12, net_value) + data.save('AssetAllocation.xlsx') + + +def grid(code, row): + # 加载指定Excel + data = openpyxl.load_workbook('tt.xlsx') + # oad_workbook(file, read_only=True, data_only=True) + # 取第一张表 + table = data.worksheets[0] + # 输出表名 + print(table.title) + stock = get_stock(code) + net_value = stock['now'] + print(net_value) + table.cell(row, 2, net_value) + data.save('tt.xlsx') + + +# 更新实验账户 +def grid_1(code, cost, amount, dest): + stock = get_stock(code) + # print(stock) + open = stock['open'] + now = stock['now'] + name = stock['name'] + profit = now - cost + rate = profit / cost + destRate = (dest - now) / now + print(name, + '成本价: {:.3f}'.format(cost), + '现价: {:.3f}'.format(now), + '涨跌幅: {:.2%}'.format(rate), + '盈利: {:.2f}'.format(profit * amount), + '目标价: {:.3f}'.format(dest), + '目标涨跌幅: {:.2%}'.format(destRate)) + return profit * amount + + +def grid_notice(code, dest): + stock = get_stock(code) + # print(stock) + now = stock['now'] + name = stock['name'] + if now >= dest: + s = '叮咚!【' + name + '】达到目标价了,请及时操作哦~' + ' 现价: {:.3f}'.format(now) + return s + else: + return '1' From 2eded98bf12a900a60c2340e01e1f6c3b1b20466 Mon Sep 17 00:00:00 2001 From: wustmz <865011721@qq.com> Date: Mon, 29 Aug 2022 15:25:29 +0800 Subject: [PATCH 4/4] add article_url --- weibo_spider/weibo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weibo_spider/weibo.py b/weibo_spider/weibo.py index 89d24150..d91ba81c 100644 --- a/weibo_spider/weibo.py +++ b/weibo_spider/weibo.py @@ -4,7 +4,7 @@ def __init__(self): self.user_id = '' self.content = '' - # self.article_url = '' + self.article_url = '' self.original_pictures = [] self.retweet_pictures = None @@ -28,5 +28,5 @@ def __str__(self): # result += u'点赞数:%d\n' % self.up_num # result += u'转发数:%d\n' % self.retweet_num # result += u'评论数:%d\n' % self.comment_num - # result += u'url:https://weibo.cn/comment/%s \n' % self.id + result += u'url:https://weibo.cn/comment/%s \n' % self.id return result