上海网站建设官网东莞网站制作实力乐云seo
# -*- coding: utf-8 -*-
"""Page blueprint for a Weibo public-opinion (sentiment) analysis dashboard.

Flask views that aggregate crawled Weibo articles/comments (via the dao
layer), classify sentiment with a fine-tuned BERT model (llm.weibo_train),
and feed ECharts visualizations: line/pie/bar charts, word clouds and a
China map.

NOTE(review): this file was recovered from a scrape that stripped quotes,
'@', '=', '+' and all comparison operators.  Operator choices, string
quoting, the leading '/' on some route rules, word-cloud join separators
and the histogram overflow labels are plausible reconstructions and should
be confirmed against the original project source.
"""
import pandas as pd
from flask import Blueprint, render_template, jsonify, request
from snownlp import SnowNLP  # legacy lexicon classifier, superseded by the BERT model
from dao import articleDao, commentDao
from llm.weibo import data_classfication  # zero-shot LLM classifier (alternative backend)
from llm.weibo_train import check_data, check_datas_batch
from util import wordcloudUtil, mapUtil

pb = Blueprint('page', __name__, url_prefix='/page', template_folder='templates')


def _histogram(values, width, n_buckets):
    """Bucket integer *values* into *n_buckets* fixed-width bins plus one
    trailing overflow bin for values above ``width * n_buckets``.

    Fixes two defects of the original inline loops: a premature ``break``
    meant a row's second metric was never bucketed, and values exactly
    equal to the upper limit were silently dropped.
    """
    counts = [0] * (n_buckets + 1)
    limit = width * n_buckets
    for v in values:
        v = int(v)
        if v > limit:
            counts[n_buckets] += 1  # overflow bin
        else:
            counts[min(v // width, n_buckets - 1)] += 1
    return counts


def _bin_labels(width, n_buckets, overflow_label):
    """X-axis labels '0-width', 'width-2*width', ... plus *overflow_label*."""
    labels = [str(width * i) + '-' + str(width * (i + 1)) for i in range(n_buckets)]
    labels.append(overflow_label)
    return labels


def _region_counts(regions, cityList):
    """Map free-text IP region strings onto provinces from ``mapUtil.cityList``
    and return ECharts map data ``[{'name': province, 'value': count}, ...]``.

    A region is credited to every province whose name contains it as a
    substring (same semantics as the original code: no break after a match).
    """
    counts = {}
    for region in regions:
        if not region:  # skip empty/missing region fields
            continue
        for city in cityList:
            if city['province'].find(region) != -1:
                counts[city['province']] = counts.get(city['province'], 0) + 1
    return [{'name': k, 'value': v} for k, v in counts.items()]


def _top_words_cloud(csv_path, nrows, mask_img, out_img):
    """Read the top *nrows* words from a frequency CSV and render a word cloud."""
    df = pd.read_csv(csv_path, nrows=nrows)
    # NOTE(review): join separator lost in the scrape; assuming a space.
    words = ' '.join(x[0] for x in df.values)
    wordcloudUtil.genWordCloudPic(words, mask_img, out_img)


@pb.route('/home')
def home():
    """Main dashboard: 7-day article trend, article-type pie, 7-day comment
    pie; also regenerates the top-50 commenter word-cloud image."""
    xAxis7ArticleData = []
    yAxis7ArticleData = []
    for article in articleDao.get7DayArticle():
        xAxis7ArticleData.append(article[0])
        yAxis7ArticleData.append(article[1])
    # Article-type counts for the pie chart.
    arcTypeData = [{'value': row[1], 'name': row[0]}
                   for row in articleDao.getArticleTypeAmount()]
    # Top-50 commenter names -> word-cloud image referenced by the page.
    top50 = [cu[0] for cu in commentDao.getTopCommentUser()]
    names = ' '.join(top50)  # NOTE(review): separator lost in scrape
    wordcloudUtil.genWordCloudPic(names, 'comment_mask.jpg', 'comment_user_cloud.jpg')
    # 7-day comment counts for the pie chart.
    commentData = [{'value': row[1], 'name': row[0]}
                   for row in commentDao.getCommentAmount()]
    return render_template('index.html',
                           xAxis7ArticleData=xAxis7ArticleData,
                           yAxis7ArticleData=yAxis7ArticleData,
                           arcTypeData=arcTypeData,
                           commentData=commentData)


@pb.route('/homePageData')
def getHomePageData():
    """AJAX endpoint polled by the front end (~every 5 minutes) for live
    headline figures: totals, top author/region, most-liked articles."""
    return jsonify(totalArticle=articleDao.getTotalArticle(),
                   topAuthor=articleDao.getTopAuthor(),
                   topRegion=articleDao.getTopRegion(),
                   topArticles=articleDao.getArticleTopZan())


@pb.route('/hotWord')
def hotWord():
    """Hot-word page: top-100 comment words plus, for one selected word,
    its frequency, BERT sentiment and daily comment trend."""
    df = pd.read_csv('./fenci/comment_fre.csv', nrows=100)  # top 100 rows only
    hotwordList = [value[0] for value in df.values]
    # Selected word from the query string; default to the most frequent one.
    defaultHotWord = request.args.get('word', default=hotwordList[0])
    hotwordNum = 0  # occurrence count of the selected word
    for value in df.values:
        if defaultHotWord == value[0]:
            hotwordNum = value[1]
    # Sentiment via the fine-tuned BERT classifier.  SnowNLP scoring and the
    # zero-shot LLM `data_classfication` are alternative backends.
    sentiments = check_data([defaultHotWord])
    xAxisHotWordData = []
    yAxisHotWordData = []
    for comment in commentDao.getCommentHotWordAmount(defaultHotWord):
        xAxisHotWordData.append(comment[0])
        yAxisHotWordData.append(comment[1])
    commentList = commentDao.getCommentByHotWord(defaultHotWord)
    return render_template('hotWord.html',
                           hotwordList=hotwordList,
                           defaultHotWord=defaultHotWord,
                           hotwordNum=hotwordNum,
                           sentiments=sentiments,
                           xAxisHotWordData=xAxisHotWordData,
                           yAxisHotWordData=yAxisHotWordData,
                           commentList=commentList)


@pb.route('/articleData')
def articleData():
    """Article listing with a sentiment column, classified in one batch by
    the fine-tuned BERT model."""
    articleOldList = articleDao.getAllArticle()
    titles = [article[1] for article in articleOldList]
    sentimentList = check_datas_batch(titles)
    # Label 1 == positive — TODO(review): confirm check_datas_batch encoding.
    articleNewList = [row + ('正面',) if label == 1 else row + ('负面',)
                      for row, label in zip(articleOldList, sentimentList)]
    return render_template('articleData.html', articleList=articleNewList)


@pb.route('/articleDataAnalysis')
def articleDataAnalysis():
    """Per-type article stats: like / comment / forward count histograms."""
    dfTypes = pd.read_csv('./spider/arcType_data.csv')
    arcTypeList = [value[0] for value in dfTypes.values]
    # Selected article type; default to the first one in the CSV.
    defaultArcType = request.args.get('arcType', default=arcTypeList[0])
    articleList = articleDao.getArticleByArcType(defaultArcType)
    rangeNum = 1000   # bin width for likes / comments
    rangeNum2 = 100   # bin width for forwards
    # NOTE(review): overflow labels were garbled in the scrape ('1万'/'2千').
    xDzData = _bin_labels(rangeNum, 10, '>1万')    # likes x-axis
    xPlData = _bin_labels(rangeNum, 10, '>1万')    # comments x-axis
    xZfData = _bin_labels(rangeNum2, 20, '>2千')   # forwards x-axis
    # row[4]=likes, row[3]=comments, row[2]=forwards (per dao row layout).
    yDzData = _histogram((a[4] for a in articleList), rangeNum, 10)
    yPlData = _histogram((a[3] for a in articleList), rangeNum, 10)
    yZfData = _histogram((a[2] for a in articleList), rangeNum2, 20)
    return render_template('articleDataAnalysis.html',
                           arcTypeList=arcTypeList,
                           defaultArcType=defaultArcType,
                           xDzData=xDzData, yDzData=yDzData,
                           xPlData=xPlData, yPlData=yPlData,
                           xZfData=xZfData, yZfData=yZfData)


@pb.route('/commentDataAnalysis')
def commentDataAnalysis():
    """Comment stats: like-count histogram, gender pie, comment word cloud."""
    commentList = commentDao.getAllComment()
    rangeNum = 5  # like-count bin width
    # NOTE(review): overflow label garbled in the scrape ('1百').
    xDzData = _bin_labels(rangeNum, 20, '>1百')
    yDzData = _histogram((c[4] for c in commentList), rangeNum, 20)
    genderDic = {'男': 0, '女': 0}
    for comment in commentList:
        if comment[8] in genderDic:  # ignore unknown/missing gender values
            genderDic[comment[8]] += 1
    genderData = [{'name': k, 'value': v} for k, v in genderDic.items()]
    _top_words_cloud('./fenci/comment_fre.csv', 50,
                     'comment_mask.jpg', 'comment_cloud.jpg')
    return render_template('commentDataAnalysis.html',
                           xDzData=xDzData, yDzData=yDzData,
                           genderData=genderData)


@pb.route('/articleCloud')
def articleCloud():
    """Word cloud of the top-50 article words."""
    _top_words_cloud('./fenci/article_fre.csv', 50,
                     'article_mask.jpg', 'article_cloud.jpg')
    return render_template('articleCloud.html')


@pb.route('/commentCloud')
def commentCloud():
    """Word cloud of the top-50 comment words."""
    _top_words_cloud('./fenci/comment_fre.csv', 50,
                     'comment_mask.jpg', 'comment_cloud.jpg')
    return render_template('commentCloud.html')


@pb.route('/commentUserCloud')
def commentUserCloud():
    """Word cloud of the top-50 commenter user names."""
    top50 = [cu[0] for cu in commentDao.getTopCommentUser()]
    names = ' '.join(top50)  # NOTE(review): separator lost in scrape
    wordcloudUtil.genWordCloudPic(names, 'comment_mask.jpg', 'comment_user_cloud.jpg')
    return render_template('commentUserCloud.html')


@pb.route('/ipDataAnalysis')
def ipDataAnalysis():
    """China-map data: article-author and commenter IP regions by province."""
    cityList = mapUtil.cityList
    # article[5] / comment[3] hold the free-text IP region strings.
    articleCityDicList = _region_counts(
        (article[5] for article in articleDao.getAllArticle()), cityList)
    commentCityDicList = _region_counts(
        (comment[3] for comment in commentDao.getAllComment()), cityList)
    return render_template('ipDataAnalysis.html',
                           articleCityDicList=articleCityDicList,
                           commentCityDicList=commentCityDicList)


@pb.route('/sentimentAnalysis')
def sentimentAnalysis():
    """Overall sentiment page: hot-word bar/treemap plus comment and article
    sentiment pies, all classified by the fine-tuned BERT model.

    Two legacy implementations (SnowNLP three-class scoring and the zero-shot
    LLM backend) existed as commented-out copies and were removed.
    """
    xHotBarData = ['正面', '负面']
    yHotBarData = [0, 0]
    df = pd.read_csv('./fenci/comment_fre.csv', nrows=100)  # top 100 hot words
    for value in df.values:
        if check_data([value[0]]) == '正面':
            yHotBarData[0] += 1
        else:
            yHotBarData[1] += 1
    hotTreeMapData = [{'name': xHotBarData[0], 'value': yHotBarData[0]},
                      {'name': xHotBarData[1], 'value': yHotBarData[1]}]
    commentPieData = [{'name': '正面', 'value': 0}, {'name': '负面', 'value': 0}]
    articlePieData = [{'name': '正面', 'value': 0}, {'name': '负面', 'value': 0}]
    # Capped at 100 rows for demo responsiveness (one model call per row).
    for comment in commentDao.getAllComment()[:100]:
        if check_data([comment[1]]) == '正面':
            commentPieData[0]['value'] += 1
        else:
            commentPieData[1]['value'] += 1
    for article in articleDao.getAllArticle()[:100]:
        if check_data([article[1]]) == '正面':
            articlePieData[0]['value'] += 1
        else:
            articlePieData[1]['value'] += 1
    df2 = pd.read_csv('./fenci/comment_fre.csv', nrows=15)
    xhotData15 = [x[0] for x in df2.values][::-1]  # reversed so bars ascend
    yhotData15 = [x[1] for x in df2.values][::-1]
    return render_template('sentimentAnalysis.html',
                           xHotBarData=xHotBarData,
                           yHotBarData=yHotBarData,
                           hotTreeMapData=hotTreeMapData,
                           commentPieData=commentPieData,
                           articlePieData=articlePieData,
                           xhotData15=xhotData15,
                           yhotData15=yhotData15)