匹配關鍵字,給新聞貼上個股標籤
阿新 • • 發佈:2019-01-03
貼上個股標籤
# Tag financial news articles with the stocks whose keywords they mention.
#
# Files read from the current working directory when this module is loaded:
#   finace_news_content.csv  column 0: news id, column 1: article text
#   Keyword.csv              column 1: stock id, column 2: comma-separated keywords
#   select_one.csv           column 0: "<prefix>%<word>"
#   select_double.csv        column 0: "<prefix>%<word1>,<word2>"
#
# An article containing any select_one word, or both words of any
# select_double pair, is never tagged.  The results are left in the
# module-level lists `texts` and `double_id`; `run()` stores `texts` in the
# database.
import csv

import pandas as pd

from database import Database
# from connect_keywords.database import Database


def _read_rows(path):
    """Return all rows of the CSV file at *path* as a list of string lists.

    The file handle is closed before returning.
    """
    # NOTE(review): no encoding is passed, so the platform default is used --
    # confirm it matches how the CSV files were written.
    with open(path, 'r') as handle:
        return list(csv.reader(handle))


def _is_excluded(content):
    """Return True when *content* hits an exclusion rule and must not be tagged."""
    if any(word in content for word in select_one):
        return True
    for pair in select_two:
        first, second = pair[0], pair[1]
        if first in content and second in content:
            return True
    return False


# All four files are read up front, so a missing file fails before any output.
# The names are kept from the original script; they now hold lists of rows.
csv_file = _read_rows('finace_news_content.csv')
csv_keyword = _read_rows('Keyword.csv')
csv_select_one = _read_rows('select_one.csv')
csv_select_double = _read_rows('select_double.csv')

# News articles in file order: original database id plus the article text.
contents = [
    {'id': row[0], 'content': row[1].strip('\t\n')}
    for row in csv_file
]
print(contents)

# stock id -> remaining columns of the row (element 0 holds the keywords).
keyword_dict = {row[1]: row[2:] for row in csv_keyword}
print(keyword_dict)

# Single exclusion words: the text after the first '%'.
select_one = [row[0].split('%')[1] for row in csv_select_one]
print(select_one)

# Exclusion word pairs: the text after the first '%', split on ','.
select_two = [row[0].split('%')[1].split(',') for row in csv_select_double]
print(select_two)

double_id = []      # news ids that matched more than one stock (one entry per extra match)
texts = []          # tagged articles: {'id', 'stoc_id': [stock ids], 'content'}
connect_dict = {}   # not used below; kept because it is a module-level name
_texts_by_id = {}   # news id -> its entry in `texts`, replaces a linear scan per match

for text in contents:
    content = text['content']
    if _is_excluded(content):
        continue

    # Walk every stock and its keywords.
    for key, value in keyword_dict.items():
        # A stock row without a keyword column simply never matches.
        keywords = value[0].split(',') if value else []

        # One tag per stock is enough, so stop at the first keyword found.
        # Empty keywords (empty cell, trailing comma) are skipped because
        # '' is contained in every string and would tag every article.
        # NOTE(review): the original source was collapsed onto one line; this
        # assumes its `break` applied to every keyword hit -- confirm.
        if not any(keyword and keyword in content for keyword in keywords):
            continue

        # Has this article already been matched to a stock?
        existing = _texts_by_id.get(text['id'])
        if existing is not None:
            existing['stoc_id'].append(key)
            double_id.append(existing['id'])
        else:
            entry = {'id': text['id'], 'stoc_id': [key], 'content': content}
            texts.append(entry)
            _texts_by_id[text['id']] = entry

print(texts)
print(double_id)


def run():
    """Insert every tagged article in `texts` into `news_connect_keywords`.

    Each row stores the news id, the article text and the string form of the
    matched stock-id list.  The connection is closed even if an insert raises.
    """
    insert = 'INSERT IGNORE INTO news_connect_keywords(news_id, content, stock_id) VALUES (%s, %s, %s)'
    db = Database()
    db.connect('news_connect_keyword')
    try:
        for data in texts:
            db.execute(insert, [data['id'], data['content'], str(data['stoc_id'])])
    finally:
        db.close()


# Database insertion is left disabled, as in the original script:
# if __name__ == '__main__':
#     run()