"""Train a logistic-regression classifier on data.csv and score rows of test.csv.

The script fits the model on a random 80/20 split of ``data.csv``, prints the
hold-out accuracy, then predicts a label for every data row of ``test.csv``
and prints the row together with its prediction.
"""
import csv
import random

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the training data.
data = pd.read_csv('data.csv')

# Split into training and test sets.
#   test_size    - fraction of the whole data set held out for testing.
#   random_state - seed for the shuffle. It is drawn anew on every run here,
#                  so the split (and the reported accuracy) changes between
#                  runs; pin it to a constant if reproducible results are needed.
# Features are every column except the last two; the label is the last column.
# NOTE(review): the second-to-last column is used as neither feature nor
# label - presumably intentional (test.csv is sliced the same way); confirm.
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, :-2],
    data.iloc[:, -1],
    test_size=0.2,
    random_state=random.randint(0, 1024),
)

# Define the model.
# Reference: https://scikit-learn.org/stable/modules/preprocessing.html
model = LogisticRegression()

# Train the model.
model.fit(X_train, y_train)

# Measure accuracy on the held-out test split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('模型准确率:', accuracy)

# Validate against the rows of test.csv.
with open('test.csv', newline='', encoding='utf-8') as csvfile:
    # The first row is the header; every following row is one sample.
    test_d = list(csv.reader(csvfile))

if test_d:
    # Same column selection as for training: drop the last two columns.
    title_d = test_d[0][:-2]
    for row in test_d[1:]:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to score.
            continue
        new_d = pd.DataFrame([row[:-2]], columns=title_d)
        # csv.reader returns every field as str, so convert to numbers
        # explicitly instead of relying on implicit coercion inside predict().
        y_new = model.predict(new_d.apply(pd.to_numeric))
        print(new_d)
        # 0 or 1: 0 means the cutoff line was not reached, 1 means it was.
        print('是否过线: ', y_new)