from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Train a logistic-regression classifier on train.csv, predict labels for the
# rows of test.csv, print them, and append them to ./result.txt.

# 1. Load the data sets.
# NOTE(review): columns are selected by position; presumably columns 1-3 are
# the features (the first of them being the categorical gender column) and
# column 4 of train.csv is the target -- confirm against the CSV layout.
dataset = pd.read_csv('train.csv')
dataset2 = pd.read_csv('test.csv')
X = dataset.iloc[:, [1, 2, 3]].values
X2 = dataset2.iloc[:, [1, 2, 3]].values
Y = dataset.iloc[:, 4].values

# Convert the gender column to integers.
# The encoder is fitted on the training data only and then reused for the
# test data, so a given category maps to the same integer in both sets.
# Fitting a separate encoder on the test set can assign different codes
# whenever the two files do not contain exactly the same categories.
# transform() raises ValueError if test.csv contains a label that was not
# present in train.csv.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
X2[:, 0] = labelencoder_X.transform(X2[:, 0])

# 2. Split the data set into training and test sets.
# Disabled: the rows to predict are read from test.csv above.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, Y, test_size=0.25, random_state=0)

# 3. Feature scaling: fit the scaler on the training features and apply the
# same scaling to the test features.
sc = StandardScaler()
X_train = sc.fit_transform(X)
X2_test = sc.transform(X2)

# 4. Train.
classifier = LogisticRegression()
classifier.fit(X_train, Y)

# 5. Predict.
y_pred = classifier.predict(X2_test)
print(y_pred)

# Append one "<1-based row number>:<predicted label>" line per prediction.
# The file is opened in append mode, so earlier results are kept; the
# context manager guarantees it is flushed and closed even on error.
with open("./result.txt", "a") as f:
    f.writelines(
        f"{cnt}:{label}\n" for cnt, label in enumerate(y_pred, start=1)
    )


# 6. Evaluate the predictions.

# Build the confusion matrix.
# Disabled: y_test is only defined when the split in step 2 is enabled.
# from sklearn.metrics import confusion_matrix
# cm = confusion_matrix(y_test, y_pred)
# print(cm)

# 0 comments

# No comments so far...