Besides operations-research optimization problems, exact solving can also be applied to classification. Let's try it on the iris dataset.
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
iris.data.shape

This prints (150, 4). Now check how each of the 4 features correlates with the target:

[np.corrcoef(iris.data[:, x: x + 1].reshape(-1), iris.target)[0][1] for x in range(iris.data.shape[1])]
The correlations are both positive and negative, and some features are strongly correlated with the target:

[0.7825612318100814, -0.4266575607811243, 0.9490346990083884, 0.9565473328764028]
The objective here is to minimize the absolute value of the prediction error.
How do you turn an absolute value into something linear? For readers with a solid math background this may be obvious; for me it was the kind of trick you only know once you have run into it. Start with this reference:
【MATLAB】线性规划问题中碰到绝对值的情况_Luo199561的博客-CSDN博客_线性规划绝对值
Write the error y_true - y_pred as e1 - e2 with e1, e2 >= 0; then abs(y_true - y_pred) can be represented by e1 + e2. The reason this works: because the objective minimizes e1 + e2, at the optimum at most one of the pair is nonzero, so e1 + e2 equals |e1 - e2|, which is exactly the absolute error.
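A quick sanity check of the trick (a minimal sketch with pyscipopt, the same solver used below; the fixed error value -3 is just an illustration):

from pyscipopt import Model

m = Model("abs_trick")
e1 = m.addVar(vtype="C", lb=0)  # positive part of the error
e2 = m.addVar(vtype="C", lb=0)  # negative part of the error
m.addCons(e1 - e2 == -3)        # e1 - e2 stands in for the error
m.setObjective(e1 + e2, sense="minimize")
m.optimize()
print(m.getObjVal())            # 3.0, i.e. abs(-3)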
from sklearn import datasets
import numpy as np
from pyscipopt import quicksum
from pyscipopt import Model as ScipModel
from docplex.mp.model import Model as CplexModel
from sklearn.model_selection import train_test_split
class ExactSolution:
def __init__(self):
pass
def __call__(self):
self.add_decision_var()
self.add_constraint()
self.set_obj()
# start solve
self.optimize()
return None
def add_decision_var(self):
raise NotImplementedError
def add_constraint(self):
raise NotImplementedError
def set_obj(self):
raise NotImplementedError
def optimize(self):
raise NotImplementedError
def get_result(self):
raise NotImplementedError
def run_test(self, x, y_true, print_info="the accuracy result is: "):
_, weight = self.get_result()
        # linear prediction: bias + sum_i w_i * x_i, rounded to the nearest
        # integer to map onto the class labels 0, 1, 2
        y_pre = [weight[-1] + sum([weight[i] * x[j, i] for i in range(x.shape[1])]) for j in range(x.shape[0])]
        y_pre = [int(round(pre, 0)) for pre in y_pre]
accuracy = sum(y_pre == y_true) / x.shape[0]
print(print_info + str(round(accuracy, 2)))
class ScipSolution(ExactSolution):
def __init__(self, model_name, data_x, data_y):
self.model = ScipModel(model_name)
self.data_x = data_x
self.data_y = data_y
self.sample_no = list(range(data_x.shape[0]))
super().__init__()
    def add_decision_var(self):
        # w_var = {w1, w2, ..., w0}: the weight variables added to the model.
        # Note that lb defaults to 0 in pyscipopt, so the weights and the
        # bias are all constrained to be nonnegative here.
        self.w_var = {}
        for ix in range(self.data_x.shape[1]):
            self.w_var[ix] = self.model.addVar(vtype="C", lb=0)
        self.w_var[self.data_x.shape[1]] = self.model.addVar(vtype="C")  # the last one is the bias
        # e1/e2 split each sample's error into positive and negative parts
        self.e1_var = {}
        self.e2_var = {}
        for s_id in self.sample_no:
            self.e1_var[s_id] = self.model.addVar(vtype="C", lb=0)
            self.e2_var[s_id] = self.model.addVar(vtype="C", lb=0)
def add_constraint(self):
        # Constraint: the error (prediction minus true value) is expressed as e1 - e2
for s_id in self.sample_no:
self.model.addCons(
self.e1_var[s_id] - self.e2_var[s_id] == self.w_var[self.data_x.shape[1]] + quicksum(
self.w_var[i] * self.data_x[s_id, i] for i in range(self.data_x.shape[1])) - self.data_y[s_id])
def set_obj(self):
        # Objective: the absolute error abs(error) = e1 + e2
self.model.setObjective(quicksum(self.e1_var[s_id] + self.e2_var[s_id] for s_id in self.sample_no),
sense="minimize")
        # A sum-of-squares objective is not supported by SCIP; it raises: ValueError: Nonlinear objective functions are not supported!
# self.model.setObjective(quicksum(self.e1_var[s_id] * self.e1_var[s_id] + self.e2_var[s_id] * self.e2_var[s_id]
# for s_id in self.sample_no),
# sense="minimize")
def optimize(self):
self.model.optimize()
def get_result(self):
# result
obj = self.model.getObjVal()
weight = [self.model.getVal(self.w_var[i]) for i in range(self.data_x.shape[1] + 1)]
return obj, weight
class CplexSolution(ExactSolution):
def __init__(self, model_name, data_x, data_y):
self.model = CplexModel(model_name)
self.data_x = data_x
self.data_y = data_y
self.sample_no = list(range(data_x.shape[0]))
super().__init__()
def add_decision_var(self):
        # w_var = {w1, w2, ..., w0}: the weight variables added to the model.
        # docplex's continuous_var also defaults to lb=0, so the weights and
        # the bias are nonnegative here as well.
self.w_var = {}
for ix in range(self.data_x.shape[1]):
self.w_var[ix] = self.model.continuous_var(name="weight%s" % ix)
self.w_var[self.data_x.shape[1]] = self.model.continuous_var(name="bias") # 最后一个 偏置
self.e1_var = self.model.continuous_var_dict(self.sample_no, name="e1")
self.e2_var = self.model.continuous_var_dict(self.sample_no, name="e2")
def add_constraint(self):
        # Constraint: the error (prediction minus true value) is expressed as e1 - e2
self.model.add_constraints(
self.e1_var[s_id] - self.e2_var[s_id] == self.w_var[self.data_x.shape[1]] + self.model.sum(
self.w_var[i] * self.data_x[s_id, i] for i in range(self.data_x.shape[1])) - self.data_y[s_id]
for s_id in self.sample_no)
def set_obj(self):
        # Objective: the absolute error abs(error) = e1 + e2
self.model.minimize(self.model.sum(self.e1_var[s_id] + self.e2_var[s_id] for s_id in self.sample_no))
        # The sum-of-squares objective also works with cplex:
# self.model.minimize(
# self.model.sum(self.e1_var[s_id] * self.e1_var[s_id] + self.e2_var[s_id] * self.e2_var[s_id]
# for s_id in self.sample_no))
def optimize(self):
self.solution = self.model.solve(log_output=False)
if not self.solution:
raise RuntimeError("can't get solution!")
def get_result(self):
obj = self.solution.objective_value
weight = [self.w_var[i].solution_value for i in range(self.data_x.shape[1] + 1)]
# print("wwwwwwwwwwww weight", weight)
return obj, weight
def optimize(data_x, data_y, solver_type='cplex', model_name='iris_sample'):
if solver_type == 'cplex':
sol = CplexSolution(model_name, data_x, data_y)
sol()
elif solver_type == 'scip':
sol = ScipSolution(model_name, data_x, data_y)
sol()
else:
raise ValueError("the solver_type %s is not allowed!" % solver_type)
return sol
def get_data(test_size=0.2):
iris = datasets.load_iris()
    return train_test_split(iris.data, iris.target, test_size=test_size, shuffle=True)  # shuffle so each run gets a different split
def run(solver_type='cplex'):
x_train, x_valid, y_train, y_valid = get_data()
    sol = optimize(x_train, y_train, solver_type=solver_type)
sol.run_test(x_train, y_train, print_info="\nsolver_type: %s: the train data accuracy result is: " % solver_type)
sol.run_test(x_valid, y_valid, print_info="\nsolver_type: %s: the test data accuracy result is: " % solver_type)
if __name__ == '__main__':
run(solver_type='cplex') # "scip" or "cplex"
About w_var = {w1, w2, ..., w0}, the weight variables added to the model: at first glance a list [w1, w2, ..., w0] would work just as well, but cplex's continuous_var_dict returns a dict, so it is cleaner to use dicts throughout (dict lookups are fast, too). I later noticed the PyPI description of pyscipopt makes a similar point: in most cases you need a dictionary to hold the return values you want from SCIP.
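A quick check of that return type (a minimal sketch, assuming docplex is installed; the model name is arbitrary):

from docplex.mp.model import Model

m = Model("dict_check")
v = m.continuous_var_dict([0, 1, 2], name="e")
print(isinstance(v, dict), sorted(v.keys()))  # True [0, 1, 2]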
I also tried minimizing the sum of squared errors instead of the absolute error: cplex solves it fine, while scip raises ValueError: Nonlinear objective functions are not supported!. Because of these solver limitations the model stays linear, so classification accuracy should be better on datasets with a strong intrinsic linear pattern; with cplex the squared-error objective is an option as well.
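For reference, a minimal sketch of a quadratic objective in docplex (the toy constraint pinning the error to -3 is only for illustration):

from docplex.mp.model import Model

m = Model("quad_check")
e = m.continuous_var(lb=-10, name="e")
m.add_constraint(e == -3)
m.minimize(e * e)          # a quadratic objective, which docplex/CPLEX accepts
s = m.solve()
print(s.objective_value)   # 9.0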
Reference accuracy results (they vary slightly from run to run):
solver_type: cplex: the train data accuracy result is: 0.94
solver_type: cplex: the test data accuracy result is: 1.0
solver_type: scip: the train data accuracy result is: 0.95
solver_type: scip: the test data accuracy result is: 0.97