當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

[实践篇] 逻辑回归

發布時間：2025/3/15 编程问答 12 豆豆

生活随笔收集整理的這篇文章主要介紹了 [实践篇] 逻辑回归，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

代碼、數據已經上傳，可以自主下載。https://download.csdn.net/download/shenziheng1/10719760

1. 訓練過程

import numpy as np


def load_data(file_name):
    """Load tab-separated training samples from a text file.

    Every non-blank line holds the feature values followed by the label in
    the last column. A constant 1 is prepended to each feature row as the
    bias term x0. Blank lines are skipped.

    input:  file_name(string) path of the training data file
    output: feature_data(mat) one row per sample, bias column first
            label_data(mat) one label per row
    """
    feature_data = []
    label_data = []
    # `with` guarantees the file is closed even if a value fails to parse.
    with open(file_name, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # e.g. a trailing empty line; float('') would raise otherwise
                continue
            fields = line.split("\t")
            # x0 = 1, followed by every column except the last (the label)
            feature_data.append([1.0] + [float(v) for v in fields[:-1]])
            label_data.append([float(fields[-1])])
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling.
    return np.asmatrix(feature_data), np.asmatrix(label_data)


def sig(x):
    """Sigmoid function: 1 / (1 + exp(-x))."""
    return 1.0 / (1 + np.exp(-x))


def error_rate(h, label):
    """Compute the mean cross-entropy loss of the predictions.

    Rows whose prediction is not strictly between 0 and 1 contribute
    nothing to the sum (the logarithm is undefined there) but are still
    counted in the divisor m.

    input:  h(mat) predicted values, read from column 0
            label(mat) labels, read from column 0
    output: err/m(float) summed loss divided by the number of rows of h
    """
    m = np.shape(h)[0]
    sum_err = 0.0
    for i in range(m):
        p = h[i, 0]
        if p > 0 and (1 - p) > 0:
            y = label[i, 0]
            sum_err -= y * np.log(p) + (1 - y) * np.log(1 - p)
    return sum_err / m


def lr_train_bgd(feature, label, maxCycle, alpha):
    """Train logistic-regression weights with batch gradient descent.

    Weights start at all ones and are updated exactly maxCycle times
    (the original `while i <= maxCycle` loop ran one extra update).
    Progress is printed every 100 iterations.

    input:  feature(mat) sample rows
            label(mat) labels, one per sample row
            maxCycle(int) number of update iterations
            alpha(float) learning rate
    output: w(mat) weights, one row per feature column
    """
    n = np.shape(feature)[1]  # the number of features
    w = np.asmatrix(np.ones((n, 1)))  # one weight per feature
    for i in range(1, maxCycle + 1):
        h = sig(feature * w)
        err = label - h
        if i % 100 == 0:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("\t--------iter=" + str(i) +
                  ", train error rate=" + str(error_rate(h, label)))
        w = w + alpha * feature.T * err  # modifying weights
    return w


def save_model(file_name, w):
    """Write the weights to a file as one tab-separated line.

    input:  file_name(string) the filepath for saving model
            w(mat) weights, read from column 0
    """
    m = np.shape(w)[0]
    w_array = [str(w[i, 0]) for i in range(m)]
    with open(file_name, "w") as f_w:
        f_w.write("\t".join(w_array))


# NOTE: the original left a commented-out stub here: def imgplot(feature, w)


if __name__ == "__main__":
    # import training data
    print("--------load data--------")
    feature, label = load_data("data.txt")
    # training logistic regression model
    print("--------training--------")
    w = lr_train_bgd(feature, label, 1000, 0.01)
    # save model
    print("--------save model---------")
    save_model("weights", w)

訓練結果為：

2. 測試代碼

import numpy as np

from logistic_training import sig


def load_weight(w):
    """Load model weights from a tab-separated text file.

    Each non-blank line becomes one row of the returned matrix. Blank
    lines are skipped.

    input:  w(string) path of the weights file
    output: (mat) the weights
    """
    weights = []  # separate name so the path parameter `w` is not rebound
    with open(w) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            weights.append([float(x) for x in line.split("\t")])
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling.
    return np.asmatrix(weights)


def load_data(file_name, n):
    """Load tab-separated test samples, prepending the bias term 1.

    Lines that do not have exactly n - 1 columns are skipped.

    input:  file_name(string) path of the test data file
            n(int) expected row width including the bias column
    output: (mat) one row per accepted line, bias column first
    """
    feature_data = []
    with open(file_name) as f:
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) != n - 1:
                continue
            feature_data.append([1.0] + [float(x) for x in fields])
    return np.asmatrix(feature_data)


def predict(data, w):
    """Predict 0/1 labels by thresholding sig(data * w.T) at 0.5.

    input:  data(mat) sample rows
            w(mat) weights; transposed before the multiplication
    output: h(mat) column 0 holds 0.0 where the value is below 0.5,
            else 1.0
    """
    h = sig(data * w.T)
    m = np.shape(h)[0]
    for i in range(m):
        h[i, 0] = 0.0 if h[i, 0] < 0.5 else 1.0
    return h


def save_result(file_name, result):
    """Write column 0 of result to a file as one tab-separated line.

    input:  file_name(string) output path
            result(mat) predictions
    """
    m = np.shape(result)[0]
    tmp = [str(result[i, 0]) for i in range(m)]
    with open(file_name, "w") as f_result:
        f_result.write("\t".join(tmp))


if __name__ == "__main__":
    # loading LR model
    print("--------load model---------")
    w = load_weight("weights")
    n = np.shape(w)[1]
    # loading testing data
    testData = load_data("test_data", n)
    # predicting test data
    print("--------prediction--------")
    h = predict(testData, w)
    print(h)
    # save prediction results
    print("--------save prediction--------")
    save_result("results", h)

3. 補充知識

readlines()：用于讀取所有行（直到結束符 EOF）并返回列表，該列表可以由 Python 的 for ... in ... 結構進行處理。如果已經到達結束符 EOF，則返回空列表。
strip():用于移除字符串頭尾指定的字符（默認為空格或換行符）或字符序列。注意：該方法只能刪除開頭或是結尾的字符，不能刪除中間部分的字符。

# Demonstrates str.strip() with an explicit set of characters to remove.
# The variable is named `text` so that the builtin `str` is not shadowed.
text = "00000003210Runoob01230000000"
print(text.strip("0"))  # strip the leading and trailing '0' characters
# Output: 3210Runoob0123