Beating the zero benchmark in Kaggle's Loan Default Prediction competition. Comments are most welcome :)
"""
Beating the Benchmark :::::: Kaggle Loan Default Prediction Challenge.
__author__ : Abhishek
"""
try:
    import cPickle
except ImportError:  # Python 3: cPickle was folded into the pickle module
    import pickle as cPickle

import numpy as np
import pandas as pd
import scipy.stats as stats
import sklearn.linear_model as lm
from sklearn import preprocessing
from sklearn.svm import LinearSVC
def testdata(filename):
    """Load the test-set CSV and return its imputed feature matrix.

    The file is parsed as comma-separated text with a header row, converted
    to a float array, and every NaN is replaced by the mean of the non-NaN
    values in the same column of this file.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.  All columns must be convertible to
        float.

    Returns
    -------
    numpy.ndarray
        2-D float array containing columns ``1:-3`` of the imputed data,
        i.e. the first column and the last three columns are dropped.
    """
    # The bad-line keyword arguments used originally only restated the
    # pandas defaults and no longer exist in current pandas releases.
    frame = pd.read_table(filename, sep=',')
    # np.array always copies, so the in-place imputation below never writes
    # into (possibly read-only) memory owned by the DataFrame.
    X = np.array(frame.values, dtype=float)
    # scipy.stats.nanmean has been removed from SciPy; np.nanmean is the
    # drop-in replacement.
    col_mean = np.nanmean(X, axis=0)
    inds = np.where(np.isnan(X))
    # inds[1] holds the column index of each NaN cell.
    X[inds] = np.take(col_mean, inds[1])
    return X[:, 1:-3]


# The name starts with "test", so pytest would try to collect this loader as
# a test case from any test module that imports it; opt out explicitly.
testdata.__test__ = False
def data(filename):
    """Load the training CSV and return its feature matrix and labels.

    The file is parsed as comma-separated text with a header row, converted
    to a float array, and every NaN is replaced by the mean of the non-NaN
    values in the same column of this file.  The imputation is applied to
    every column, including the last one that is returned as the labels.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.  All columns must be convertible to
        float.

    Returns
    -------
    features : numpy.ndarray
        2-D float array containing columns ``1:-4`` of the imputed data,
        i.e. the first column and the last four columns are dropped.
    labels : numpy.ndarray
        1-D float array holding the last column of the imputed data.
    """
    # The bad-line keyword arguments used originally only restated the
    # pandas defaults and no longer exist in current pandas releases.
    frame = pd.read_table(filename, sep=',')
    # np.array always copies, so the in-place imputation below never writes
    # into (possibly read-only) memory owned by the DataFrame.
    X = np.array(frame.values, dtype=float)
    # scipy.stats.nanmean has been removed from SciPy; np.nanmean is the
    # drop-in replacement.
    col_mean = np.nanmean(X, axis=0)
    inds = np.where(np.isnan(X))
    # inds[1] holds the column index of each NaN cell.
    X[inds] = np.take(col_mean, inds[1])
    labels = X[:, -1]
    features = X[:, 1:-4]
    return features, labels
def _l1_selected_columns(coef, threshold=1e-5):
    """Return a boolean mask of the features whose summed |weight| >= threshold."""
    importance = np.abs(np.atleast_2d(coef)).sum(axis=0)
    return importance >= threshold


def createSub(clf, traindata, labels, testdata):
    """Fit a two-stage model and write the test predictions to disk.

    Stage 1 decides *whether* a row has a positive label: an L1-penalised
    ``LinearSVC`` picks the features with non-negligible weights, and ``clf``
    is fitted on those features against the binary target ``labels > 0``.

    Stage 2 decides *which* positive label: ``clf`` is refitted on the
    training rows with a positive label (using all features, with the
    integer label values as classes) and predicts a label for every test row
    that stage 1 flagged.  Test rows not flagged by stage 1 are predicted
    as 0.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.  It is refitted in
        place; on return it holds the stage-2 fit (or the stage-1 fit when no
        test row was flagged).
    traindata : numpy.ndarray
        2-D training feature matrix.
    labels : array-like
        Training targets, one per training row; values are truncated to int.
    testdata : numpy.ndarray
        2-D test feature matrix with the same columns as ``traindata``.

    Returns
    -------
    None
        The integer predictions are written, one per line, to
        ``predictions.csv`` in the current working directory.

    Raises
    ------
    ValueError
        If the L1 feature selection leaves no feature at all.
    """
    # ``map`` is lazy on Python 3, so convert through NumPy instead.
    labels = np.asarray(labels).astype(int)
    is_positive = (labels > 0).astype(int)

    # --- Stage 1: binary "positive label or not" on L1-selected features ---
    lsvc = LinearSVC(C=0.01, penalty="l1", dual=False, verbose=2)
    lsvc.fit(traindata, is_positive)
    # Estimators no longer provide ``transform``; apply the same selection
    # rule explicitly (keep features whose absolute weight is >= 1e-5).
    keep = _l1_selected_columns(lsvc.coef_)
    if not keep.any():
        raise ValueError("Invalid threshold: all features are discarded.")

    clf.fit(traindata[:, keep], is_positive)
    flagged_mask = clf.predict(testdata[:, keep]) == 1
    flagged_rows = np.where(flagged_mask)[0]

    # --- Stage 2: predict the label value for the flagged rows only ---
    predictions = np.zeros(testdata.shape[0], dtype=int)
    if flagged_rows.size:
        positive_rows = np.where(is_positive > 0)[0]
        # Note: stage 2 deliberately uses the full feature set, not ``keep``.
        clf.fit(traindata[positive_rows], labels[positive_rows])
        predictions[flagged_rows] = clf.predict(testdata[flagged_rows])
    # else: nothing was flagged, so every prediction stays 0 and we avoid
    # calling predict() on an empty matrix.

    np.savetxt('predictions.csv', predictions, delimiter=',', fmt='%d')
if __name__ == '__main__':
    # Script entry point: load both CSV files, standardise the features and
    # write the two-stage predictions via createSub().
    filename = 'trainv2.csv'
    X_test = testdata('testv2.csv')
    X, labels = data(filename)
    # The dual formulation is only implemented by the liblinear solver, which
    # is no longer scikit-learn's default, so it has to be requested
    # explicitly or the fit is rejected.
    # NOTE(review): recent scikit-learn releases deprecate liblinear for
    # multiclass targets (used in createSub's second stage) - confirm against
    # the installed version.
    clf = lm.LogisticRegression(penalty='l2', dual=True, tol=0.0001,
                                C=1.0, fit_intercept=True,
                                intercept_scaling=1.0, class_weight=None,
                                random_state=None, solver='liblinear')
    # NOTE(review): train and test are standardised independently, each with
    # its own column means and variances; kept as in the original benchmark.
    X = preprocessing.scale(X)
    X_test = preprocessing.scale(X_test)
    createSub(clf, X, labels, X_test)
"beating the benchmark", huh? u gonna git a letter from my lawyer :P
ReplyDeleteHaha.. dont worry. u inspired me :P
ReplyDeletenice work ....
ReplyDelete
But when I try to replicate this on my machine, I get a MemoryError in Python. I'm running this on a 4 GB VM.
I have tried raising the limit to 4 GB with the resource module, but no luck.
Is this a Python limitation (which I don't believe it should be)? If it's a machine issue instead, isn't 4 GB of RAM enough?
I have a Mac running OS X with 8 GB of RAM and this code works fine. If you are facing memory problems, try loading the data in chunks using pandas.
ReplyDelete
ReplyDeleteEVERYBODY READ THIS TESTIMONY ON HOW I GOT MY LOAN FROM A LEGIT AND TRUSTED LOAN COMPANY My name is Kjerstin Lis, I have been searching for a loan to settle my debts, everyone I met scammed and took my money until I finally met Mr, Pedro He was able to give me a loan of R 450,000.00.He also helped some other colleagues of mine. i am talking as the happiest person in the whole wide world today and i told myself that any lender that rescue my family from our poor situation, i will tell the name to the whole wide world and i am so happy to say that my family is back for good because i was in need a loan to start my life all over as i am a single mum with 3 kids and the whole world seemed like it was hanging on me until i meant the GOD sent loan lender that changed my life and that of my family, a GOD fearing lender, Mr, Pedro, he was the Savior GOD sent to rescue my family and at first i thought it was not going to be possible until i received my loan, i invited him over to my family get-together party which he did not decline and i will advise any one who is in genuine need of a loan to contact Mr, Pedro via email at (pedroloanss@gmail.com ) because he is the most understanding and kind hearten lender I have ever met with a caring heart. He doesn't know that I am doing this by spreading his goodwill towards me but I feel I should share this with you all. Contact the right loan company Email via: pedroloanss@gmail.com