Aiml Ex 4-7

This document contains code for several machine learning algorithms: Naive Bayes, linear regression, logistic regression, decision trees, random forests, and support vector machines (SVM). Each program imports a dataset, splits it into training and test sets, fits a model to the training data, makes predictions on the test data, and evaluates model performance through metrics such as accuracy. Visualizations such as regression lines and decision boundaries are also generated.


PROGRAM:

# Importing library
import math
import random
import csv
# the categorical class names are changed to numeric data
# eg: yes and no encoded to 1 and 0
def encode_class(mydata):
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata
# Splitting the data
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train = []
    # initially the test set holds the whole dataset
    test = list(mydata)
    while len(train) < train_num:
        # index generated randomly from range 0
        # to the length of the test set
        index = random.randrange(len(test))
        # pop data rows from the test set and put them in train
        train.append(test.pop(index))
    return train, test
# Group the data rows under each class,
# eg: class_dict[yes] and class_dict[no]
def groupUnderClass(mydata):
    class_dict = {}
    for i in range(len(mydata)):
        if mydata[i][-1] not in class_dict:
            class_dict[mydata[i][-1]] = []
        class_dict[mydata[i][-1]].append(mydata[i])
    return class_dict
# Calculating Mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculating (sample) Standard Deviation
def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def MeanAndStdDev(mydata):
    # eg: for rows [[a, b, c], [m, n, o], [x, y, z]], the mean of the
    # 1st attribute is (a + m + x) / 3 and of the 2nd is (b + n + y) / 3
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # delete the summary of the last column (the class label)
    del info[-1]
    return info
# find Mean and Standard Deviation under each class
def MeanAndStdDevForClass(mydata):
    info = {}
    class_dict = groupUnderClass(mydata)
    for classValue, instances in class_dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info
# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

# Calculate Class Probabilities
def calculateClassProbabilities(info, test):
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, std_dev = classSummaries[i]
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(x, mean, std_dev)
    return probabilities

# Make prediction - the class with the highest probability wins
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# returns predictions for a set of examples
def getPredictions(info, test):
    predictions = []
    for i in range(len(test)):
        result = predict(info, test[i])
        predictions.append(result)
    return predictions

# Accuracy score
def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0
# driver code
# add the data path in your system
filename = r'E:\pythonProject1\pima-indians-diabetes.csv'
# load the file and store it in the mydata list
mydata = csv.reader(open(filename, "rt"))
mydata = list(mydata)
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]
# split ratio = 0.7
# 70% of the data is used for training and 30% for testing
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))
# prepare model
info = MeanAndStdDevForClass(train_data)
# test model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)
PROGRAM:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

x = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
y = np.array([2, 4, 6, 8, 10])
regressor = LinearRegression()
regressor.fit(x, y)
y_pred = regressor.predict(x)
print('Coefficients:', regressor.coef_)
print('Intercept:', regressor.intercept_)
plt.scatter(x, y, color='black')
plt.plot(x, y_pred, color='blue', linewidth=3)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
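Because the sample data satisfy y = 2x exactly, the fitted line passes through every point; a quick sketch (an addition, not in the original listing) that confirms this with scikit-learn's error metrics:

from sklearn.metrics import mean_squared_error, r2_score
# Evaluate the fit on the training points; y = 2x exactly, so MSE is 0 and R^2 is 1
print('MSE:', mean_squared_error(y, y_pred))
print('R^2:', r2_score(y, y_pred))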
PROGRAM:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

X = np.array([[1, 2], [2, 3], [4, 5], [5, 6]])
y = np.array([0, 0, 1, 1])
classifier = LogisticRegression()
classifier.fit(X, y)
print('Coefficient:', classifier.coef_)
print('Intercept:', classifier.intercept_)
xx, yy = np.meshgrid(np.arange(0, 6, 0.01), np.arange(0, 8, 0.01))
z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
plt.contourf(xx, yy, z, cmap=plt.cm.RdBu)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu_r, edgecolors='k')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression')
plt.show()
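To classify a new point with the fitted model, predict and predict_proba can be called directly; a brief sketch (the sample point [3, 4] is illustrative, not from the original):

# Hard prediction and probability estimates for a hypothetical new sample
new_point = np.array([[3, 4]])
print('Predicted class:', classifier.predict(new_point))
print('Class probabilities:', classifier.predict_proba(new_point))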
PROGRAM:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
iris = load_iris()
clf = DecisionTreeClassifier(random_state=0)
clf.fit(iris.data, iris.target)
plot_tree(clf, filled=True)
plt.show()
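Besides the plot, the learned rules can be dumped as plain text with sklearn.tree.export_text, which is often easier to inspect; a minimal sketch continuing from the fitted clf above:

from sklearn.tree import export_text
# Text rendering of the same fitted tree with readable feature names
print(export_text(clf, feature_names=list(iris.feature_names)))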
PROGRAM:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target
X_train, X_test, y_train, y_test = train_test_split(df[iris.feature_names], df['target'], test_size=0.3)
rfc = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
importances = rfc.feature_importances_
indices = list(range(len(importances)))
plt.bar(indices, importances, color='r')
plt.xticks(indices, iris.feature_names, rotation=90)
plt.title('Feature Importance')
plt.show()
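A single train/test split can be noisy on a dataset as small as iris, so a cross-validated score gives a steadier accuracy estimate; a short sketch (an addition, not part of the original exercise):

from sklearn.model_selection import cross_val_score
# 5-fold cross-validated accuracy for the same forest configuration
scores = cross_val_score(RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0),
                         iris.data, iris.target, cv=5)
print('CV accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))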
PROGRAM:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
# Load data
data = pd.read_csv("apples_and_oranges.csv")
# Classify predictors and target
X = data.iloc[:, 0:2].values
Y = data.iloc[:, 2].values
# Encode target labels
le = LabelEncoder()
Y = le.fit_transform(Y)
# Split data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
# Initialize SVM classifier and fit training data
classifier = SVC(kernel='rbf', random_state=1)
classifier.fit(X_train, Y_train)
# Predict classes for test set
Y_pred = classifier.predict(X_test)
# Compute accuracy and confusion matrix
cm = confusion_matrix(Y_test, Y_pred)
accuracy = float(cm.diagonal().sum()) / len(Y_test)
print("Accuracy of SVM for the given dataset: ", accuracy)
# Plot decision boundary and data points
plt.figure(figsize=(7, 7))
X_set, Y_set = X_train, Y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min()-1, stop=X_set[:, 0].max()+1, step=0.01),
np.arange(start=X_set[:, 1].min()-1, stop=X_set[:, 1].max()+1, step=0.01))
Z = classifier.predict(np.array([X1.ravel(), X2.ravel()]).T)
Z = Z.reshape(X1.shape)
plt.contourf(X1, X2, Z, alpha=0.75, cmap=ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set == j, 1],
                color=ListedColormap(('red', 'orange'))(i), label=j)
plt.title('Apples Vs Oranges')
plt.xlabel('Weight in grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()
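The diagonal-sum accuracy above can be cross-checked with sklearn.metrics.accuracy_score; a one-line sketch continuing from the variables above:

from sklearn.metrics import accuracy_score
# Should match the confusion-matrix calculation above
print('accuracy_score check:', accuracy_score(Y_test, Y_pred))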
