Mirror of https://github.com/TheAlgorithms/Python.git (synced 2025-07-26 01:43:17 +08:00)
Added Dequeue in Python
139  machine_learning/decision_tree.py  Normal file
@@ -0,0 +1,139 @@
"""
Implementation of a basic regression decision tree.
Input data set: The input data set must be 1-dimensional with continuous labels.
Output: The decision tree maps a real number input to a real number output.
"""

import numpy as np


class Decision_Tree:
    def __init__(self, depth=5, min_leaf_size=5):
        self.depth = depth
        self.decision_boundary = 0
        self.left = None
        self.right = None
        self.min_leaf_size = min_leaf_size
        self.prediction = None

    def mean_squared_error(self, labels, prediction):
        """
        mean_squared_error:
        @param labels: a one-dimensional numpy array
        @param prediction: a floating point value
        return value: the mean squared error when prediction is used to estimate the labels
        """
        if labels.ndim != 1:
            print("Error: Input labels must be one dimensional")

        return np.mean((labels - prediction) ** 2)

    def train(self, X, y):
        """
        train:
        @param X: a one-dimensional numpy array
        @param y: a one-dimensional numpy array.
        The contents of y are the labels for the corresponding X values

        train does not have a return value
        """
        # Check that the inputs conform to our dimensionality constraints.
        if X.ndim != 1:
            print("Error: Input data set must be one dimensional")
            return
        if len(X) != len(y):
            print("Error: X and y have different lengths")
            return
        if y.ndim != 1:
            print("Error: Data set labels must be one dimensional")
            return

        if len(X) < 2 * self.min_leaf_size:
            self.prediction = np.mean(y)
            return

        if self.depth == 1:
            self.prediction = np.mean(y)
            return

        best_split = 0
        min_error = self.mean_squared_error(y, np.mean(y)) * 2

        # Loop over all possible splits for the decision tree and find the best one.
        # If no split yields an error below 2 * the error of the entire array,
        # the data set is not split and the mean of the entire array is used as the predictor.
        for i in range(len(X)):
            if len(X[:i]) < self.min_leaf_size:
                continue
            elif len(X[i:]) < self.min_leaf_size:
                continue
            else:
                error_left = self.mean_squared_error(y[:i], np.mean(y[:i]))
                error_right = self.mean_squared_error(y[i:], np.mean(y[i:]))
                error = error_left + error_right
                if error < min_error:
                    best_split = i
                    min_error = error

        if best_split != 0:
            left_X = X[:best_split]
            left_y = y[:best_split]
            right_X = X[best_split:]
            right_y = y[best_split:]

            self.decision_boundary = X[best_split]
            self.left = Decision_Tree(depth=self.depth - 1, min_leaf_size=self.min_leaf_size)
            self.right = Decision_Tree(depth=self.depth - 1, min_leaf_size=self.min_leaf_size)
            self.left.train(left_X, left_y)
            self.right.train(right_X, right_y)
        else:
            self.prediction = np.mean(y)

        return

    def predict(self, x):
        """
        predict:
        @param x: a floating point value to predict the label of
        The prediction works by recursively calling predict on the appropriate
        subtree based on the tree's decision boundary.
        """
        if self.prediction is not None:
            return self.prediction
        elif self.left is not None and self.right is not None:
            if x >= self.decision_boundary:
                return self.right.predict(x)
            else:
                return self.left.predict(x)
        else:
            print("Error: Decision tree not yet trained")
            return None


def main():
    """
    In this demonstration we generate a sample data set from numpy's sin function.
    We then train a decision tree on the data set and use it to predict the labels
    of 10 test values. Finally, the mean squared error over this test set is displayed.
    """
    X = np.arange(-1.0, 1.0, 0.005)
    y = np.sin(X)

    tree = Decision_Tree(depth=10, min_leaf_size=10)
    tree.train(X, y)

    test_cases = (np.random.rand(10) * 2) - 1
    predictions = np.array([tree.predict(x) for x in test_cases])
    avg_error = np.mean((predictions - np.sin(test_cases)) ** 2)

    print("Test values: " + str(test_cases))
    print("Predictions: " + str(predictions))
    print("Average error: " + str(avg_error))


if __name__ == '__main__':
    main()
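A minimal sketch of using the class outside of main(), assuming the snippet runs where Decision_Tree is in scope (for example appended to this file); the tiny linear data set is made up purely for illustration:

    import numpy as np

    X = np.linspace(0.0, 1.0, 20)   # twenty evenly spaced inputs
    y = 2 * X                       # toy 1-D regression target

    tree = Decision_Tree(depth=3, min_leaf_size=5)
    tree.train(X, y)

    # each prediction is the mean label of the leaf that x falls into,
    # so the output is a coarse piecewise-constant estimate of 2 * x
    print(tree.predict(0.25))
    print(tree.predict(0.75))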
121  machine_learning/gradient_descent.py  Normal file
@@ -0,0 +1,121 @@
"""
Implementation of the gradient descent algorithm for minimizing the cost of a linear hypothesis function.
"""
import numpy

# List of (input, output) pairs
train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009


def _error(example_no, data_set='train'):
    """
    :param data_set: train data or test data
    :param example_no: example number whose error has to be checked
    :return: error for the example pointed to by example_no.
    """
    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)


def _hypothesis_value(data_input_tuple):
    """
    Calculates the hypothesis function value for a given input
    :param data_input_tuple: input tuple of a particular example
    :return: value of the hypothesis function at that point.
    Note that there is a bias input whose value is fixed as 1.
    It is not explicitly included in the input data, but the hypothesis function
    uses it, so it is handled separately below by adding parameter_vector[0].
    """
    hyp_val = 0
    for i in range(len(parameter_vector) - 1):
        hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
    hyp_val += parameter_vector[0]
    return hyp_val


def output(example_no, data_set):
    """
    :param data_set: test data or train data
    :param example_no: example whose output is to be fetched
    :return: output for that example
    """
    if data_set == 'train':
        return train_data[example_no][1]
    elif data_set == 'test':
        return test_data[example_no][1]


def calculate_hypothesis_value(example_no, data_set):
    """
    Calculates the hypothesis value for a given example
    :param data_set: test data or train data
    :param example_no: example whose hypothesis value is to be calculated
    :return: hypothesis value for that example
    """
    if data_set == "train":
        return _hypothesis_value(train_data[example_no][0])
    elif data_set == "test":
        return _hypothesis_value(test_data[example_no][0])


def summation_of_cost_derivative(index, end=m):
    """
    Calculates the sum of the cost function derivative
    :param index: index with respect to which the derivative is calculated
    :param end: value where the summation ends, default is m, the number of examples
    :return: the summation of the cost derivative
    Note: if index is -1, the summation is taken with respect to the bias parameter.
    """
    summation_value = 0
    for i in range(end):
        if index == -1:
            summation_value += _error(i)
        else:
            summation_value += _error(i) * train_data[i][0][index]
    return summation_value


def get_cost_derivative(index):
    """
    :param index: index of the parameter vector with respect to which the derivative is calculated
    :return: derivative with respect to that index
    Note: if index is -1, the derivative is taken with respect to the bias parameter.
    """
    cost_derivative_value = summation_of_cost_derivative(index, m) / m
    return cost_derivative_value


def run_gradient_descent():
    global parameter_vector
    # Tune these values to set a tolerance for the predicted output
    absolute_error_limit = 0.000002
    relative_error_limit = 0
    j = 0
    while True:
        j += 1
        temp_parameter_vector = [0, 0, 0, 0]
        for i in range(0, len(parameter_vector)):
            cost_derivative = get_cost_derivative(i - 1)
            temp_parameter_vector[i] = parameter_vector[i] - LEARNING_RATE * cost_derivative
        if numpy.allclose(parameter_vector, temp_parameter_vector,
                          atol=absolute_error_limit, rtol=relative_error_limit):
            break
        parameter_vector = temp_parameter_vector
    print("Number of iterations:", j)


def test_gradient_descent():
    for i in range(len(test_data)):
        print("Actual output value:", output(i, 'test'))
        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))


if __name__ == '__main__':
    run_gradient_descent()
    print("\nTesting gradient descent for a linear hypothesis function.\n")
    test_gradient_descent()
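A quick worked check of the hypothesis function, using the first training example and the initial parameter_vector defined above (i.e. before run_gradient_descent() has modified it); the snippet assumes it runs in this module's namespace:

    # theta = [2, 4, 1, 5]; x = (5, 2, 3); the bias input is fixed at 1
    # h(x) = 2*1 + 4*5 + 1*2 + 5*3 = 39
    print(_hypothesis_value((5, 2, 3)))  # 39
    print(_error(0))                     # 39 - 15 = 24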
172  machine_learning/k_means_clust.py  Normal file
@@ -0,0 +1,172 @@
'''README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)

Requirements:
  - sklearn
  - numpy
  - matplotlib

Python:
  - 3.5

Inputs:
  - X, a 2D numpy array of features.
  - k, number of clusters to create.
  - initial_centroids, initial centroid values generated by a utility function (mentioned in usage).
  - maxiter, maximum number of iterations to process.
  - heterogeneity, an empty list that will be filled with heterogeneity values if passed to the kmeans function.

Usage:
  1. define the 'k' value, the 'X' features array and an empty 'heterogeneity' list

  2. create initial_centroids,
        initial_centroids = get_initial_centroids(
            X,
            k,
            seed=0  # seed value for initial centroid generation, None for randomness (default=None)
        )

  3. find centroids and clusters using the kmeans function.

        centroids, cluster_assignment = kmeans(
            X,
            k,
            initial_centroids,
            maxiter=400,
            record_heterogeneity=heterogeneity,
            verbose=True  # whether to print logs to the console or not (default=False)
        )

  4. Plot the loss function, i.e. the heterogeneity values for every iteration saved in the heterogeneity list.
        plot_heterogeneity(
            heterogeneity,
            k
        )

  5. Have fun..

'''
from sklearn.metrics import pairwise_distances
from matplotlib import pyplot as plt
import numpy as np

TAG = 'K-MEANS-CLUST/ '


def get_initial_centroids(data, k, seed=None):
    '''Randomly choose k data points as initial centroids'''
    if seed is not None:  # useful for obtaining consistent results
        np.random.seed(seed)
    n = data.shape[0]  # number of data points

    # Pick k indices from range [0, n).
    rand_indices = np.random.randint(0, n, k)

    # Keep centroids in dense format, as many entries will be nonzero due to averaging.
    # As long as at least one document in a cluster contains a word,
    # it will carry a nonzero weight in the TF-IDF vector of the centroid.
    centroids = data[rand_indices, :]

    return centroids


def centroid_pairwise_dist(X, centroids):
    return pairwise_distances(X, centroids, metric='euclidean')


def assign_clusters(data, centroids):
    # Compute distances between each data point and the set of centroids.
    distances_from_centroids = centroid_pairwise_dist(data, centroids)

    # Assign each data point to its closest centroid.
    cluster_assignment = np.argmin(distances_from_centroids, axis=1)

    return cluster_assignment


def revise_centroids(data, k, cluster_assignment):
    new_centroids = []
    for i in range(k):
        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i]
        # The new centroid is the mean of those data points.
        centroid = member_data_points.mean(axis=0)
        new_centroids.append(centroid)
    new_centroids = np.array(new_centroids)

    return new_centroids


def compute_heterogeneity(data, k, centroids, cluster_assignment):
    heterogeneity = 0.0
    for i in range(k):
        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i, :]

        if member_data_points.shape[0] > 0:  # check if the i-th cluster is non-empty
            # Compute squared distances from the centroid to its member points.
            distances = pairwise_distances(member_data_points, [centroids[i]], metric='euclidean')
            squared_distances = distances ** 2
            heterogeneity += np.sum(squared_distances)

    return heterogeneity


def plot_heterogeneity(heterogeneity, k):
    plt.figure(figsize=(7, 4))
    plt.plot(heterogeneity, linewidth=4)
    plt.xlabel('# Iterations')
    plt.ylabel('Heterogeneity')
    plt.title('Heterogeneity of clustering over time, K={0:d}'.format(k))
    plt.rcParams.update({'font.size': 16})
    plt.show()


def kmeans(data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False):
    '''This function runs k-means on the given data and initial set of centroids.
       maxiter: maximum number of iterations to run (default=500)
       record_heterogeneity: (optional) a list to store the history of heterogeneity as a function of iterations;
                             if None, do not store the history.
       verbose: if True, print how many data points changed their cluster labels in each iteration'''
    centroids = initial_centroids[:]
    prev_cluster_assignment = None

    for itr in range(maxiter):
        if verbose:
            print(itr, end='')

        # 1. Make cluster assignments using the nearest centroids.
        cluster_assignment = assign_clusters(data, centroids)

        # 2. Compute a new centroid for each of the k clusters, averaging all data points assigned to that cluster.
        centroids = revise_centroids(data, k, cluster_assignment)

        # Check for convergence: if none of the assignments changed, stop.
        if prev_cluster_assignment is not None and \
                (prev_cluster_assignment == cluster_assignment).all():
            break

        # Print the number of new assignments.
        if prev_cluster_assignment is not None:
            num_changed = np.sum(prev_cluster_assignment != cluster_assignment)
            if verbose:
                print(' {0:5d} elements changed their cluster assignment.'.format(num_changed))

        # Record the heterogeneity convergence metric.
        if record_heterogeneity is not None:
            score = compute_heterogeneity(data, k, centroids, cluster_assignment)
            record_heterogeneity.append(score)

        prev_cluster_assignment = cluster_assignment[:]

    return centroids, cluster_assignment


# Mock test below
if False:  # change to True to run this test case.
    import sklearn.datasets as ds
    dataset = ds.load_iris()
    k = 3
    heterogeneity = []
    initial_centroids = get_initial_centroids(dataset['data'], k, seed=0)
    centroids, cluster_assignment = kmeans(dataset['data'], k, initial_centroids, maxiter=400,
                                           record_heterogeneity=heterogeneity, verbose=True)
    plot_heterogeneity(heterogeneity, k)
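As an alternative to the iris mock test above, a minimal runnable sketch on synthetic data (the two blobs below are made up for illustration). The recorded heterogeneity is the usual k-means objective, the sum of squared distances from each point to its assigned centroid, and it is non-increasing across iterations:

    import numpy as np

    # two well-separated 2-D blobs of 50 points each
    X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 10.0])

    heterogeneity = []
    initial_centroids = get_initial_centroids(X, 2, seed=0)
    centroids, assignment = kmeans(X, 2, initial_centroids, maxiter=100,
                                   record_heterogeneity=heterogeneity, verbose=False)
    print(np.bincount(assignment))  # number of points assigned to each cluster
    print(heterogeneity)            # objective value per iteration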
108  machine_learning/linear_regression.py  Normal file
@@ -0,0 +1,108 @@
"""
Linear regression is the most basic type of regression, commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and a set of
features associated with it. The features should be chosen very cautiously,
as they determine how well the model will be able to make future predictions.
We adjust the feature weights over many iterations so that they best fit the
dataset. In this particular code a CSGO dataset (ADR vs Rating) is used. We
try to fit a line through the dataset and estimate its parameters.
"""

import requests
import numpy as np


def collect_dataset():
    """ Collect the CSGO dataset
    The dataset contains ADR vs Rating of a player
    :return : dataset obtained from the link, as a matrix
    """
    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
                            'Rating.csv')
    lines = response.text.splitlines()
    data = []
    for item in lines:
        item = item.split(',')
        data.append(item)
    data.pop(0)  # remove the header row (labels) from the list
    dataset = np.matrix(data)
    return dataset


def run_steep_gradient_descent(data_x, data_y,
                               len_data, alpha, theta):
    """ Run one step of gradient descent and update the feature vector accordingly
    :param data_x : contains the dataset
    :param data_y : contains the output associated with each data entry
    :param len_data : length of the data
    :param alpha : learning rate of the model
    :param theta : feature vector (weights for our model)
    :return : updated features, using
              curr_features - alpha * gradient (w.r.t. the features)
    """
    n = len_data

    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_grad = np.dot(prod, data_x)
    theta = theta - (alpha / n) * sum_grad
    return theta


def sum_of_square_error(data_x, data_y, len_data, theta):
    """ Return the sum of square error, used for error calculation
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :param len_data : length of the dataset
    :param theta : contains the feature vector
    :return : sum of square error computed from the given features
    """
    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_elem = np.sum(np.square(prod))
    error = sum_elem / (2 * len_data)
    return error


def run_linear_regression(data_x, data_y):
    """ Implement linear regression over the dataset
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :return : feature vector for the line of best fit
    """
    iterations = 100000
    alpha = 0.0001550

    no_features = data_x.shape[1]
    len_data = data_x.shape[0] - 1

    theta = np.zeros((1, no_features))

    for i in range(0, iterations):
        theta = run_steep_gradient_descent(data_x, data_y,
                                           len_data, alpha, theta)
        error = sum_of_square_error(data_x, data_y, len_data, theta)
        print('At Iteration %d - Error is %.5f ' % (i + 1, error))

    return theta


def main():
    """ Driver function """
    data = collect_dataset()

    len_data = data.shape[0]
    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
    data_y = data[:, -1].astype(float)

    theta = run_linear_regression(data_x, data_y)
    len_result = theta.shape[1]
    print('Resultant Feature vector : ')
    for i in range(0, len_result):
        print('%.5f' % (theta[0, i]))


if __name__ == '__main__':
    main()
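A minimal offline sketch of the same pipeline, using a small synthetic dataset instead of the remote CSV (the data and hyperparameters below are made up for illustration):

    import numpy as np

    x = np.arange(0.0, 10.0, 0.5)
    y = 3.0 + 2.0 * x                      # true line: intercept 3, slope 2
    data_x = np.c_[np.ones(len(x)), x]     # bias column of ones, as main() builds
    data_y = y.reshape(-1, 1)

    theta = np.zeros((1, data_x.shape[1]))
    for _ in range(10000):
        theta = run_steep_gradient_descent(data_x, data_y, len(x), 0.01, theta)
    print(theta)                           # approaches [3.0, 2.0]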
123  machine_learning/perceptron.py  Normal file
@@ -0,0 +1,123 @@
'''

Perceptron
w = w + N * (d(k) - y) * x(k)

Using a perceptron network for oil analysis: by measuring 3 parameters that
represent chemical characteristics, we can classify the oil as p1 or p2
p1 = -1
p2 = 1

'''

import random


class Perceptron:
    def __init__(self, sample, exit, learn_rate=0.01, epoch_number=1000, bias=-1):
        self.sample = sample
        self.exit = exit
        self.learn_rate = learn_rate
        self.epoch_number = epoch_number
        self.bias = bias
        self.number_sample = len(sample)
        self.col_sample = len(sample[0])
        self.weight = []

    def trannig(self):
        # prepend the bias input to every sample
        for sample in self.sample:
            sample.insert(0, self.bias)

        # initialize the weights randomly, plus one weight for the bias input
        for i in range(self.col_sample):
            self.weight.append(random.random())

        self.weight.insert(0, self.bias)

        epoch_count = 0

        while True:
            erro = False
            for i in range(self.number_sample):
                u = 0
                for j in range(self.col_sample + 1):
                    u = u + self.weight[j] * self.sample[i][j]
                y = self.sign(u)
                if y != self.exit[i]:
                    for j in range(self.col_sample + 1):
                        self.weight[j] = self.weight[j] + self.learn_rate * (self.exit[i] - y) * self.sample[i][j]
                    erro = True
            # print('Epoch: \n', epoch_count)
            epoch_count = epoch_count + 1
            # stop when a full pass produces no errors; alternatively, limit by epoch_number
            if not erro:
                print('\nEpoch:\n', epoch_count)
                print('------------------------\n')
                # if epoch_count > self.epoch_number or not erro:
                break

    def sort(self, sample):
        # classify a new sample with the trained weights
        sample.insert(0, self.bias)
        u = 0
        for i in range(self.col_sample + 1):
            u = u + self.weight[i] * sample[i]

        y = self.sign(u)

        if y == -1:
            print('Sample: ', sample)
            print('classification: P1')
        else:
            print('Sample: ', sample)
            print('classification: P2')

    def sign(self, u):
        return 1 if u >= 0 else -1


samples = [
    [-0.6508, 0.1097, 4.0009],
    [-1.4492, 0.8896, 4.4005],
    [2.0850, 0.6876, 12.0710],
    [0.2626, 1.1476, 7.7985],
    [0.6418, 1.0234, 7.0427],
    [0.2569, 0.6730, 8.3265],
    [1.1155, 0.6043, 7.4446],
    [0.0914, 0.3399, 7.0677],
    [0.0121, 0.5256, 4.6316],
    [-0.0429, 0.4660, 5.4323],
    [0.4340, 0.6870, 8.2287],
    [0.2735, 1.0287, 7.1934],
    [0.4839, 0.4851, 7.4850],
    [0.4089, -0.1267, 5.5019],
    [1.4391, 0.1614, 8.5843],
    [-0.9115, -0.1973, 2.1962],
    [0.3654, 1.0475, 7.4858],
    [0.2144, 0.7515, 7.1699],
    [0.2013, 1.0014, 6.5489],
    [0.6483, 0.2183, 5.8991],
    [-0.1147, 0.2242, 7.2435],
    [-0.7970, 0.8795, 3.8762],
    [-1.0625, 0.6366, 2.4707],
    [0.5307, 0.1285, 5.6883],
    [-1.2200, 0.7777, 1.7252],
    [0.3957, 0.1076, 5.6623],
    [-0.1013, 0.5989, 7.1812],
    [2.4482, 0.9455, 11.2095],
    [2.0149, 0.6192, 10.9263],
    [0.2012, 0.2611, 5.4631]
]

exit = [-1, -1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 1]

network = Perceptron(sample=samples, exit=exit, learn_rate=0.01, epoch_number=1000, bias=-1)

network.trannig()

while True:
    sample = []
    for i in range(3):
        sample.insert(i, float(input('value: ')))
    network.sort(sample)
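One worked step of the update rule quoted in the module docstring, w = w + N * (d(k) - y) * x(k), with purely illustrative numbers (not taken from the oil data set above):

    N, d, y = 0.01, 1, -1              # learning rate, desired output, prediction
    x = [-1, 0.5, 0.2, 4.0]            # bias input first, as in trannig()
    w = [0.3, -0.1, 0.2, 0.05]         # hypothetical current weights
    w = [w_j + N * (d - y) * x_j for w_j, x_j in zip(w, x)]
    print(w)                           # approximately [0.28, -0.09, 0.204, 0.13]

When the prediction already matches the desired output (d(k) == y), the change is zero, which is why training stops once a full pass over the samples produces no errors.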
63  machine_learning/scoring_functions.py  Normal file
@@ -0,0 +1,63 @@
""" Here I implemented the scoring functions.
    MAE, MSE, RMSE and RMSLE are included.

    They are used for calculating differences between
    predicted values and actual values.

    The metrics differ only slightly: sometimes the differences are
    squared, rooted, or even a log is applied.

    Using logs and roots can be seen as a tool for penalizing big
    errors. However, the appropriate metric depends on the situation
    and the type of data.
"""

import numpy as np


# Mean Absolute Error
def mae(predict, actual):
    predict = np.array(predict)
    actual = np.array(actual)

    difference = abs(predict - actual)
    score = difference.mean()

    return score


# Mean Squared Error
def mse(predict, actual):
    predict = np.array(predict)
    actual = np.array(actual)

    difference = predict - actual
    square_diff = np.square(difference)

    score = square_diff.mean()
    return score


# Root Mean Squared Error
def rmse(predict, actual):
    predict = np.array(predict)
    actual = np.array(actual)

    difference = predict - actual
    square_diff = np.square(difference)
    mean_square_diff = square_diff.mean()
    score = np.sqrt(mean_square_diff)
    return score


# Root Mean Squared Logarithmic Error
def rmsle(predict, actual):
    predict = np.array(predict)
    actual = np.array(actual)

    log_predict = np.log(predict + 1)
    log_actual = np.log(actual + 1)

    difference = log_predict - log_actual
    square_diff = np.square(difference)
    mean_square_diff = square_diff.mean()

    score = np.sqrt(mean_square_diff)

    return score
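A quick usage check of the four functions above on a tiny made-up pair of lists:

    predicted = [3.0, 5.0, 2.5]
    observed = [3.5, 4.0, 2.5]

    print(mae(predicted, observed))    # (0.5 + 1.0 + 0.0) / 3 = 0.5
    print(mse(predicted, observed))    # (0.25 + 1.0 + 0.0) / 3 ≈ 0.4167
    print(rmse(predicted, observed))   # sqrt(0.4167) ≈ 0.6455
    print(rmsle(predicted, observed))  # same as rmse but on log(x + 1), ≈ 0.1253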