Added Dequeue in Python

2025-07-26 01:43:17 +08:00 · 2017-10-25 01:37:11 +05:30
commit 9bc80eac2d
105 changed files with 295341 additions and 0 deletions
--- a/machine_learning/decision_tree.py
+++ b/machine_learning/decision_tree.py
@ -0,0 +1,139 @@
+"""
+Implementation of a basic regression decision tree.
+Input data set: The input data set must be 1-dimensional with continuous labels.
+Output: The decision tree maps a real number input to a real number output. 
+"""
+
+import numpy as np
+
+class Decision_Tree:
+    """
+    Regression tree over a single continuous feature.
+
+    A node is either a leaf (self.prediction is set) or an internal node
+    (self.decision_boundary, self.left and self.right are set).
+    train() splits the samples by array position and uses X[split] as the
+    boundary, so the split only partitions the feature axis cleanly when X
+    is sorted in ascending order.
+    """
+    def __init__(self, depth = 5, min_leaf_size = 5):
+        """
+        @param depth: remaining levels this node may grow; a node with depth 1 is a leaf
+        @param min_leaf_size: minimum number of samples on each side of a split
+        """
+        self.depth = depth
+        self.decision_boundary = 0
+        self.left = None
+        self.right = None
+        self.min_leaf_size = min_leaf_size
+        self.prediction = None
+
+    def mean_squared_error(self, labels, prediction):
+        """
+        mean_squared_error:
+        @param labels: a one dimensional numpy array
+        @param prediction: a floating point value
+        return value: mean_squared_error calculates the error if prediction is used to estimate the labels
+        """
+        if labels.ndim != 1:
+            print("Error: Input labels must be one dimensional")
+
+        return np.mean((labels - prediction) ** 2)
+
+    def train(self, X, y):
+        """
+        train:
+        @param X: a one dimensional numpy array
+        @param y: a one dimensional numpy array.
+        The contents of y are the labels for the corresponding X values
+
+        train does not have a return value. On invalid input it prints a
+        message and leaves the node untrained.
+        """
+        # check that the inputs conform to our dimensionality constraints
+        if X.ndim != 1:
+            print("Error: Input data set must be one dimensional")
+            return
+        if len(X) != len(y):
+            print("Error: X and y have different lengths")
+            return
+        if y.ndim != 1:
+            print("Error: Data set labels must be one dimensional")
+            return
+
+        # too few samples for two leaves, or no depth left: become a leaf
+        if len(X) < 2 * self.min_leaf_size or self.depth == 1:
+            self.prediction = np.mean(y)
+            return
+
+        # The error of a split is measured on the labels y (not on the
+        # inputs X): it is the sum of the MSE of each half around its own mean.
+        # A split is accepted only if it beats twice the MSE of the unsplit node.
+        best_split = 0
+        min_error = self.mean_squared_error(y, np.mean(y)) * 2
+
+        # Only positions that leave at least min_leaf_size samples (and at
+        # least one sample) on each side are candidates.
+        smallest_side = max(self.min_leaf_size, 1)
+        for i in range(smallest_side, len(X) - smallest_side + 1):
+            error_left = self.mean_squared_error(y[:i], np.mean(y[:i]))
+            error_right = self.mean_squared_error(y[i:], np.mean(y[i:]))
+            error = error_left + error_right
+            if error < min_error:
+                best_split = i
+                min_error = error
+
+        if best_split == 0:
+            # no split improved on the unsplit node: predict the mean
+            self.prediction = np.mean(y)
+            return
+
+        self.decision_boundary = X[best_split]
+        self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+        self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+        self.left.train(X[:best_split], y[:best_split])
+        self.right.train(X[best_split:], y[best_split:])
+
+    def predict(self, x):
+        """
+        predict:
+        @param x: a floating point value to predict the label of
+        the prediction function works by recursively calling the predict function
+        of the appropriate subtrees based on the tree's decision boundary
+        return value: the predicted label, or None (after printing a message)
+        when the tree has not been trained
+        """
+        if self.prediction is not None:
+            return self.prediction
+        if self.left is not None and self.right is not None:
+            # values equal to the boundary belong to the right subtree
+            if x >= self.decision_boundary:
+                return self.right.predict(x)
+            return self.left.predict(x)
+
+        print("Error: Decision tree not yet trained")
+        return None
+
+def main():
+    """
+    In this demonstration we're generating a sample data set from the sin function in numpy.
+    We then train a decision tree on the data set and use the decision tree to predict the
+    label of 10 different test values. Then the mean squared error between the
+    predictions and the true sin values of the test inputs is displayed.
+    """
+    X = np.arange(-1., 1., 0.005)
+    y = np.sin(X)
+
+    tree = Decision_Tree(depth = 10, min_leaf_size = 10)
+    tree.train(X,y)
+
+    test_cases = (np.random.rand(10) * 2) - 1
+    predictions = np.array([tree.predict(x) for x in test_cases])
+    # The error is measured against the true labels sin(x), not against the inputs x.
+    expected = np.sin(test_cases)
+    avg_error = np.mean((predictions - expected) ** 2)
+
+    print("Test values: " + str(test_cases))
+    print("Predictions: " + str(predictions))
+    print("Average error: " + str(avg_error))
+
+
+if __name__ == '__main__':
+    main()
--- a/machine_learning/gradient_descent.py
+++ b/machine_learning/gradient_descent.py
@ -0,0 +1,121 @@
+"""
+Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
+"""
+import numpy
+
+# List of input, output pairs: each example is ((x1, x2, x3), expected_output)
+train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
+              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
+# Held-out examples in the same layout, only read by test_gradient_descent()
+test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
+# Hypothesis weights: index 0 is the bias term, indices 1.. multiply x1, x2, x3
+parameter_vector = [2, 4, 1, 5]
+# Number of training examples
+m = len(train_data)
+# Step size applied to every cost derivative in run_gradient_descent()
+LEARNING_RATE = 0.009
+
+
+def _error(example_no, data_set='train'):
+    """
+    Signed difference between the hypothesis value and the expected output.
+    :param example_no: index of the example to evaluate
+    :param data_set: 'train' or 'test', selects which data set is read
+    :return: hypothesis value minus expected output for that example
+    """
+    predicted = calculate_hypothesis_value(example_no, data_set)
+    expected = output(example_no, data_set)
+    return predicted - expected
+
+
+def _hypothesis_value(data_input_tuple):
+    """
+    Evaluates the linear hypothesis for one input tuple.
+    :param data_input_tuple: Input tuple of a particular example
+    :return: Value of hypothesis function at that point.
+    The input data carries no explicit bias input (a constant 1), so the bias
+    weight parameter_vector[0] is added on its own after the weighted inputs.
+    """
+    weighted_inputs = sum(
+        data_input_tuple[position] * parameter_vector[position + 1]
+        for position in range(len(parameter_vector) - 1)
+    )
+    return weighted_inputs + parameter_vector[0]
+
+
+def output(example_no, data_set):
+    """
+    Looks up the expected output of one example.
+    :param example_no: index of the example inside the chosen data set
+    :param data_set: 'train' or 'test'
+    :return: expected output of that example, or None for any other data_set value
+    """
+    if data_set == 'train':
+        examples = train_data
+    elif data_set == 'test':
+        examples = test_data
+    else:
+        return None
+    return examples[example_no][1]
+
+
+def calculate_hypothesis_value(example_no, data_set):
+    """
+    Evaluates the hypothesis on the input tuple of one example.
+    :param example_no: index of the example inside the chosen data set
+    :param data_set: "train" or "test"
+    :return: hypothesis value for that example, or None for any other data_set value
+    """
+    if data_set == "train":
+        examples = train_data
+    elif data_set == "test":
+        examples = test_data
+    else:
+        return None
+    return _hypothesis_value(examples[example_no][0])
+
+
+def summation_of_cost_derivative(index, end=m):
+    """
+    Sums the per-example cost derivative over the first `end` training examples.
+    :param index: input index the derivative is taken with respect to
+    :param end: number of training examples to sum over, default is m
+    :return: the summed cost derivative
+    Note: index -1 selects the bias parameter, whose input is implicitly 1,
+    so the plain errors are summed.
+    """
+    if index == -1:
+        return sum(_error(i) for i in range(end))
+    return sum(_error(i) * train_data[i][0][index] for i in range(end))
+
+
+def get_cost_derivative(index):
+    """
+    Averages the summed cost derivative over all m training examples.
+    :param index: input index the derivative is taken with respect to
+    :return: mean cost derivative for that index
+    Note: index -1 selects the bias parameter.
+    """
+    return summation_of_cost_derivative(index, m) / m
+
+
+def run_gradient_descent():
+    """
+    Repeats simultaneous gradient steps on the global parameter_vector until
+    one step changes no parameter by more than the absolute tolerance, then
+    prints the number of iterations performed.
+    The step that meets the tolerance is not applied to parameter_vector.
+    """
+    global parameter_vector
+    # Tune these values to set a tolerance value for predicted output
+    absolute_error_limit = 0.000002
+    relative_error_limit = 0
+    iterations = 0
+    while True:
+        iterations += 1
+        # Every update is computed from the current vector before it is replaced.
+        # Position 0 is the bias weight, hence the shift to cost index -1.
+        # The candidate is sized from parameter_vector, not fixed at four entries.
+        updated_vector = [
+            weight - LEARNING_RATE * get_cost_derivative(position - 1)
+            for position, weight in enumerate(parameter_vector)
+        ]
+        if numpy.allclose(parameter_vector, updated_vector,
+                          atol=absolute_error_limit, rtol=relative_error_limit):
+            break
+        parameter_vector = updated_vector
+    print("Number of iterations:", iterations)
+
+
+def test_gradient_descent():
+    """
+    Prints the expected output next to the hypothesis value for every test example.
+    """
+    for example_no, _ in enumerate(test_data):
+        actual = output(example_no, 'test')
+        print("Actual output value:", actual)
+        predicted = calculate_hypothesis_value(example_no, 'test')
+        print("Hypothesis output:", predicted)
+
+
+if __name__ == '__main__':
+    run_gradient_descent()
+    print("\nTesting gradient descent for a linear hypothesis function.\n")
+    test_gradient_descent()
--- a/machine_learning/k_means_clust.py
+++ b/machine_learning/k_means_clust.py
@ -0,0 +1,172 @@
+'''README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
+
+Requirements:
+  - sklearn
+  - numpy
+  - matplotlib
+
+Python:
+  - 3.5
+
+Inputs:
+  - X , a 2D numpy array of features.
+  - k , number of clusters to create.
+  - initial_centroids , initial centroid values generated by utility function(mentioned in usage).
+  - maxiter , maximum number of iterations to process.
+  - heterogeneity , empty list that will be filled with heterogeneity values if passed to the kmeans function.
+
+Usage:
+  1. define 'k' value, 'X' features array and 'heterogeneity' empty list
+  
+  2. create initial_centroids,
+        initial_centroids = get_initial_centroids(
+            X, 
+            k, 
+            seed=0 # seed value for initial centroid generation, None for randomness(default=None)
+            )
+
+  3. find centroids and clusters using kmeans function.
+  
+        centroids, cluster_assignment = kmeans(
+            X, 
+            k, 
+            initial_centroids, 
+            maxiter=400,
+            record_heterogeneity=heterogeneity, 
+            verbose=True # whether to print logs in console or not.(default=False)
+            )
+  
+  
+  4. Plot the loss function: the heterogeneity values saved in the heterogeneity list for every iteration.
+        plot_heterogeneity(
+            heterogeneity, 
+            k
+        )
+  
+  5. Have fun..
+  
+'''
+from sklearn.metrics import pairwise_distances
+import numpy as np
+
+TAG = 'K-MEANS-CLUST/ '
+
+def get_initial_centroids(data, k, seed=None):
+    '''Randomly choose k data points as initial centroids.
+
+    data: 2D array with one data point per row.
+    k: number of centroids to pick.
+    seed: if not None, seeds numpy's global random generator first so the
+          choice is reproducible.
+    Returns the selected rows of data as an array with k rows.
+    '''
+    if seed is not None: # useful for obtaining consistent results
+        np.random.seed(seed)
+    n = data.shape[0] # number of data points
+
+    # Pick k row indices from [0, n) without replacement, so two centroids
+    # never start on the same data point (a duplicated centroid leaves one
+    # cluster empty after the first assignment step).
+    # Distinct indices are impossible when k > n; only then is sampling
+    # with replacement used.
+    rand_indices = np.random.choice(n, size=k, replace=k > n)
+
+    return data[rand_indices, :]
+
+def centroid_pairwise_dist(X,centroids):
+    '''Return the euclidean distances between the rows of X and the rows of centroids,
+       as computed by sklearn's pairwise_distances.'''
+    distances = pairwise_distances(X, centroids, metric='euclidean')
+    return distances
+
+def assign_clusters(data, centroids):
+    '''Return, for every data point, the index of its nearest centroid.'''
+    # One row per data point, one column per centroid.
+    distances_from_centroids = centroid_pairwise_dist(data, centroids)
+
+    # The column holding the smallest distance is the assigned cluster.
+    return np.argmin(distances_from_centroids, axis=1)
+
+def revise_centroids(data, k, cluster_assignment):
+    '''Return an array whose i-th row is the mean of the data points assigned to cluster i.'''
+    cluster_means = [
+        data[cluster_assignment == cluster].mean(axis=0)
+        for cluster in range(k)
+    ]
+    return np.array(cluster_means)
+
+def compute_heterogeneity(data, k, centroids, cluster_assignment):
+    '''Return the sum, over all k clusters, of the squared euclidean distances
+       between each data point and the centroid of its cluster.'''
+    total = 0.0
+    for cluster in range(k):
+        members = data[cluster_assignment == cluster, :]
+
+        # An empty cluster contributes nothing.
+        if members.shape[0] <= 0:
+            continue
+
+        to_centroid = pairwise_distances(members, [centroids[cluster]], metric='euclidean')
+        total += np.sum(to_centroid ** 2)
+
+    return total
+
+from matplotlib import pyplot as plt
+def plot_heterogeneity(heterogeneity, k):
+    '''Plot the recorded heterogeneity values against the iteration number.
+
+    heterogeneity: sequence of heterogeneity values, one per iteration.
+    k: number of clusters, shown in the plot title.
+    Updates the global matplotlib font size and opens a plot window.
+    '''
+    # Set the font size before any text is created, so that it applies to
+    # the labels and title of this figure and not only to later plots.
+    plt.rcParams.update({'font.size': 16})
+    plt.figure(figsize=(7,4))
+    plt.plot(heterogeneity, linewidth=4)
+    plt.xlabel('# Iterations')
+    plt.ylabel('Heterogeneity')
+    plt.title('Heterogeneity of clustering over time, K={0:d}'.format(k))
+    plt.show()
+
+def kmeans(data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False):
+    '''This function runs k-means on given data and initial set of centroids.
+       maxiter: maximum number of iterations to run.(default=500)
+       record_heterogeneity: (optional) a list, to store the history of heterogeneity as function of iterations
+                             if None, do not store the history.
+       verbose: if True, print how many data points changed their cluster labels in each iteration
+       Returns (centroids, cluster_assignment). When maxiter is below 1 no update
+       is run and the assignment to the initial centroids is returned.'''
+    centroids = initial_centroids[:]
+    prev_cluster_assignment = None
+    # Stays None only when the loop body never runs (maxiter < 1).
+    cluster_assignment = None
+
+    for itr in range(maxiter):
+        if verbose:
+            print(itr, end='')
+
+        # 1. Make cluster assignments using nearest centroids
+        cluster_assignment = assign_clusters(data, centroids)
+
+        # 2. Compute a new centroid for each of the k clusters, averaging all data points assigned to that cluster.
+        centroids = revise_centroids(data, k, cluster_assignment)
+
+        if prev_cluster_assignment is not None:
+            num_changed = np.sum(prev_cluster_assignment != cluster_assignment)
+            # Check for convergence: if none of the assignments changed, stop
+            if num_changed == 0:
+                break
+            if verbose:
+                print('    {0:5d} elements changed their cluster assignment.'.format(num_changed))
+
+        # Record heterogeneity convergence metric
+        if record_heterogeneity is not None:
+            score = compute_heterogeneity(data, k, centroids, cluster_assignment)
+            record_heterogeneity.append(score)
+
+        # assign_clusters builds a new array each iteration, so keeping a
+        # reference is enough for the comparison above.
+        prev_cluster_assignment = cluster_assignment
+
+    if cluster_assignment is None:
+        # Without this the return below would hit an unbound local name.
+        cluster_assignment = assign_clusters(data, centroids)
+
+    return centroids, cluster_assignment
+
+# Mock test below: clusters the iris data set from sklearn into k = 3 clusters
+# and plots the heterogeneity recorded for each iteration.
+if False: # change to true to run this test case.
+    import sklearn.datasets as ds
+    dataset = ds.load_iris()
+    k = 3
+    heterogeneity = []
+    initial_centroids = get_initial_centroids(dataset['data'], k, seed=0)
+    centroids, cluster_assignment = kmeans(dataset['data'], k, initial_centroids, maxiter=400,
+                                        record_heterogeneity=heterogeneity, verbose=True)
+    plot_heterogeneity(heterogeneity, k)
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@ -0,0 +1,108 @@
+"""
+Linear regression is the most basic type of regression commonly used for
+predictive analysis. The idea is pretty simple: we have a dataset and we have
+features associated with it. The features should be chosen very carefully,
+as they determine how well our model will be able to make future predictions.
+We adjust the feature weights over many iterations so that they best fit
+our dataset. In this particular code, I used a CSGO dataset (ADR vs
+Rating). We try to best fit a line through dataset and estimate the parameters.
+"""
+
+import requests
+import numpy as np
+
+
+def collect_dataset():
+    """ Collect dataset of CSGO
+    The dataset contains ADR vs Rating of a Player
+    :return : dataset obtained from the link, as matrix of strings
+              (the header row is removed)
+    :raises requests.RequestException : when the download fails, times out
+              or the server answers with an error status
+    """
+    # The timeout keeps a stalled connection from blocking forever.
+    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
+                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
+                            'Rating.csv', timeout=10)
+    # Fail here on an HTTP error instead of parsing an error page as CSV.
+    response.raise_for_status()
+
+    # Blank lines would produce rows of a different length, so skip them.
+    data = [line.split(',')
+            for line in response.text.splitlines() if line.strip()]
+    data.pop(0)  # This is for removing the labels from the list
+    dataset = np.matrix(data)
+    return dataset
+
+
+def run_steep_gradient_descent(data_x, data_y,
+                               len_data, alpha, theta):
+    """ Perform one steepest-descent step on the weight vector
+    :param data_x   : contains the dataset, one row per sample
+    :param data_y   : contains the output associated with each data-entry
+    :param len_data : sample count used to average the gradient
+    :param alpha    : Learning rate of the model
+    :param theta    : current weights, as a row vector
+    :return         : updated weights,
+                      theta - (alpha / len_data) * summed gradient
+    """
+    # Residual of every sample under the current weights, as a row vector.
+    residual = np.dot(theta, data_x.T)
+    residual -= data_y.T
+
+    gradient_sum = np.dot(residual, data_x)
+    step = (alpha / len_data) * gradient_sum
+    return theta - step
+
+
+def sum_of_square_error(data_x, data_y, len_data, theta):
+    """ Return the halved mean of the squared residuals
+    :param data_x    : contains our dataset, one row per sample
+    :param data_y    : contains the output (result vector)
+    :param len_data  : sample count used as the divisor
+    :param theta     : weights, as a row vector
+    :return          : sum of squared residuals divided by (2 * len_data)
+    """
+    residual = np.dot(theta, data_x.T)
+    residual -= data_y.T
+    squared_total = np.sum(np.square(residual))
+    return squared_total / (2 * len_data)
+
+
+def run_linear_regression(data_x, data_y, iterations=100000, alpha=0.0001550):
+    """ Implement Linear regression over the dataset
+    :param data_x     : contains our dataset, one row per sample
+    :param data_y     : contains the output (result vector)
+    :param iterations : number of gradient descent steps to run
+    :param alpha      : learning rate of the model
+    :return           : feature for line of best fit (Feature vector)
+    Prints the error after every iteration.
+    """
+    no_features = data_x.shape[1]
+    # Every row of data_x is a sample, so the sample count is the full row
+    # count. Subtracting one here would skew the averaged gradient and error.
+    len_data = data_x.shape[0]
+
+    theta = np.zeros((1, no_features))
+
+    for i in range(0, iterations):
+        theta = run_steep_gradient_descent(data_x, data_y,
+                                           len_data, alpha, theta)
+        error = sum_of_square_error(data_x, data_y, len_data, theta)
+        print('At Iteration %d - Error is %.5f ' % (i + 1, error))
+
+    return theta
+
+
+def main():
+    """ Fetch the dataset, fit the line and print the learned weights """
+    dataset = collect_dataset()
+    sample_count = dataset.shape[0]
+
+    # A leading column of ones is added in front of the feature columns;
+    # the last column of the dataset is the output.
+    features = np.c_[np.ones(sample_count), dataset[:, :-1]].astype(float)
+    targets = dataset[:, -1].astype(float)
+
+    weights = run_linear_regression(features, targets)
+
+    print('Resultant Feature vector : ')
+    for column in range(weights.shape[1]):
+        print('%.5f' % (weights[0, column]))
+
+
+if __name__ == '__main__':
+    main()
--- a/machine_learning/perceptron.py
+++ b/machine_learning/perceptron.py
@ -0,0 +1,123 @@
+'''
+
+	Perceptron
+	w = w + N * (d(k) - y) * x(k)
+
+	Using perceptron network for oil analysis,
+	with Measuring of 3 parameters that represent chemical characteristics we can classify the oil, in p1 or p2
+	p1 = -1
+	p2 = 1
+
+'''
+
+import random
+
+
+class Perceptron:
+    """
+    Single-layer perceptron with a sign activation.
+
+    The bias is handled as an extra input in front of every sample, fixed
+    to the value of `bias`; its weight is stored in weight[0].
+    """
+    def __init__(self, sample, exit, learn_rate=0.01, epoch_number=1000, bias=-1):
+        """
+        sample: list of training samples, each a list of numbers
+        exit: expected label (-1 or 1) of each training sample
+        learn_rate: step size of the weight update
+        epoch_number: maximum number of passes over the training samples
+        bias: value of the constant bias input
+        """
+        self.sample = sample
+        self.exit = exit
+        self.learn_rate = learn_rate
+        self.epoch_number = epoch_number
+        self.bias = bias
+        self.number_sample = len(sample)
+        self.col_sample = len(sample[0])
+        self.weight = []
+
+    def trannig(self):
+        """
+        Train the weights with the perceptron rule
+        w = w + learn_rate * (expected - y) * x.
+
+        Stops when an epoch classifies every sample correctly, or after
+        epoch_number epochs, whichever comes first, so data that is not
+        linearly separable no longer loops forever. The training samples are
+        left untouched and the weights are re-initialised on every call.
+        (The method keeps its historical, misspelled name for existing callers.)
+        """
+        # Work on copies with the bias input in front, so the caller's
+        # lists are not modified and training can be repeated.
+        rows = [[self.bias] + list(sample) for sample in self.sample]
+
+        self.weight = [random.random() for _ in range(self.col_sample)]
+        self.weight.insert(0, self.bias)
+
+        epoch_count = 0
+
+        while True:
+            erro = False
+            for i in range(self.number_sample):
+                row = rows[i]
+                u = 0
+                for j in range(self.col_sample + 1):
+                    u = u + self.weight[j] * row[j]
+                y = self.sign(u)
+                if y != self.exit[i]:
+                    for j in range(self.col_sample + 1):
+                        self.weight[j] = self.weight[j] + self.learn_rate * (self.exit[i] - y) * row[j]
+                    erro = True
+            epoch_count = epoch_count + 1
+            # stop on an error-free epoch or when the epoch budget is used up
+            if not erro or epoch_count >= self.epoch_number:
+                print('\nEpoch:\n',epoch_count)
+                print('------------------------\n')
+                break
+
+    def sort(self, sample):
+        """
+        Classify one sample and print the result.
+
+        sample: list of numbers with the same length as a training sample;
+                it is not modified.
+        Prints the sample (with the bias input in front) and its class,
+        P1 for -1 and P2 for 1, and returns the label (-1 or 1).
+        """
+        augmented = [self.bias] + list(sample)
+        u = 0
+        for i in range(self.col_sample + 1):
+            u = u + self.weight[i] * augmented[i]
+
+        y = self.sign(u)
+
+        print('Sample: ', augmented)
+        if y == -1:
+            print('classification: P1')
+        else:
+            print('classification: P2')
+        return y
+
+    def sign(self, u):
+        """Activation function: 1 for u >= 0, otherwise -1."""
+        return 1 if u >= 0 else -1
+
+
+# Training samples: each row holds the 3 measured parameters of one oil sample.
+samples = [
+    [-0.6508, 0.1097, 4.0009],
+    [-1.4492, 0.8896, 4.4005],
+    [2.0850, 0.6876, 12.0710],
+    [0.2626, 1.1476, 7.7985],
+    [0.6418, 1.0234, 7.0427],
+    [0.2569, 0.6730, 8.3265],
+    [1.1155, 0.6043, 7.4446],
+    [0.0914, 0.3399, 7.0677],
+    [0.0121, 0.5256, 4.6316],
+    [-0.0429, 0.4660, 5.4323],
+    [0.4340, 0.6870, 8.2287],
+    [0.2735, 1.0287, 7.1934],
+    [0.4839, 0.4851, 7.4850],
+    [0.4089, -0.1267, 5.5019],
+    [1.4391, 0.1614, 8.5843],
+    [-0.9115, -0.1973, 2.1962],
+    [0.3654, 1.0475, 7.4858],
+    [0.2144, 0.7515, 7.1699],
+    [0.2013, 1.0014, 6.5489],
+    [0.6483, 0.2183, 5.8991],
+    [-0.1147, 0.2242, 7.2435],
+    [-0.7970, 0.8795, 3.8762],
+    [-1.0625, 0.6366, 2.4707],
+    [0.5307, 0.1285, 5.6883],
+    [-1.2200, 0.7777, 1.7252],
+    [0.3957, 0.1076, 5.6623],
+    [-0.1013, 0.5989, 7.1812],
+    [2.4482, 0.9455, 11.2095],
+    [2.0149, 0.6192, 10.9263],
+    [0.2012, 0.2611, 5.4631]
+
+]
+
+# Expected class of each sample above, in the same order (-1 = p1, 1 = p2).
+# NOTE(review): this name shadows the built-in exit().
+exit = [-1, -1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 1]
+
+network = Perceptron(sample=samples, exit = exit, learn_rate=0.01, epoch_number=1000, bias=-1)
+
+# NOTE(review): training and the prompt loop below run at import time,
+# because there is no __main__ guard.
+network.trannig()
+
+# Read 3 values from the user and classify them, forever.
+while True:
+    sample = []
+    for i in range(3):
+        sample.insert(i, float(input('value: ')))
+    network.sort(sample)
--- a/machine_learning/scoring_functions.py
+++ b/machine_learning/scoring_functions.py
@ -0,0 +1,63 @@
+import numpy
+import numpy as np
+
+""" Here I implemented the scoring functions.
+    MAE, MSE, RMSE, RMSLE are included.
+
+    Those are used for calculating differences between
+    predicted values and actual values.
+
+    Metrics are slightly differentiated. Sometimes squared, rooted,
+    even log is used.
+
+    Using log and roots can be perceived as tools for penalizing big
+    errors. However, using appropriate metrics depends on the situations,
+    and types of data
+"""
+
+#Mean Absolute Error
+def mae(predict, actual):
+    """Return the mean of the absolute differences between predict and actual."""
+    deviation = np.array(predict) - np.array(actual)
+    return abs(deviation).mean()
+
+#Mean Squared Error
+def mse(predict, actual):
+    """Return the mean of the squared differences between predict and actual."""
+    deviation = np.array(predict) - np.array(actual)
+    return np.square(deviation).mean()
+
+#Root Mean Squared Error
+def rmse(predict, actual):
+    """Return the square root of the mean squared difference between predict and actual."""
+    predict = np.array(predict)
+    actual = np.array(actual)
+
+    difference = predict - actual
+    # The original referenced a misspelled name here and raised NameError.
+    square_diff = np.square(difference)
+    mean_square_diff = square_diff.mean()
+    score = np.sqrt(mean_square_diff)
+    return score
+
+#Root Mean Square Logarithmic Error
+def rmsle(predict, actual):
+    """Return the root mean squared difference between log(predict + 1) and log(actual + 1)."""
+    log_predict = np.log(np.array(predict) + 1)
+    log_actual = np.log(np.array(actual) + 1)
+
+    log_gap = log_predict - log_actual
+    return np.sqrt(np.square(log_gap).mean())