Reenable files when TensorFlow supports the current Python (#8602)

* Remove python_version < "3.11" for tensorflow * Reenable neural_network/input_data.py_tf * updating DIRECTORY.md * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Try to fix ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Try to fix ruff * Try to fix ruff * Try to fix ruff * Try to fix pre-commit * Try to fix * Fix * Fix * Reenable dynamic_programming/k_means_clustering_tensorflow.py_tf * updating DIRECTORY.md * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Try to fix ruff --------- Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-07-05 17:34:49 +08:00 · 2023-04-01 20:43:11 +03:00
parent 84b6852de8
commit 56a40eb3ee
4 changed files with 54 additions and 55 deletions
--- a/dynamic_programming/k_means_clustering_tensorflow.py
+++ b/dynamic_programming/k_means_clustering_tensorflow.py
@ -0,0 +1,146 @@
+from random import shuffle
+
+import tensorflow as tf
+from numpy import array
+
+
+def tf_k_means_cluster(vectors, noofclusters):
+    """
+    K-Means Clustering using TensorFlow.
+    'vectors' should be a n*k 2-D NumPy array, where n is the number
+    of vectors of dimensionality k.
+    'noofclusters' should be an integer.
+    """
+
+    noofclusters = int(noofclusters)
+    assert noofclusters < len(vectors)
+
+    # Find out the dimensionality
+    dim = len(vectors[0])
+
+    # Will help select random centroids from among the available vectors
+    vector_indices = list(range(len(vectors)))
+    shuffle(vector_indices)
+
+    # GRAPH OF COMPUTATION
+    # We initialize a new graph and set it as the default during each run
+    # of this algorithm. This ensures that as this function is called
+    # multiple times, the default graph doesn't keep getting crowded with
+    # unused ops and Variables from previous function calls.
+
+    graph = tf.Graph()
+
+    with graph.as_default():
+        # SESSION OF COMPUTATION
+
+        sess = tf.Session()
+
+        ##CONSTRUCTING THE ELEMENTS OF COMPUTATION
+
+        ##First lets ensure we have a Variable vector for each centroid,
+        ##initialized to one of the vectors from the available data points
+        centroids = [
+            tf.Variable(vectors[vector_indices[i]]) for i in range(noofclusters)
+        ]
+        ##These nodes will assign the centroid Variables the appropriate
+        ##values
+        centroid_value = tf.placeholder("float64", [dim])
+        cent_assigns = []
+        for centroid in centroids:
+            cent_assigns.append(tf.assign(centroid, centroid_value))
+
+        ##Variables for cluster assignments of individual vectors(initialized
+        ##to 0 at first)
+        assignments = [tf.Variable(0) for i in range(len(vectors))]
+        ##These nodes will assign an assignment Variable the appropriate
+        ##value
+        assignment_value = tf.placeholder("int32")
+        cluster_assigns = []
+        for assignment in assignments:
+            cluster_assigns.append(tf.assign(assignment, assignment_value))
+
+        ##Now lets construct the node that will compute the mean
+        # The placeholder for the input
+        mean_input = tf.placeholder("float", [None, dim])
+        # The Node/op takes the input and computes a mean along the 0th
+        # dimension, i.e. the list of input vectors
+        mean_op = tf.reduce_mean(mean_input, 0)
+
+        ##Node for computing Euclidean distances
+        # Placeholders for input
+        v1 = tf.placeholder("float", [dim])
+        v2 = tf.placeholder("float", [dim])
+        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(v1, v2), 2)))
+
+        ##This node will figure out which cluster to assign a vector to,
+        ##based on Euclidean distances of the vector from the centroids.
+        # Placeholder for input
+        centroid_distances = tf.placeholder("float", [noofclusters])
+        cluster_assignment = tf.argmin(centroid_distances, 0)
+
+        ##INITIALIZING STATE VARIABLES
+
+        ##This will help initialization of all Variables defined with respect
+        ##to the graph. The Variable-initializer should be defined after
+        ##all the Variables have been constructed, so that each of them
+        ##will be included in the initialization.
+        init_op = tf.initialize_all_variables()
+
+        # Initialize all variables
+        sess.run(init_op)
+
+        ##CLUSTERING ITERATIONS
+
+        # Now perform the Expectation-Maximization steps of K-Means clustering
+        # iterations. To keep things simple, we will only do a set number of
+        # iterations, instead of using a Stopping Criterion.
+        noofiterations = 100
+        for _ in range(noofiterations):
+            ##EXPECTATION STEP
+            ##Based on the centroid locations till last iteration, compute
+            ##the _expected_ centroid assignments.
+            # Iterate over each vector
+            for vector_n in range(len(vectors)):
+                vect = vectors[vector_n]
+                # Compute Euclidean distance between this vector and each
+                # centroid. Remember that this list cannot be named
+                #'centroid_distances', since that is the input to the
+                # cluster assignment node.
+                distances = [
+                    sess.run(euclid_dist, feed_dict={v1: vect, v2: sess.run(centroid)})
+                    for centroid in centroids
+                ]
+                # Now use the cluster assignment node, with the distances
+                # as the input
+                assignment = sess.run(
+                    cluster_assignment, feed_dict={centroid_distances: distances}
+                )
+                # Now assign the value to the appropriate state variable
+                sess.run(
+                    cluster_assigns[vector_n], feed_dict={assignment_value: assignment}
+                )
+
+            ##MAXIMIZATION STEP
+            # Based on the expected state computed from the Expectation Step,
+            # compute the locations of the centroids so as to maximize the
+            # overall objective of minimizing within-cluster Sum-of-Squares
+            for cluster_n in range(noofclusters):
+                # Collect all the vectors assigned to this cluster
+                assigned_vects = [
+                    vectors[i]
+                    for i in range(len(vectors))
+                    if sess.run(assignments[i]) == cluster_n
+                ]
+                # Compute new centroid location
+                new_location = sess.run(
+                    mean_op, feed_dict={mean_input: array(assigned_vects)}
+                )
+                # Assign value to appropriate variable
+                sess.run(
+                    cent_assigns[cluster_n], feed_dict={centroid_value: new_location}
+                )
+
+        # Return centroids and assignments
+        centroids = sess.run(centroids)
+        assignments = sess.run(assignments)
+        return centroids, assignments