first commit

2025-06-02 02:26:51 +08:00 · 2021-11-01 23:12:52 +01:00
commit 4bd027b0d7
40 changed files with 6016 additions and 0 deletions
--- a/utils/cluster/clustering.py
+++ b/utils/cluster/clustering.py
@ -0,0 +1,98 @@
+import numpy as np
+import pandas as pd
+import itertools
+from ast import literal_eval
+import matplotlib.pyplot as plt
+from scipy.cluster.vq import kmeans2, whiten
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import ward, fcluster
+from utils.routing.distances import *
+
def cluster_locations(list_coord, distance_threshold, dist_method, clust_start):
    '''Step 1: Create clusters of locations with Ward hierarchical clustering.

    Parameters
    ----------
    list_coord : sequence of coordinates, one entry per location
    distance_threshold : cut-off passed to ``fcluster`` with ``criterion='distance'``
    dist_method : 'euclidian' (legacy spelling) or 'euclidean' uses pdist's
        default metric; any other value uses ``distance_picking_cluster``
        (expected to come from the ``utils.routing.distances`` star import)
    clust_start : not used by this function; kept so the signature is unchanged

    Returns
    -------
    1-D integer array of cluster labels, one per location (labels start at 1).
    '''
    n_locations = len(list_coord)
    # A linkage cannot be built from fewer than two observations: zero or one
    # location is trivially its own (single) cluster.
    if n_locations < 2:
        return np.ones(n_locations, dtype=np.int32)

    points = np.stack(list_coord)
    # Condensed pairwise distance matrix
    if dist_method in ('euclidian', 'euclidean'):
        pairwise = pdist(points)
    else:
        pairwise = pdist(points, metric=distance_picking_cluster)
    # Linkage matrix, then flat clusters cut at the distance threshold
    Z = ward(pairwise)
    return fcluster(Z, t=distance_threshold, criterion='distance')
+
+
def clustering_mapping(df, distance_threshold, dist_method, orders_number, wave_start, clust_start, df_type): # clustering_loc
    '''Step 2: Clustering and mapping.

    Clusters the order locations with ``cluster_wave`` and then assigns
    cluster, order and wave IDs to the order lines with ``lines_mapping_clst``.

    Returns
    -------
    dict_map : OrderNumber -> cluster ID
    dict_omap : OrderNumber -> order ID
    df : order lines with the mapping columns added
    Wave_max : highest WaveID assigned
    clust_idmax : highest cluster ID assigned
    '''
    # 1. Create clusters, forwarding the caller's dist_method (it used to be
    #    overridden by a hard-coded 'custom', so the argument had no effect)
    list_coord, list_OrderNumber, clust_id, df = cluster_wave(
        df, distance_threshold, dist_method, clust_start, df_type)
    clust_idmax = max(clust_id)  # Last cluster ID
    # 2. Map order lines to clusters and waves
    dict_map, dict_omap, df, Wave_max = lines_mapping_clst(
        df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start)
    return dict_map, dict_omap, df, Wave_max, clust_idmax
+
+
def cluster_wave(df, distance_threshold, dist_method, clust_start, df_type):
    '''Step 3: Cluster the (order, location) pairs used to build waves.

    For ``df_type == 'df_mono'`` the ``Coord_Cluster`` column is first copied
    from ``Coord`` (this writes to the DataFrame passed in).

    Returns
    -------
    list_coord : stacked array of the parsed ``Coord_Cluster`` values, one row
        per distinct (OrderNumber, Coord_Cluster) pair
    list_OrderNumber : order numbers aligned with ``list_coord``
    clust_id : list of cluster labels, each offset by ``clust_start``
    df : the input DataFrame
    '''
    if df_type == 'df_mono':
        df['Coord_Cluster'] = df['Coord']

    # One row per distinct (order, clustering coordinate) pair
    pair_counts = df.groupby(['OrderNumber', 'Coord_Cluster'])['SKU'].count()
    df_pairs = pair_counts.reset_index()
    list_OrderNumber = df_pairs['OrderNumber'].values
    # Coordinates are stored as strings: parse them before stacking
    list_coord = np.stack([literal_eval(raw) for raw in df_pairs['Coord_Cluster']])

    # Cluster the locations, then shift the labels by the starting cluster ID
    raw_labels = cluster_locations(list_coord, distance_threshold, dist_method, clust_start)
    clust_id = [label + clust_start for label in raw_labels]
    return list_coord, list_OrderNumber, clust_id, df
+
+
def lines_mapping(df, orders_number, wave_start):
    '''Step 4: Map order lines to order IDs and waves, without clustering.

    Adds two columns to ``df`` in place:
    ``OrderID`` (1-based, in order of first appearance of each OrderNumber)
    and ``WaveID`` (starting at ``wave_start``).

    Parameters
    ----------
    df : order lines with an ``OrderNumber`` column; rows of the same order
        are expected to be contiguous
    orders_number : number of orders per wave
    wave_start : ID given to the first wave

    Returns
    -------
    df : the same DataFrame with ``OrderID`` and ``WaveID``
    waves_number : highest WaveID + 1
    '''
    # Unique order numbers, in order of appearance
    list_orders = df.OrderNumber.unique()
    # IDs run from 1 to len(list_orders) inclusive so the last order is mapped too
    dict_map = dict(zip(list_orders, range(1, len(list_orders) + 1)))
    df['OrderID'] = df['OrderNumber'].map(dict_map)

    # The previous line belongs to an order that completes a wave...
    prev_closes_wave = (df['OrderID'] % orders_number == 0).shift(1, fill_value=False)
    # ...and this line starts a different order (keeps multi-line orders together)
    starts_new_order = df['OrderNumber'] != df['OrderNumber'].shift(1)
    starts_new_wave = prev_closes_wave & starts_new_order
    df['WaveID'] = starts_new_wave.astype(int).cumsum() + wave_start

    # Counting number of waves
    waves_number = df.WaveID.max() + 1
    return df, waves_number
+
+
def lines_mapping_clst(df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start):
    '''Step 4: Map order lines to clusters, order IDs and waves, with clustering.

    Parameters
    ----------
    df : order lines with an ``OrderNumber`` column
    list_coord : not used by this function; kept so the signature is unchanged
    list_OrderNumber : order numbers aligned with ``clust_id``
    clust_id : numeric cluster ID for each entry of ``list_OrderNumber``
    orders_number : number of orders per wave
    wave_start : ID given to the first wave

    Returns
    -------
    dict_map : OrderNumber -> cluster ID
    dict_omap : OrderNumber -> order ID (1-based, after sorting by cluster)
    df : lines sorted by (ClusterID, OrderNumber) with ``ClusterID``,
        ``OrderID`` and ``WaveID`` columns
    wave_max : highest WaveID assigned
    '''
    # Dictionary for mapping by cluster
    dict_map = dict(zip(list_OrderNumber, clust_id))
    df['ClusterID'] = df['OrderNumber'].map(dict_map)
    # Sort so that lines of the same cluster, then the same order, are contiguous
    df = df.sort_values(['ClusterID', 'OrderNumber'], ascending=True)
    list_orders = list(df.OrderNumber.unique())
    # IDs run from 1 to len(list_orders) inclusive so the last order is mapped too
    dict_omap = dict(zip(list_orders, range(1, len(list_orders) + 1)))
    df['OrderID'] = df['OrderNumber'].map(dict_omap)

    # A new wave starts on a line when either:
    #  - the previous line's order completed a wave and this line is a new order, or
    #  - this line belongs to a different cluster than the previous line.
    # The first line never opens a new wave (its diff is NaN, treated as no change),
    # and the cluster-change flag is NOT shifted: the change is detected on the
    # first line of the new cluster, which is exactly where the wave must start.
    prev_closes_wave = (df['OrderID'] % orders_number == 0).shift(1, fill_value=False)
    starts_new_order = df['OrderNumber'] != df['OrderNumber'].shift(1)
    cluster_changed = df['ClusterID'].diff().fillna(0) != 0
    starts_new_wave = (prev_closes_wave & starts_new_order) | cluster_changed
    df['WaveID'] = wave_start + starts_new_wave.astype(int).cumsum()

    wave_max = df.WaveID.max()
    return dict_map, dict_omap, df, wave_max
+
+
def locations_listing(df_orderlines, wave_id):
    '''Step 5: List the distinct picking locations of one wave of orders.

    Returns
    -------
    list_coord : sorted list of the distinct parsed ``Coord`` values of the wave
    n_locs : number of distinct locations
    n_lines : number of order lines in the wave
    n_pcs : sum of the ``PCS`` column over the wave
    '''
    # Keep only the lines of the requested wave
    wave_lines = df_orderlines[df_orderlines['WaveID'] == wave_id]

    # Coord holds string-encoded coordinates: parse, sort, then collapse
    # consecutive duplicates to keep one entry per location
    parsed_sorted = sorted(literal_eval(raw) for raw in wave_lines['Coord'])
    list_coord = [coord for coord, _ in itertools.groupby(parsed_sorted)]

    return list_coord, len(list_coord), len(wave_lines), wave_lines['PCS'].sum()