mirror of
https://github.com/samirsaci/picking-route.git
synced 2025-06-02 02:26:51 +08:00
first commit
This commit is contained in:
98
utils/cluster/clustering.py
Normal file
98
utils/cluster/clustering.py
Normal file
@ -0,0 +1,98 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from ast import literal_eval
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.cluster.vq import kmeans2, whiten
|
||||
from scipy.spatial.distance import pdist
|
||||
from scipy.cluster.hierarchy import ward, fcluster
|
||||
from utils.routing.distances import *
|
||||
|
||||
def cluster_locations(list_coord, distance_threshold, dist_method, clust_start):
|
||||
''' Step 1: Create clusters of locations'''
|
||||
# Create linkage matrix
|
||||
if dist_method == 'euclidian':
|
||||
Z = ward(pdist(np.stack(list_coord)))
|
||||
else:
|
||||
Z = ward(pdist(np.stack(list_coord), metric = distance_picking_cluster))
|
||||
# Single cluster array
|
||||
fclust1 = fcluster(Z, t = distance_threshold, criterion = 'distance')
|
||||
return fclust1
|
||||
|
||||
|
||||
def clustering_mapping(df, distance_threshold, dist_method, orders_number, wave_start, clust_start, df_type): # clustering_loc
|
||||
'''Step 2: Clustering and mapping'''
|
||||
# 1. Create Clusters
|
||||
list_coord, list_OrderNumber, clust_id, df = cluster_wave(df, distance_threshold, 'custom', clust_start, df_type)
|
||||
clust_idmax = max(clust_id) # Last Cluster ID
|
||||
# 2. Mapping Order lines
|
||||
dict_map, dict_omap, df, Wave_max = lines_mapping_clst(df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start)
|
||||
return dict_map, dict_omap, df, Wave_max, clust_idmax
|
||||
|
||||
|
||||
def cluster_wave(df, distance_threshold, dist_method, clust_start, df_type):
|
||||
'''Step 3: Create waves by clusters'''
|
||||
# Create Column for Clustering
|
||||
if df_type == 'df_mono':
|
||||
df['Coord_Cluster'] = df['Coord']
|
||||
# Mapping points
|
||||
df_map = pd.DataFrame(df.groupby(['OrderNumber', 'Coord_Cluster'])['SKU'].count()).reset_index() # Here we use Coord Cluster
|
||||
list_coord, list_OrderNumber = np.stack(df_map.Coord_Cluster.apply(lambda t: literal_eval(t)).values), df_map.OrderNumber.values
|
||||
# Cluster picking locations
|
||||
clust_id = cluster_locations(list_coord, distance_threshold, dist_method, clust_start)
|
||||
clust_id = [(i + clust_start) for i in clust_id]
|
||||
# List_coord
|
||||
list_coord = np.stack(list_coord)
|
||||
return list_coord, list_OrderNumber, clust_id, df
|
||||
|
||||
|
||||
def lines_mapping(df, orders_number, wave_start):
|
||||
'''Step 4: Mapping Order lines mapping without clustering '''
|
||||
# Unique order numbers list
|
||||
list_orders = df.OrderNumber.unique()
|
||||
# Dictionnary for mapping
|
||||
dict_map = dict(zip(list_orders, [i for i in range(1, len(list_orders))]))
|
||||
# Order ID mapping
|
||||
df['OrderID'] = df['OrderNumber'].map(dict_map)
|
||||
# Grouping Orders by Wave of orders_number
|
||||
df['WaveID'] = (df.OrderID%orders_number == 0).shift(1).fillna(0).cumsum() + wave_start
|
||||
# Counting number of Waves
|
||||
waves_number = df.WaveID.max() + 1
|
||||
return df, waves_number
|
||||
|
||||
|
||||
def lines_mapping_clst(df, list_coord, list_OrderNumber, clust_id, orders_number, wave_start):
|
||||
'''Step 4: Mapping Order lines mapping with clustering '''
|
||||
# Dictionnary for mapping by cluster
|
||||
dict_map = dict(zip(list_OrderNumber, clust_id))
|
||||
# Dataframe mapping
|
||||
df['ClusterID'] = df['OrderNumber'].map(dict_map)
|
||||
# Order by ID and mapping
|
||||
df = df.sort_values(['ClusterID','OrderNumber'], ascending = True)
|
||||
list_orders = list(df.OrderNumber.unique())
|
||||
# Dictionnary for order mapping
|
||||
dict_omap = dict(zip(list_orders, [i for i in range(1, len(list_orders))]))
|
||||
# Order ID mapping
|
||||
df['OrderID'] = df['OrderNumber'].map(dict_omap)
|
||||
# Create Waves: Increment when reaching orders_number or changing cluster
|
||||
df['WaveID'] = wave_start + ((df.OrderID%orders_number == 0) | (df.ClusterID.diff() != 0)).shift(1).fillna(0).cumsum()
|
||||
|
||||
wave_max = df.WaveID.max()
|
||||
return dict_map, dict_omap, df, wave_max
|
||||
|
||||
|
||||
def locations_listing(df_orderlines, wave_id):
|
||||
''' Step 5: Listing location per Wave of orders'''
|
||||
|
||||
# Filter by wave_id
|
||||
df = df_orderlines[df_orderlines.WaveID == wave_id]
|
||||
# Create coordinates listing
|
||||
list_coord = list(df['Coord'].apply(lambda t: literal_eval(t)).values) # Here we use Coord for distance
|
||||
list_coord.sort()
|
||||
# Get unique Unique coordinates
|
||||
list_coord = list(k for k,_ in itertools.groupby(list_coord))
|
||||
n_locs = len(list_coord)
|
||||
n_lines = len(df)
|
||||
n_pcs = df.PCS.sum()
|
||||
|
||||
return list_coord, n_locs, n_lines, n_pcs
|
Reference in New Issue
Block a user