diff --git a/julia/data_preprocess.jl b/julia/data_preprocess.jl
index 27036f259ce3fb28e7255c37cade49a0a5ffadd2..36d727d9b77731356947a9ebb6054636e7cedde1 100644
--- a/julia/data_preprocess.jl
+++ b/julia/data_preprocess.jl
@@ -1,5 +1,4 @@
-using CSV
-using DataFrames
+using CSV, DataFrames, StatsBase, Random, Distributions
 
 
 input = "/home/a24jacqb/Documents/Code/pdptw-main/julia/LCN_01.csv"
@@ -35,10 +34,42 @@ function count_is_missing(column)
     return count(x -> ismissing(x), column)
 end
 
+
-# remove doublon
+# remove duplicate rows
+# `unique` returns a new DataFrame that keeps one copy of each fully identical row.
+df = unique(df)
 
-# remove empty order
+# Drop every row whose "# order" cell is missing; later steps match rows on that column.
+dropmissing!(df, ["# order"])
 
 # fill missing data (weight & revenue) with a normal distribution
 
+
+"""
+    fill_missing(value, type)
+
+Return `value` unchanged unless it is `missing` on a row whose `type` is `"PICKUP"`;
+in that case return a synthetic weight drawn from `Normal(mu, sigma)`.
+
+The draw is rounded to 2 decimals, raised to at least `1.0`, then capped at `maxi`.
+`mu`, `sigma` and `maxi` are script-level globals that must be assigned before the
+first call that actually needs a draw.
+
+A `missing` `type` is treated as "not a pickup": the value is returned as-is instead
+of raising the `TypeError` that `missing == "PICKUP"` causes inside an `if`.
+"""
+function fill_missing(value, type)
+    # `isequal` always yields a Bool, even when `type` is `missing`.
+    if !(ismissing(value) && isequal(type, "PICKUP"))
+        return value
+    end
+    draw = round(rand(Normal(mu, sigma)); digits=2)
+    # Floor first, cap second: same order as before, so `maxi` wins if it is below 1.0.
+    return min(max(draw, 1.0), maxi)
+end
+
+# Replace each weight cell with the result of `parse_weight` (defined outside this hunk).
+df.weight = map(parse_weight, df.weight)
+
+# Maximum, mean and standard deviation of the non-missing weights.
+# `fill_missing` reads these three globals by name, so they must be assigned before it runs.
+# NOTE(review): rows of every type feed these statistics, not only PICKUP rows - confirm.
+maxi = maximum(skipmissing(df.weight))
+mu = mean(skipmissing(df.weight))
+sigma = std(skipmissing(df.weight))
+
+# Walk the weight and type columns in lockstep and impute the missing PICKUP weights.
+df.weight = map(fill_missing, df.weight, df.type)
+
+
+# Give every DROPOFF row the negated weight of the first PICKUP row of the same order.
+# Pickup weights are indexed once, so the table is no longer rescanned per DROPOFF row.
+# `let` keeps the lookup table local instead of adding another script-level global.
+let pickup_weight = Dict{eltype(df."# order"),eltype(df.weight)}()
+    for row in eachrow(df)
+        # `get!` inserts only when the key is absent, which keeps the FIRST pickup per order.
+        isequal(row.type, "PICKUP") && get!(pickup_weight, row."# order", row.weight)
+    end
+
+    for row in eachrow(df)
+        # `isequal` returns `false` for a missing type, so such rows are skipped
+        # instead of raising a non-boolean TypeError in the condition.
+        isequal(row.type, "DROPOFF") || continue
+        # Fail with the offending order instead of a bare BoundsError on `[1]`.
+        haskey(pickup_weight, row."# order") || error(
+            string("DROPOFF order ", row."# order", " has no matching PICKUP row"),
+        )
+        row.weight = -pickup_weight[row."# order"]
+    end
+end
+
+# Print the first rows (at most 15) as a quick visual check of the cleaned table.
+println(first(df, 15))
+
+# Write the cleaned table; the path is relative, so it lands in the working directory.
+CSV.write("clean_LCN_01.csv", df)
\ No newline at end of file